From a3d494070b4392fac3aa5d5f64bcbc9ab1577384 Mon Sep 17 00:00:00 2001
From: Ben Capodanno
Date: Tue, 25 Feb 2025 17:26:28 -0800
Subject: [PATCH 001/166] Fix Docker Casing Warnings

---
 Dockerfile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index b76802f3..0acd8c06 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@
 # python-base
 # Set up shared environment variables
 ################################
-FROM python:3.9 as python-base
+FROM python:3.9 AS python-base
 
 # Poetry
 # https://python-poetry.org/docs/configuration/#using-environment-variables
@@ -69,7 +69,7 @@ RUN samtools faidx GCF_000001405.39_GRCh38.p13_genomic.fna.gz
 # builder
 # Builds application dependencies and creates venv
 ################################
-FROM python-base as builder
+FROM python-base AS builder
 
 WORKDIR /code
 
@@ -90,7 +90,7 @@ COPY src/mavedb/server_main.py /code/main.py
 # worker
 # Worker image
 ################################
-FROM builder as worker
+FROM builder AS worker
 COPY --from=downloader /data /data
 
 # copy pre-built poetry + venv
@@ -103,7 +103,7 @@ CMD ["arq", "mavedb.worker.WorkerSettings"]
 # application
 # Application image
 ################################
-FROM builder as application
+FROM builder AS application
 COPY --from=downloader /data /data
 
 # copy pre-built poetry + venv

From a7c39af1bf38e1a22b2ed02fe0df47f24276b68f Mon Sep 17 00:00:00 2001
From: Ben Capodanno
Date: Fri, 28 Feb 2025 15:24:15 -0800
Subject: [PATCH 002/166] Refactor Dataframe Validation Logic

Refactors dataframe validation logic into 3 component files: column.py,
dataframe.py, and variant.py. This simplifies the validation structure and
logically separates validation functions based on the part of the df they
operate on.
---
 src/mavedb/lib/validation/__init__.py          |   0
 src/mavedb/lib/validation/constants/target.py  |   1 +
 src/mavedb/lib/validation/dataframe.py         | 782 ------------------
 src/mavedb/lib/validation/dataframe/column.py  | 252 ++++++
 .../lib/validation/dataframe/dataframe.py      | 371 +++++++++
 .../lib/validation/dataframe/variant.py        | 329 ++++++++
 src/mavedb/lib/validation/py.typed             |   0
 src/mavedb/worker/jobs.py                      |   2 +-
 8 files changed, 954 insertions(+), 783 deletions(-)
 create mode 100644 src/mavedb/lib/validation/__init__.py
 delete mode 100644 src/mavedb/lib/validation/dataframe.py
 create mode 100644 src/mavedb/lib/validation/dataframe/column.py
 create mode 100644 src/mavedb/lib/validation/dataframe/dataframe.py
 create mode 100644 src/mavedb/lib/validation/dataframe/variant.py
 create mode 100644 src/mavedb/lib/validation/py.typed

diff --git a/src/mavedb/lib/validation/__init__.py b/src/mavedb/lib/validation/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/mavedb/lib/validation/constants/target.py b/src/mavedb/lib/validation/constants/target.py
index f64b4bd4..0741be09 100644
--- a/src/mavedb/lib/validation/constants/target.py
+++ b/src/mavedb/lib/validation/constants/target.py
@@ -1 +1,2 @@
 valid_sequence_types = ["infer", "dna", "protein"]
+strict_valid_sequence_types = ["dna", "protein"]
diff --git a/src/mavedb/lib/validation/dataframe.py b/src/mavedb/lib/validation/dataframe.py
deleted file mode 100644
index 2d7bdffc..00000000
--- a/src/mavedb/lib/validation/dataframe.py
+++ /dev/null
@@ -1,782 +0,0 @@
-from typing import Optional, Tuple, Union
-
-import hgvs.exceptions
-import hgvs.parser
-import hgvs.validator
-import numpy as np
-import pandas as pd
-from cdot.hgvs.dataproviders import RESTDataProvider
-from fqfa.util.translate import
translate_dna -from mavehgvs.exceptions import MaveHgvsParseError -from mavehgvs.variant import Variant - -from mavedb.lib.exceptions import MixedTargetError -from mavedb.lib.validation.constants.general import ( - hgvs_nt_column, - hgvs_pro_column, - hgvs_splice_column, - required_score_column, -) -from mavedb.lib.validation.exceptions import ValidationError -from mavedb.models.target_accession import TargetAccession -from mavedb.models.target_gene import TargetGene -from mavedb.models.target_sequence import TargetSequence - -# handle with pandas all null strings -# provide a csv or a pandas dataframe -# take dataframe, output as csv to temp directory, use standard library - - -STANDARD_COLUMNS = (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, required_score_column) - - -def infer_column_type(col: pd.Series) -> str: - """Infer whether the given column contains string or numeric data. - - The function returns "string" for string columns or "numeric" for numeric columns. - If there is a mixture of types it returns "mixed". - If every value in the column is `None` or NA it returns "empty". - - Parameters - ---------- - col : pandas.Series - The column to inspect - - Returns - ------- - str - One of "string", "numeric", "mixed", or "empty" - """ - if col.isna().all(): - return "empty" - else: - col_numeric = pd.to_numeric(col, errors="coerce") - if col_numeric.isna().all(): # nothing converted to a number - return "string" - elif np.all(col.isna() == col_numeric.isna()): # all non-NA values converted - return "numeric" - else: # some values converted but not all - return "mixed" - - -def sort_dataframe_columns(df: pd.DataFrame) -> pd.DataFrame: - """Sort the columns of the given dataframe according to the expected ordering in MaveDB. - - MaveDB expects that dataframes have columns in the following order (note some columns are optional): - * hgvs_nt - * hgvs_splice - * hgvs_pro - * score - * other - - Parameters - ---------- - df : pandas.DataFrame - The dataframe with columns to sort - - Returns - ------- - pandas.DataFrame - The dataframe with the same data but sorted columns - """ - - def column_sort_function(value, columns): - if value.lower() in STANDARD_COLUMNS: - return STANDARD_COLUMNS.index(value.lower()) - else: - return columns.index(value) + len(STANDARD_COLUMNS) - - old_columns = list(df.columns) - new_columns = sorted(old_columns, key=lambda v: column_sort_function(v, old_columns)) - - return df[new_columns] - - -def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame: - """Standardize a dataframe by sorting the columns and changing the standard column names to lowercase. - - The standard column names are: - * hgvs_nt - * hgvs_splice - * hgvs_pro - * score - - Case for other columns is preserved. - - Parameters - ---------- - df : pandas.DataFrame - The dataframe to standardize - - Returns - ------- - pandas.DataFrame - The standardized dataframe - """ - column_mapper = {x: x.lower() for x in df.columns if x.lower() in STANDARD_COLUMNS} - - df.rename(columns=column_mapper, inplace=True) - - return sort_dataframe_columns(df) - - -def validate_and_standardize_dataframe_pair( - scores_df: pd.DataFrame, counts_df: Optional[pd.DataFrame], targets: list[TargetGene], hdp: RESTDataProvider -) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: - """ - Perform validation and standardization on a pair of score and count dataframes. 
- - Parameters - ---------- - scores_df : pandas.DataFrame - The scores dataframe - counts_df : Optional[pandas.DataFrame] - The counts dataframe, can be None if not present - targets : str - The target genes on which to validate dataframes - hdp : RESTDataProvider - The biocommons.hgvs compatible data provider. Used to fetch sequences for hgvs validation. - - Returns - ------- - Tuple[pd.DataFrame, Optional[pd.DataFrame]] - The standardized score and count dataframes, or score and None if no count dataframe was provided - - Raises - ------ - ValidationError - If one of the validation functions raises an exception - """ - if not targets: - raise ValueError("Can't validate provided file with no targets.") - - validate_dataframe(scores_df, "scores", targets, hdp) - if counts_df is not None: - validate_dataframe(counts_df, "counts", targets, hdp) - validate_variant_columns_match(scores_df, counts_df) - - new_scores_df = standardize_dataframe(scores_df) - new_counts_df = standardize_dataframe(counts_df) if counts_df is not None else None - return new_scores_df, new_counts_df - - -def validate_dataframe(df: pd.DataFrame, kind: str, targets: list["TargetGene"], hdp: RESTDataProvider) -> None: - """ - Validate that a given dataframe passes all checks. - - Parameters - ---------- - df : pandas.DataFrame - The dataframe to validate - kind : str - The kind of dataframe "counts" or "scores" - target_seq : str - The target sequence to validate variants against - target_seq_type : str - The kind of target sequence, one of "infer" "dna" or "protein" - - Returns - ------- - None - - Raises - ------ - ValidationError - If one of the validators called raises an exception - """ - # basic checks - validate_column_names(df, kind) - validate_no_null_rows(df) - - column_mapping = {c.lower(): c for c in df.columns} - index_column = choose_dataframe_index_column(df) - - prefixes: dict[str, Optional[str]] = dict() - for c in column_mapping: - if c in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): - is_index = column_mapping[c] == index_column - prefixes[c] = None - - # Ignore validation for null non-index hgvs columns - if df[column_mapping[c]].isna().all() and not is_index: - continue - - score_set_is_accession_based = all(target.target_accession for target in targets) - score_set_is_sequence_based = all(target.target_sequence for target in targets) - - # This is typesafe, despite Pylance's claims otherwise - if score_set_is_accession_based and not score_set_is_sequence_based: - validate_hgvs_genomic_column( - df[column_mapping[c]], - is_index, - [target.target_accession for target in targets], - hdp, # type: ignore - ) - elif score_set_is_sequence_based and not score_set_is_accession_based: - validate_hgvs_transgenic_column( - df[column_mapping[c]], - is_index, - {target.target_sequence.label: target.target_sequence for target in targets}, # type: ignore - ) - else: - raise MixedTargetError("Could not validate dataframe against provided mixed target types.") - - # post validation, handle prefixes. 
We've already established these columns are non-null - if score_set_is_accession_based or len(targets) > 1: - prefixes[c] = ( - df[column_mapping[c]].dropna()[0].split(" ")[0].split(":")[1][0] - ) # Just take the first prefix, we validate consistency elsewhere - else: - prefixes[c] = df[column_mapping[c]].dropna()[0][0] - - else: - force_numeric = (c == required_score_column) or (kind == "counts") - validate_data_column(df[column_mapping[c]], force_numeric) - - validate_hgvs_prefix_combinations( - hgvs_nt=prefixes[hgvs_nt_column], - hgvs_splice=prefixes[hgvs_splice_column], - hgvs_pro=prefixes[hgvs_pro_column], - transgenic=all(target.target_sequence for target in targets), - ) - - -def validate_column_names(df: pd.DataFrame, kind: str) -> None: - """Validate the column names in a dataframe. - - This function validates the column names in the given dataframe. - It can be run for either a "scores" dataframe or a "counts" dataframe. - A "scores" dataframe must have a column named 'score' and a "counts" dataframe cannot have a column named 'score'. - - The function also checks for a valid combination of columns that define variants. - - Basic checks are performed to make sure that a column name is not empty, null, or whitespace, - as well as making sure there are no duplicate column names. - - Parameters - ---------- - df : pandas.DataFrame - The scores or counts dataframe to be validated - - kind : str - Either "counts" or "scores" depending on the kind of dataframe being validated - - Raises - ------ - ValidationError - If the column names are not valid - """ - if any(type(c) is not str for c in df.columns): - raise ValidationError("column names must be strings") - - if any(c.isspace() for c in df.columns) or any(len(c) == 0 for c in df.columns): - raise ValidationError("column names cannot be empty or whitespace") - - columns = [c.lower() for c in df.columns] - - if kind == "scores": - if required_score_column not in columns: - raise ValidationError(f"score dataframe must have a '{required_score_column}' column") - elif kind == "counts": - if required_score_column in columns: - raise ValidationError(f"counts dataframe must not have a '{required_score_column}' column") - else: - raise ValueError("kind only accepts scores and counts") - - if hgvs_splice_column in columns: - if hgvs_nt_column not in columns or hgvs_pro_column not in columns: - raise ValidationError( - f"dataframes with '{hgvs_splice_column}' must also define '{hgvs_nt_column}' and '{hgvs_pro_column}'" - ) - - if len(columns) != len(set(columns)): - raise ValidationError("duplicate column names are not allowed (this check is case insensitive)") - - if set(columns).isdisjoint({hgvs_nt_column, hgvs_splice_column, hgvs_pro_column}): - raise ValidationError("dataframe does not define any variant columns") - - if set(columns).issubset({hgvs_nt_column, hgvs_splice_column, hgvs_pro_column}): - raise ValidationError("dataframe does not define any data columns") - - -def validate_no_null_rows(df: pd.DataFrame) -> None: - """Check that there are no fully null rows in the dataframe. 
- - Parameters - __________ - df : pandas.DataFrame - The scores or counts dataframe being validated - - Raises - ______ - ValidationError - If there are null rows in the dataframe - """ - if any(df.isnull().all(axis=1)): - raise ValidationError(f"found {len(df[df.isnull().all(axis=1)])} null rows in the data frame") - - -def choose_dataframe_index_column(df: pd.DataFrame) -> str: - """ - Identify the HGVS variant column that should be used as the index column in this dataframe. - - Parameters - ---------- - df : pandas.DataFrame - The dataframe to check - - Returns - ------- - str - The column name of the index column - - Raises - ------ - ValidationError - If no valid HGVS variant column is found - """ - column_mapping = {c.lower(): c for c in df.columns if not df[c].isna().all()} - - if hgvs_nt_column in column_mapping: - return column_mapping[hgvs_nt_column] - elif hgvs_pro_column in column_mapping: - return column_mapping[hgvs_pro_column] - else: - raise ValidationError("failed to find valid HGVS variant column") - - -def validate_hgvs_transgenic_column(column: pd.Series, is_index: bool, targets: dict[str, "TargetSequence"]) -> None: - """ - Validate the variants in an HGVS column from a dataframe. - - Tests whether the column has a correct and consistent prefix. - This function also validates all individual variants in the column and checks for agreement against the target - sequence (for non-splice variants). - - Implementation NOTE: We assume variants will only be presented as fully qualified (accession:variant) - if this column is being validated against multiple targets. - - Parameters - ---------- - column : pd.Series - The column from the dataframe to validate - is_index : bool - True if this is the index column for the dataframe and therefore cannot have missing values; else False - targets : dict - Dictionary containing a mapping of target gene names to their sequences. - - Returns - ------- - None - - Raises - ------ - ValueError - If the target sequence does is not dna or protein (or inferred as dna or protein) - ValueError - If the target sequence is not valid for the variants (e.g. protein sequence for nucleotide variants) - ValidationError - If one of the variants fails validation - """ - valid_sequence_types = ("dna", "protein") - validate_variant_column(column, is_index) - prefixes = generate_variant_prefixes(column) - validate_variant_formatting(column, prefixes, list(targets.keys()), len(targets) > 1) - - observed_sequence_types = [target.sequence_type for target in targets.values()] - invalid_sequence_types = set(observed_sequence_types) - set(valid_sequence_types) - if invalid_sequence_types: - raise ValueError( - f"Some targets are invalid sequence types: {invalid_sequence_types}. Sequence types shoud be one of: {valid_sequence_types}" - ) - - # If this is the `hgvs_nt` column, at least one target should be of type `dna`. - if str(column.name).lower() == hgvs_nt_column: - if "dna" not in observed_sequence_types: - raise ValueError( - f"invalid target sequence type(s) for '{column.name}'. At least one target should be of type `dna`. Observed types: {observed_sequence_types}" - ) - - # Make sure this column is either the splice column or protein column. - elif str(column.name).lower() != hgvs_splice_column and str(column.name).lower() != hgvs_pro_column: - raise ValueError(f"unrecognized hgvs column name '{column.name}'") - - # Build dictionary of target sequences based on the column we are validating. 
- target_seqs: dict[str, Union[str, None]] = {} - for name, target in targets.items(): - if str(column.name).lower() == hgvs_nt_column: - target_seqs[name] = target.sequence - - # don't validate splice columns against provided sequences. - elif str(column.name).lower() == hgvs_splice_column: - target_seqs[name] = None - - # translate the target sequence if needed. - elif str(column.name).lower() == hgvs_pro_column: - if target.sequence_type == "dna" and target.sequence is not None: - target_seqs[name] = translate_dna(target.sequence)[0] - else: - target_seqs[name] = target.sequence - - # get a list of all invalid variants - invalid_variants = list() - for i, s in column.items(): - if not s: - continue - - # variants can exist on the same line separated by a space - for variant in s.split(" "): - # When there are multiple targets, treat provided variants as fully qualified. - if len(targets) > 1: - name, variant = str(variant).split(":") - else: - name = list(targets.keys())[0] - if variant is not None: - try: - Variant(variant, targetseq=target_seqs[name]) - except MaveHgvsParseError: - try: - Variant(variant) # note this will get called a second time for splice variants - except MaveHgvsParseError: - invalid_variants.append(f"invalid variant string '{variant}' at row {i} for sequence {name}") - else: - invalid_variants.append( - f"target sequence mismatch for '{variant}' at row {i} for sequence {name}" - ) - - # format and raise an error message that contains all invalid variants - if len(invalid_variants) > 0: - raise ValidationError( - f"encountered {len(invalid_variants)} invalid variant strings.", triggers=invalid_variants - ) - - -def validate_hgvs_genomic_column( - column: pd.Series, is_index: bool, targets: list["TargetAccession"], hdp: RESTDataProvider -) -> None: - """ - Validate the variants in an HGVS column from a dataframe. - - Tests whether the column has a correct and consistent prefix. - This function also validates all individual variants in the column and checks for agreement against the target - sequence (for non-splice variants). - - Parameters - ---------- - column : pd.Series - The column from the dataframe to validate - is_index : bool - True if this is the index column for the dataframe and therefore cannot have missing values; else False - targets : list - Dictionary containing a list of target accessions. - - Returns - ------- - None - - Raises - ------ - ValueError - If the target sequence does is not dna or protein (or inferred as dna or protein) - ValueError - If the target sequence is not valid for the variants (e.g. protein sequence for nucleotide variants) - ValidationError - If one of the variants fails validation - """ - validate_variant_column(column, is_index) - prefixes = generate_variant_prefixes(column) - validate_variant_formatting( - column, prefixes, [target.accession for target in targets if target.accession is not None], True - ) - - # validate the individual variant strings - # prepare the target sequences for validation - target_seqs: dict[str, Union[str, None]] = {} - for target in targets: - assert target.accession is not None - # We shouldn't have to worry about translating protein sequences when we deal with accession based variants - if str(column.name).lower() == hgvs_nt_column or str(column.name).lower() == hgvs_pro_column: - target_seqs[target.accession] = target.accession - - # TODO: no splice col for genomic coordinate variants? 
- elif str(column.name).lower() == hgvs_splice_column: - target_seqs[target.accession] = None # don't validate splice variants against a target sequence - - else: - raise ValueError(f"unrecognized hgvs column name '{column.name}'") - - hp = hgvs.parser.Parser() - vr = hgvs.validator.Validator(hdp=hdp) - - invalid_variants = list() - for i, s in column.items(): - if s is not None: - for variant in s.split(" "): - try: - # We set strict to `False` to suppress validation warnings about intronic variants. - vr.validate(hp.parse(variant), strict=False) - except hgvs.exceptions.HGVSError as e: - invalid_variants.append(f"Failed to parse row {i} with HGVS exception: {e}") - - # format and raise an error message that contains all invalid variants - if len(invalid_variants) > 0: - raise ValidationError( - f"encountered {len(invalid_variants)} invalid variant strings.", triggers=invalid_variants - ) - - -def validate_variant_formatting(column: pd.Series, prefixes: list[str], targets: list[str], fully_qualified: bool): - """ - Validate the formatting of HGVS variants present in the passed column against - lists of prefixes and targets - - Parameters - ---------- - column : pd.Series - A pandas column containing HGVS variants - prefixes : list[str] - A list of prefixes we can expect to occur within the passed column - targets : list[str] - A list of targets we can expect to occur within the passed column - - Returns - ------- - None - - Raises - ------ - ValidationError - If any of the variants in the column are not fully qualified with respect to multiple possible targets - ValidationError - If the column contains multiple prefixes or the wrong prefix for that column name - ValidationError - If the column contains target accessions not present in the list of possible targets - """ - variants = [variant for s in column.dropna() for variant in s.split(" ")] - - # if there is more than one target, we expect variants to be fully qualified - if fully_qualified: - if not all(len(str(v).split(":")) == 2 for v in variants): - raise ValidationError( - f"variant column '{column.name}' needs fully qualified coordinates when validating against multiple targets" - ) - if len(set(str(v).split(":")[1][:2] for v in variants)) > 1: - raise ValidationError(f"variant column '{column.name}' has inconsistent variant prefixes") - if not all(str(v).split(":")[1][:2] in prefixes for v in variants): - raise ValidationError(f"variant column '{column.name}' has invalid variant prefixes") - if not all(str(v).split(":")[0] in targets for v in variants): - raise ValidationError(f"variant column '{column.name}' has invalid accession identifiers") - - else: - if len(set(v[:2] for v in variants)) > 1: - raise ValidationError(f"variant column '{column.name}' has inconsistent variant prefixes") - if not all(v[:2] in prefixes for v in variants): - raise ValidationError(f"variant column '{column.name}' has invalid variant prefixes") - - -def generate_variant_prefixes(column: pd.Series): - """ - Generate variant prefixes for the provided column - - Parameters - ---------- - column : pd.Series - The pandas column from which to generate variant prefixes - - Returns - ------- - None - - Raises - ------ - ValueError - If the provided pandas column has an unrecognized variant column name - """ - if str(column.name).lower() == hgvs_nt_column: - return [f"{a}." for a in "cngmo"] - if str(column.name).lower() == hgvs_splice_column: - return [f"{a}." 
for a in "cn"] - if str(column.name).lower() == hgvs_pro_column: - return ["p."] - - raise ValueError(f"unrecognized hgvs column name '{column.name}'") - - -def validate_variant_column(column: pd.Series, is_index: bool): - """ - Validate critical column properties of an HGVS variant column, with special - attention to certain properties expected on index columns - - Parameters - ---------- - column : pd.Series - The pandas column containing HGVS variant information - id_index : bool - Whether the provided column is the index column - - Returns - ------- - None - - Raises - ------ - ValidationError - If an index column contains missing or non-unique values - ValidationError - If a column contains any numeric data - """ - if infer_column_type(column) not in ("string", "empty"): - raise ValidationError(f"variant column '{column.name}' cannot contain numeric data") - if column.isna().any() and is_index: - raise ValidationError(f"primary variant column '{column.name}' cannot contain null values") - if not column.is_unique and is_index: - raise ValidationError(f"primary variant column '{column.name}' must contain unique values") - - -def validate_hgvs_prefix_combinations( - hgvs_nt: Optional[str], hgvs_splice: Optional[str], hgvs_pro: Optional[str], transgenic: bool -) -> None: - """ - Validate the combination of HGVS variant prefixes. - - This function assumes that other validation, such as checking that all variants in the column have the same prefix, - has already been performed. - - Parameters - ---------- - hgvs_nt : Optional[str] - The first character (prefix) of the HGVS nucleotide variant strings, or None if not used. - hgvs_splice : Optional[str] - The first character (prefix) of the HGVS splice variant strings, or None if not used. - hgvs_pro : Optional[str] - The first character (prefix) of the HGVS protein variant strings, or None if not used. - transgenic : bool - Whether we should validate these prefix combinations as transgenic variants - - Returns - ------- - None - - Raises - ------ - ValueError - If upstream validation failed and an invalid prefix string was passed to this function - ValidationError - If the combination of prefixes is not valid - """ - # ensure that the prefixes are valid - this validation should have been performed before this function was called - if hgvs_nt not in list("cngmo") + [None]: - raise ValueError("invalid nucleotide prefix") - if hgvs_splice not in list("cn") + [None]: - raise ValueError("invalid nucleotide prefix") - if hgvs_pro not in ["p", None]: - raise ValueError("invalid protein prefix") - - # test agreement of prefixes across columns - if hgvs_splice is not None: - if hgvs_nt not in list("gmo"): - raise ValidationError("nucleotide variants must use valid genomic prefix when splice variants are present") - if hgvs_pro is not None: - if hgvs_splice != "c": - raise ValidationError("splice variants' must use 'c.' prefix when protein variants are present") - else: - if hgvs_splice != "n": - raise ValidationError("splice variants must use 'n.' prefix when protein variants are not present") - elif hgvs_pro is not None and hgvs_nt is not None: - if hgvs_nt != "c": - raise ValidationError( - "nucleotide variants must use 'c.' prefix when protein variants are present and splicing variants are" - " not present" - ) - # Only raise if this data will not be validated by biocommons.hgvs - elif hgvs_nt is not None: # just hgvs_nt - if hgvs_nt != "n" and transgenic: - raise ValidationError("nucleotide variants must use 'n.' 
prefix when only nucleotide variants are defined") - - -def validate_variant_consistency(df: pd.DataFrame) -> None: - """ - Ensure that variants defined in a single row describe the same variant. - - Parameters - ---------- - df : pd.DataFrame - - Returns - ------- - None - - """ - # TODO - pass - - -def validate_data_column(column: pd.Series, force_numeric: bool = False) -> None: - """ - Validate the contents of a data column. - - Parameters - ---------- - column : pandas.Series - A data column from a dataframe - force_numeric : bool - Force the data to be numeric, used for score column and count data - - Returns - ------- - None - - Raises - ------ - ValidationError - If the data is all null - ValidationError - If the data is of mixed numeric and string types - ValidationError - If the data is not numeric and force_numeric is True - - """ - column_type = infer_column_type(column) - if column_type == "empty": - raise ValidationError(f"data column '{column.name}' contains no data") - elif column_type == "mixed": - raise ValidationError(f"data column '{column.name}' has mixed string and numeric types") - elif force_numeric and column_type != "numeric": - raise ValidationError(f"data column '{column.name}' must contain only numeric data") - - -def validate_variant_columns_match(df1: pd.DataFrame, df2: pd.DataFrame): - """ - Checks if two dataframes have matching HGVS columns. - - The check performed is order-independent. - This function is used to validate a pair of scores and counts dataframes that were uploaded together. - - Parameters - ---------- - df1 : pandas.DataFrame - Dataframe parsed from an uploaded scores file - df2 : pandas.DataFrame - Dataframe parsed from an uploaded counts file - - Raises - ------ - ValidationError - If both dataframes do not define the same variant columns - ValidationError - If both dataframes do not define the same variants within each column - """ - for c in df1.columns: - if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): - if c not in df2: - raise ValidationError("both score and count dataframes must define matching HGVS columns") - elif df1[c].isnull().all() and df2[c].isnull().all(): - continue - elif np.any(df1[c].sort_values().values != df2[c].sort_values().values): - raise ValidationError( - f"both score and count dataframes must define matching variants, discrepancy found in '{c}'" - ) - for c in df2.columns: - if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): - if c not in df1: - raise ValidationError("both score and count dataframes must define matching HGVS columns") diff --git a/src/mavedb/lib/validation/dataframe/column.py b/src/mavedb/lib/validation/dataframe/column.py new file mode 100644 index 00000000..8505a8cc --- /dev/null +++ b/src/mavedb/lib/validation/dataframe/column.py @@ -0,0 +1,252 @@ +from typing import Optional + +import numpy as np +import pandas as pd +from fqfa.util.translate import translate_dna + +from mavedb.lib.validation.constants.general import ( + hgvs_nt_column, + hgvs_pro_column, + hgvs_splice_column, +) +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.models.target_sequence import TargetSequence + + +def infer_column_type(col: pd.Series) -> str: + """Infer whether the given column contains string or numeric data. + + The function returns "string" for string columns or "numeric" for numeric columns. + If there is a mixture of types it returns "mixed". + If every value in the column is `None` or NA it returns "empty". 
+ + Parameters + ---------- + col : pandas.Series + The column to inspect + + Returns + ------- + str + One of "string", "numeric", "mixed", or "empty" + """ + if col.isna().all(): + return "empty" + else: + col_numeric = pd.to_numeric(col, errors="coerce") + if col_numeric.isna().all(): # nothing converted to a number + return "string" + elif np.all(col.isna() == col_numeric.isna()): # all non-NA values converted + return "numeric" + else: # some values converted but not all + return "mixed" + + +def validate_variant_formatting(column: pd.Series, prefixes: list[str], targets: list[str], fully_qualified: bool): + """ + Validate the formatting of HGVS variants present in the passed column against + lists of prefixes and targets + + Parameters + ---------- + column : pd.Series + A pandas column containing HGVS variants + prefixes : list[str] + A list of prefixes we can expect to occur within the passed column + targets : list[str] + A list of targets we can expect to occur within the passed column + + Returns + ------- + None + + Raises + ------ + ValidationError + If any of the variants in the column are not fully qualified with respect to multiple possible targets + ValidationError + If the column contains multiple prefixes or the wrong prefix for that column name + ValidationError + If the column contains target accessions not present in the list of possible targets + """ + variants = [variant for s in column.dropna() for variant in s.split(" ")] + + # if there is more than one target, we expect variants to be fully qualified + if fully_qualified: + if not all(len(str(v).split(":")) == 2 for v in variants): + raise ValidationError( + f"variants in the provided column '{column.name}' were expected to be fully qualified, but are not described in relation to an accession" + ) + if len(set(str(v).split(":")[1][:2] for v in variants)) > 1: + raise ValidationError(f"variant column '{column.name}' has inconsistent variant prefixes") + if not all(str(v).split(":")[1][:2] in prefixes for v in variants): + raise ValidationError(f"variant column '{column.name}' has invalid variant prefixes") + if not all(str(v).split(":")[0] in targets for v in variants): + raise ValidationError(f"variant column '{column.name}' has invalid accession identifiers") + + else: + if len(set(v[:2] for v in variants)) > 1: + raise ValidationError(f"variant column '{column.name}' has inconsistent variant prefixes") + if not all(v[:2] in prefixes for v in variants): + raise ValidationError(f"variant column '{column.name}' has invalid variant prefixes") + + +def generate_variant_prefixes(column: pd.Series): + """ + Generate variant prefixes for the provided column + + Parameters + ---------- + column : pd.Series + The pandas column from which to generate variant prefixes + + Returns + ------- + None + + Raises + ------ + ValueError + If the provided pandas column has an unrecognized variant column name + """ + if str(column.name).lower() == hgvs_nt_column: + return [f"{a}." for a in "cngmo"] + if str(column.name).lower() == hgvs_splice_column: + return [f"{a}." 
for a in "cn"] + if str(column.name).lower() == hgvs_pro_column: + return ["p."] + + raise ValueError(f"unrecognized hgvs column name '{column.name}'") + + +def validate_variant_column(column: pd.Series, is_index: bool): + """ + Validate critical column properties of an HGVS variant column, with special + attention to certain properties expected on index columns + + Parameters + ---------- + column : pd.Series + The pandas column containing HGVS variant information + id_index : bool + Whether the provided column is the index column + + Returns + ------- + None + + Raises + ------ + ValidationError + If an index column contains missing or non-unique values + ValidationError + If a column contains any numeric data + """ + if infer_column_type(column) not in ("string", "empty"): + raise ValidationError(f"variant column '{column.name}' cannot contain numeric data") + if column.isna().any() and is_index: + raise ValidationError(f"primary variant column '{column.name}' cannot contain null values") + if not column.is_unique and is_index: + raise ValidationError(f"primary variant column '{column.name}' must contain unique values") + + +def validate_data_column(column: pd.Series, force_numeric: bool = False) -> None: + """ + Validate the contents of a data column. + + Parameters + ---------- + column : pandas.Series + A data column from a dataframe + force_numeric : bool + Force the data to be numeric, used for score column and count data + + Returns + ------- + None + + Raises + ------ + ValidationError + If the data is all null + ValidationError + If the data is of mixed numeric and string types + ValidationError + If the data is not numeric and force_numeric is True + + """ + column_type = infer_column_type(column) + if column_type == "empty": + raise ValidationError(f"data column '{column.name}' contains no data") + elif column_type == "mixed": + raise ValidationError(f"data column '{column.name}' has mixed string and numeric types") + elif force_numeric and column_type != "numeric": + raise ValidationError(f"data column '{column.name}' must contain only numeric data") + + +def validate_hgvs_column_properties(column: pd.Series, observed_sequence_types: list[str]) -> None: + """ + Validates the properties of an HGVS column in a DataFrame. + + Parameters + ---------- + column : pd.Series + The column to validate. + observed_sequence_types : list[str] + A list of observed sequence types. + + Returns + ------- + None + + Raises + ------ + ValueError + If the column name is 'hgvs_nt' and 'dna' is not in the observed sequence types. + ValueError + If the column name is not recognized as either 'hgvs_splice' or 'hgvs_pro'. + """ + if str(column.name).lower() == hgvs_nt_column: + if "dna" not in observed_sequence_types: + raise ValueError( + f"invalid target sequence type(s) for '{column.name}'. At least one target should be of type `dna`. Observed types: {observed_sequence_types}" + ) + elif str(column.name).lower() != hgvs_splice_column and str(column.name).lower() != hgvs_pro_column: + raise ValueError(f"unrecognized hgvs column name '{column.name}'") + + +def construct_target_sequence_mappings( + column: pd.Series, targets: dict[str, TargetSequence] +) -> dict[str, Optional[str]]: + """ + Constructs a mapping of target sequences based on the provided column and targets. Translates protein sequences + to DNA sequences if needed for passed protein columns. Don't validate splice columns against provided sequences. 
+ + Parameters + ---------- + column : pd.Series + The pandas Series representing the column to be validated. + targets : dict[str, TargetSequence] + A dictionary where keys are target names and values are TargetSequence objects. + + Returns + ------- + dict[str, Union[str, pd.Series]]: A dictionary where keys are target names and values are either the target sequence, + the translated target sequence, or None depending on the column type. + """ + if str(column.name).lower() not in (hgvs_nt_column, hgvs_pro_column, hgvs_splice_column): + raise ValueError(f"unrecognized hgvs column name '{column.name}'") + + if str(column.name).lower() == hgvs_splice_column: + return {name: None for name in targets.keys()} + + return { + name: translate_dna(target.sequence)[0] + if ( + str(column.name).lower() == hgvs_pro_column + and target.sequence_type == "dna" + and target.sequence is not None + ) + else target.sequence + for name, target in targets.items() + } diff --git a/src/mavedb/lib/validation/dataframe/dataframe.py b/src/mavedb/lib/validation/dataframe/dataframe.py new file mode 100644 index 00000000..a8ab6557 --- /dev/null +++ b/src/mavedb/lib/validation/dataframe/dataframe.py @@ -0,0 +1,371 @@ +from typing import Optional, Tuple, TYPE_CHECKING + +import numpy as np +import pandas as pd + +from mavedb.lib.exceptions import MixedTargetError +from mavedb.lib.validation.constants.general import ( + hgvs_nt_column, + hgvs_pro_column, + hgvs_splice_column, + required_score_column, +) +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.models.target_gene import TargetGene +from mavedb.lib.validation.dataframe.column import validate_data_column +from mavedb.lib.validation.dataframe.variant import ( + validate_hgvs_transgenic_column, + validate_hgvs_genomic_column, + validate_hgvs_prefix_combinations, +) + +if TYPE_CHECKING: + from cdot.hgvs.dataproviders import RESTDataProvider + + +STANDARD_COLUMNS = (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, required_score_column) + + +def validate_and_standardize_dataframe_pair( + scores_df: pd.DataFrame, + counts_df: Optional[pd.DataFrame], + targets: list[TargetGene], + hdp: Optional["RESTDataProvider"], +) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: + """ + Perform validation and standardization on a pair of score and count dataframes. + + Parameters + ---------- + scores_df : pandas.DataFrame + The scores dataframe + counts_df : Optional[pandas.DataFrame] + The counts dataframe, can be None if not present + targets : str + The target genes on which to validate dataframes + hdp : RESTDataProvider + The biocommons.hgvs compatible data provider. Used to fetch sequences for hgvs validation. 
+ + Returns + ------- + Tuple[pd.DataFrame, Optional[pd.DataFrame]] + The standardized score and count dataframes, or score and None if no count dataframe was provided + + Raises + ------ + ValidationError + If one of the validation functions raises an exception + """ + if not targets: + raise ValueError("Can't validate provided file with no targets.") + + validate_dataframe(scores_df, "scores", targets, hdp) + if counts_df is not None: + validate_dataframe(counts_df, "counts", targets, hdp) + validate_variant_columns_match(scores_df, counts_df) + + new_scores_df = standardize_dataframe(scores_df) + new_counts_df = standardize_dataframe(counts_df) if counts_df is not None else None + return new_scores_df, new_counts_df + + +def validate_dataframe( + df: pd.DataFrame, kind: str, targets: list["TargetGene"], hdp: Optional["RESTDataProvider"] +) -> None: + """ + Validate that a given dataframe passes all checks. + + Parameters + ---------- + df : pandas.DataFrame + The dataframe to validate + kind : str + The kind of dataframe "counts" or "scores" + target_seq : str + The target sequence to validate variants against + target_seq_type : str + The kind of target sequence, one of "infer" "dna" or "protein" + + Returns + ------- + None + + Raises + ------ + ValidationError + If one of the validators called raises an exception + """ + # basic checks + validate_column_names(df, kind) + validate_no_null_rows(df) + + column_mapping = {c.lower(): c for c in df.columns} + index_column = choose_dataframe_index_column(df) + + prefixes: dict[str, Optional[str]] = dict() + for c in column_mapping: + if c in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): + is_index = column_mapping[c] == index_column + prefixes[c] = None + + # Ignore validation for null non-index hgvs columns + if df[column_mapping[c]].isna().all() and not is_index: + continue + + score_set_is_accession_based = all(target.target_accession for target in targets) + score_set_is_sequence_based = all(target.target_sequence for target in targets) + + # This is typesafe, despite Pylance's claims otherwise + if score_set_is_accession_based and not score_set_is_sequence_based: + validate_hgvs_genomic_column( + df[column_mapping[c]], + is_index, + [target.target_accession for target in targets], + hdp, # type: ignore + ) + elif score_set_is_sequence_based and not score_set_is_accession_based: + validate_hgvs_transgenic_column( + df[column_mapping[c]], + is_index, + {target.target_sequence.label: target.target_sequence for target in targets}, # type: ignore + ) + else: + raise MixedTargetError("Could not validate dataframe against provided mixed target types.") + + # post validation, handle prefixes. We've already established these columns are non-null + if score_set_is_accession_based or len(targets) > 1: + prefixes[c] = ( + df[column_mapping[c]].dropna()[0].split(" ")[0].split(":")[1][0] + ) # Just take the first prefix, we validate consistency elsewhere + else: + prefixes[c] = df[column_mapping[c]].dropna()[0][0] + + else: + force_numeric = (c == required_score_column) or (kind == "counts") + validate_data_column(df[column_mapping[c]], force_numeric) + + validate_hgvs_prefix_combinations( + hgvs_nt=prefixes[hgvs_nt_column], + hgvs_splice=prefixes[hgvs_splice_column], + hgvs_pro=prefixes[hgvs_pro_column], + transgenic=all(target.target_sequence for target in targets), + ) + + +def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame: + """Standardize a dataframe by sorting the columns and changing the standard column names to lowercase. 
+ + The standard column names are: + * hgvs_nt + * hgvs_splice + * hgvs_pro + * score + + Case for other columns is preserved. + + Parameters + ---------- + df : pandas.DataFrame + The dataframe to standardize + + Returns + ------- + pandas.DataFrame + The standardized dataframe + """ + column_mapper = {x: x.lower() for x in df.columns if x.lower() in STANDARD_COLUMNS} + + df.rename(columns=column_mapper, inplace=True) + + return sort_dataframe_columns(df) + + +def sort_dataframe_columns(df: pd.DataFrame) -> pd.DataFrame: + """Sort the columns of the given dataframe according to the expected ordering in MaveDB. + + MaveDB expects that dataframes have columns in the following order (note some columns are optional): + * hgvs_nt + * hgvs_splice + * hgvs_pro + * score + * other + + Parameters + ---------- + df : pandas.DataFrame + The dataframe with columns to sort + + Returns + ------- + pandas.DataFrame + The dataframe with the same data but sorted columns + """ + + def column_sort_function(value, columns): + if value.lower() in STANDARD_COLUMNS: + return STANDARD_COLUMNS.index(value.lower()) + else: + return columns.index(value) + len(STANDARD_COLUMNS) + + old_columns = list(df.columns) + new_columns = sorted(old_columns, key=lambda v: column_sort_function(v, old_columns)) + + return df[new_columns] + + +def validate_column_names(df: pd.DataFrame, kind: str) -> None: + """Validate the column names in a dataframe. + + This function validates the column names in the given dataframe. + It can be run for either a "scores" dataframe or a "counts" dataframe. + A "scores" dataframe must have a column named 'score' and a "counts" dataframe cannot have a column named 'score'. + + The function also checks for a valid combination of columns that define variants. + + Basic checks are performed to make sure that a column name is not empty, null, or whitespace, + as well as making sure there are no duplicate column names. 
+ + Parameters + ---------- + df : pandas.DataFrame + The scores or counts dataframe to be validated + + kind : str + Either "counts" or "scores" depending on the kind of dataframe being validated + + Raises + ------ + ValidationError + If the column names are not valid + """ + if any(type(c) is not str for c in df.columns): + raise ValidationError("column names must be strings") + + if any(c.isspace() for c in df.columns) or any(len(c) == 0 for c in df.columns): + raise ValidationError("column names cannot be empty or whitespace") + + columns = [c.lower() for c in df.columns] + + if kind == "scores": + if required_score_column not in columns: + raise ValidationError(f"score dataframe must have a '{required_score_column}' column") + elif kind == "counts": + if required_score_column in columns: + raise ValidationError(f"counts dataframe must not have a '{required_score_column}' column") + else: + raise ValueError("kind only accepts scores and counts") + + if hgvs_splice_column in columns: + if hgvs_nt_column not in columns or hgvs_pro_column not in columns: + raise ValidationError( + f"dataframes with '{hgvs_splice_column}' must also define '{hgvs_nt_column}' and '{hgvs_pro_column}'" + ) + + if len(columns) != len(set(columns)): + raise ValidationError("duplicate column names are not allowed (this check is case insensitive)") + + if set(columns).isdisjoint({hgvs_nt_column, hgvs_splice_column, hgvs_pro_column}): + raise ValidationError("dataframe does not define any variant columns") + + if set(columns).issubset({hgvs_nt_column, hgvs_splice_column, hgvs_pro_column}): + raise ValidationError("dataframe does not define any data columns") + + +def validate_no_null_rows(df: pd.DataFrame) -> None: + """Check that there are no fully null rows in the dataframe. + + Parameters + __________ + df : pandas.DataFrame + The scores or counts dataframe being validated + + Raises + ______ + ValidationError + If there are null rows in the dataframe + """ + if any(df.isnull().all(axis=1)): + raise ValidationError(f"found {len(df[df.isnull().all(axis=1)])} null rows in the data frame") + + +def choose_dataframe_index_column(df: pd.DataFrame) -> str: + """ + Identify the HGVS variant column that should be used as the index column in this dataframe. + + Parameters + ---------- + df : pandas.DataFrame + The dataframe to check + + Returns + ------- + str + The column name of the index column + + Raises + ------ + ValidationError + If no valid HGVS variant column is found + """ + column_mapping = {c.lower(): c for c in df.columns if not df[c].isna().all()} + + if hgvs_nt_column in column_mapping: + return column_mapping[hgvs_nt_column] + elif hgvs_pro_column in column_mapping: + return column_mapping[hgvs_pro_column] + else: + raise ValidationError("failed to find valid HGVS variant column") + + +def validate_variant_consistency(df: pd.DataFrame) -> None: + """ + Ensure that variants defined in a single row describe the same variant. + + Parameters + ---------- + df : pd.DataFrame + + Returns + ------- + None + + """ + # TODO + pass + + +def validate_variant_columns_match(df1: pd.DataFrame, df2: pd.DataFrame): + """ + Checks if two dataframes have matching HGVS columns. + + The check performed is order-independent. + This function is used to validate a pair of scores and counts dataframes that were uploaded together. 
+ + Parameters + ---------- + df1 : pandas.DataFrame + Dataframe parsed from an uploaded scores file + df2 : pandas.DataFrame + Dataframe parsed from an uploaded counts file + + Raises + ------ + ValidationError + If both dataframes do not define the same variant columns + ValidationError + If both dataframes do not define the same variants within each column + """ + for c in df1.columns: + if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): + if c not in df2: + raise ValidationError("both score and count dataframes must define matching HGVS columns") + elif df1[c].isnull().all() and df2[c].isnull().all(): + continue + elif np.any(df1[c].sort_values().values != df2[c].sort_values().values): + raise ValidationError( + f"both score and count dataframes must define matching variants, discrepancy found in '{c}'" + ) + for c in df2.columns: + if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): + if c not in df1: + raise ValidationError("both score and count dataframes must define matching HGVS columns") diff --git a/src/mavedb/lib/validation/dataframe/variant.py b/src/mavedb/lib/validation/dataframe/variant.py new file mode 100644 index 00000000..eb81873d --- /dev/null +++ b/src/mavedb/lib/validation/dataframe/variant.py @@ -0,0 +1,329 @@ +import logging +import warnings +from typing import Hashable, Optional, TYPE_CHECKING + +import pandas as pd +from mavehgvs.exceptions import MaveHgvsParseError +from mavehgvs.variant import Variant + +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.lib.validation.dataframe.column import ( + generate_variant_prefixes, + validate_variant_column, + validate_variant_formatting, + validate_hgvs_column_properties, + construct_target_sequence_mappings, +) +from mavedb.lib.validation.constants.target import strict_valid_sequence_types as valid_sequence_types + + +from mavedb.models.target_sequence import TargetSequence +from mavedb.models.target_accession import TargetAccession + +if TYPE_CHECKING: + from cdot.hgvs.dataproviders import RESTDataProvider + from hgvs.parser import Parser + from hgvs.validator import Validator + + +logger = logging.getLogger(__name__) + + +def validate_hgvs_transgenic_column(column: pd.Series, is_index: bool, targets: dict[str, TargetSequence]) -> None: + """ + Validate the variants in an HGVS column from a dataframe. + + Tests whether the column has a correct and consistent prefix. + This function also validates all individual variants in the column and checks for agreement against the target + sequence (for non-splice variants). + + Implementation NOTE: We assume variants will only be presented as fully qualified (accession:variant) + if this column is being validated against multiple targets. + + Parameters + ---------- + column : pd.Series + The column from the dataframe to validate + is_index : bool + True if this is the index column for the dataframe and therefore cannot have missing values; else False + targets : dict + Dictionary containing a mapping of target gene names to their sequences. + + Returns + ------- + None + + Raises + ------ + ValueError + If the target sequence does is not dna or protein (or inferred as dna or protein) + ValueError + If the target sequence is not valid for the variants (e.g. 
protein sequence for nucleotide variants) + ValidationError + If one of the variants fails validation + """ + validate_variant_column(column, is_index) + validate_variant_formatting( + column=column, + prefixes=generate_variant_prefixes(column), + targets=list(targets.keys()), + fully_qualified=len(targets) > 1, + ) + + observed_sequence_types = validate_observed_sequence_types(targets) + validate_hgvs_column_properties(column, observed_sequence_types) + target_seqs = construct_target_sequence_mappings(column, targets) + + parsed_variants = [ + parse_transgenic_variant(idx, variant, target_seqs, len(targets) > 1) for idx, variant in column.items() + ] + + # format and raise an error message that contains all invalid variants + if any(not valid for valid, _ in parsed_variants): + invalid_variants = [variant for valid, variant in parsed_variants if not valid] + raise ValidationError( + f"encountered {len(invalid_variants)} invalid variant strings.", triggers=invalid_variants + ) + + return + + +def validate_hgvs_genomic_column( + column: pd.Series, is_index: bool, targets: list[TargetAccession], hdp: Optional["RESTDataProvider"] +) -> None: + """ + Validate the variants in an HGVS column from a dataframe. + + Tests whether the column has a correct and consistent prefix. + This function also validates all individual variants in the column and checks for agreement against the target + sequence (for non-splice variants). + + Parameters + ---------- + column : pd.Series + The column from the dataframe to validate + is_index : bool + True if this is the index column for the dataframe and therefore cannot have missing values; else False + targets : list + Dictionary containing a list of target accessions. + + Returns + ------- + None + + Raises + ------ + ValueError + If the target sequence does is not dna or protein (or inferred as dna or protein) + ValueError + If the target sequence is not valid for the variants (e.g. protein sequence for nucleotide variants) + ValidationError + If one of the variants fails validation + """ + target_accession_identifiers = [target.accession for target in targets if target.accession is not None] + validate_variant_column(column, is_index) + validate_variant_formatting( + column=column, + prefixes=generate_variant_prefixes(column), + targets=target_accession_identifiers, + fully_qualified=True, + ) + + # Attempt to import dependencies from the hgvs package. + # + # For interoperability with Mavetools, we'd prefer if users were not required to install `hgvs`, which requires postgresql and psycopg2 as + # dependencies. We resolve these dependencies only when necessary, treating them as semi-optional. For the purposes of this package, if the + # hdp parameter is ever omitted it will be inferred so long as the `hgvs` package is installed and available. For the purposes of validator + # packages such as Mavetools, users may omit the hdp parameter and proceed with non-strict validation which will log a warning. To silence + # the warning, users should install `hgvs` and pass a data provider to this function. -capodb 2025-02-26 + try: + import hgvs.parser + import hgvs.validator + + if hdp is None: + import mavedb.deps + + hdp = mavedb.deps.hgvs_data_provider() + + hp = hgvs.parser.Parser() + vr = hgvs.validator.Validator(hdp=hdp) + + except ModuleNotFoundError as err: + if hdp is not None: + logger.error( + f"Failed to import `hgvs` from a context in which it is required. 
A data provider ({hdp.data_version()}) is available to this function, so " + + "it is inferred that strict validation is desired. Strict validation requires the `hgvs` package for parsing and validation of HGVS strings with " + + "accession information. Please ensure the `hgvs` package is installed (https://github.com/biocommons/hgvs/?tab=readme-ov-file#installing-hgvs-locally) " + + "to silence this error." + ) + raise err + + warnings.warn( + "Failed to import `hgvs`, and no data provider is available. Skipping strict validation of HGVS genomic variants. HGVS variant strings " + + "will be validated for format only, and accession information will be ignored and assumed correct. To enable strict validation against provided accessions and " + + "silence this warning, install the `hgvs` package. See: https://github.com/biocommons/hgvs/?tab=readme-ov-file#installing-hgvs-locally." + ) + + hp, vr = None, None + + if hp is not None and vr is not None: + parsed_variants = [parse_genomic_variant(idx, variant, hp, vr) for idx, variant in column.items()] + else: + parsed_variants = [ + parse_transgenic_variant( + idx, + variant, + {target: None for target in target_accession_identifiers}, + len(target_accession_identifiers) > 1, + ) + for idx, variant in column.items() + ] + + # format and raise an error message that contains all invalid variants + if any(not valid for valid, _ in parsed_variants): + invalid_variants = [variant for valid, variant in parsed_variants if not valid] + raise ValidationError( + f"encountered {len(invalid_variants)} invalid variant strings.", triggers=invalid_variants + ) + + return + + +def parse_genomic_variant( + idx: Hashable, variant_string: str, parser: "Parser", validator: "Validator" +) -> tuple[bool, Optional[str]]: + # Not pretty, but if we make it here we're guaranteed to have hgvs installed as a package, and we + # should make use of the built in exception they provide for variant validation. + import hgvs.exceptions + + if not variant_string: + return True, None + + for variant in variant_string.split(" "): + try: + validator.validate(parser.parse(variant), strict=False) + except hgvs.exceptions.HGVSError as e: + return False, f"Failed to parse row {idx} with HGVS exception: {e}" + + return True, None + + +def parse_transgenic_variant( + idx: Hashable, variant_string: str, target_sequences: dict[str, Optional[str]], is_fully_qualified: bool +) -> tuple[bool, Optional[str]]: + if not variant_string: + return True, None + + # variants can exist on the same line separated by a space + for variant in variant_string.split(" "): + if is_fully_qualified: + name, variant = str(variant).split(":") + else: + name = list(target_sequences.keys())[0] + + if variant is not None: + try: + Variant(variant, targetseq=target_sequences[name]) + except MaveHgvsParseError: + try: + Variant(variant) # note this will get called a second time for splice variants + except MaveHgvsParseError: + return False, f"invalid variant string '{variant}' at row {idx} for sequence {name}" + else: + return False, f"target sequence mismatch for '{variant}' at row {idx} for sequence {name}" + + return True, None + + +def validate_observed_sequence_types(targets: dict[str, TargetSequence]) -> list[str]: + """ + Ensures that the sequence types of the given target sequences are an accepted type. + + Parameters + ---------- + targets : (dict[str, TargetSequence]) + A dictionary where the keys are target names and the values are TargetSequence objects. 
+
+    Returns
+    -------
+    list[str]: A list of observed sequence types from the target sequences.
+
+    Raises
+    ------
+    ValueError
+        If no targets are provided.
+    ValueError
+        If any of the target sequences have an invalid sequence type.
+    """
+    if not targets:
+        raise ValueError("No targets were provided; cannot validate observed sequence types with none observed.")
+
+    observed_sequence_types = [target.sequence_type for target in targets.values()]
+    invalid_sequence_types = set(observed_sequence_types) - set(valid_sequence_types)
+    if invalid_sequence_types:
+        raise ValueError(
+            f"Some targets are invalid sequence types: {invalid_sequence_types}. Sequence types should be one of: {valid_sequence_types}"
+        )
+
+    return observed_sequence_types
+
+
+def validate_hgvs_prefix_combinations(
+    hgvs_nt: Optional[str], hgvs_splice: Optional[str], hgvs_pro: Optional[str], transgenic: bool
+) -> None:
+    """
+    Validate the combination of HGVS variant prefixes.
+
+    This function assumes that other validation, such as checking that all variants in the column have the same prefix,
+    has already been performed.
+
+    Parameters
+    ----------
+    hgvs_nt : Optional[str]
+        The first character (prefix) of the HGVS nucleotide variant strings, or None if not used.
+    hgvs_splice : Optional[str]
+        The first character (prefix) of the HGVS splice variant strings, or None if not used.
+    hgvs_pro : Optional[str]
+        The first character (prefix) of the HGVS protein variant strings, or None if not used.
+    transgenic : bool
+        Whether we should validate these prefix combinations as transgenic variants
+
+    Returns
+    -------
+    None
+
+    Raises
+    ------
+    ValueError
+        If upstream validation failed and an invalid prefix string was passed to this function
+    ValidationError
+        If the combination of prefixes is not valid
+    """
+    # ensure that the prefixes are valid - this validation should have been performed before this function was called
+    if hgvs_nt not in list("cngmo") + [None]:
+        raise ValueError("invalid nucleotide prefix")
+    if hgvs_splice not in list("cn") + [None]:
+        raise ValueError("invalid splice prefix")
+    if hgvs_pro not in ["p", None]:
+        raise ValueError("invalid protein prefix")
+
+    # test agreement of prefixes across columns
+    if hgvs_splice is not None:
+        if hgvs_nt not in list("gmo"):
+            raise ValidationError("nucleotide variants must use valid genomic prefix when splice variants are present")
+        if hgvs_pro is not None:
+            if hgvs_splice != "c":
+                raise ValidationError("splice variants must use 'c.' prefix when protein variants are present")
+        else:
+            if hgvs_splice != "n":
+                raise ValidationError("splice variants must use 'n.' prefix when protein variants are not present")
+    elif hgvs_pro is not None and hgvs_nt is not None:
+        if hgvs_nt != "c":
+            raise ValidationError(
+                "nucleotide variants must use 'c.' prefix when protein variants are present and splicing variants are"
+                " not present"
+            )
+    # Only raise if this data will not be validated by biocommons.hgvs
+    elif hgvs_nt is not None:  # just hgvs_nt
+        if hgvs_nt != "n" and transgenic:
+            raise ValidationError("nucleotide variants must use 'n.' 
prefix when only nucleotide variants are defined") diff --git a/src/mavedb/lib/validation/py.typed b/src/mavedb/lib/validation/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index 2219a496..4591124d 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -23,7 +23,7 @@ create_variants_data, ) from mavedb.lib.slack import send_slack_message -from mavedb.lib.validation.dataframe import ( +from mavedb.lib.validation.dataframe.dataframe import ( validate_and_standardize_dataframe_pair, ) from mavedb.lib.validation.exceptions import ValidationError From 2af368bd297315cffe9c6057dad10215cb6778e6 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 28 Feb 2025 15:26:56 -0800 Subject: [PATCH 003/166] Refactor Tests to Better Identify Dependency Separation Issues Refactors most of the test suite to better identify dependency separation problems. Validation tests may now be run with only core (and dev) dependencies installed, and fixtures which operate on server dependencies are conditionally loaded based on the installed modules. With this change, it will be much more straightforward to identify dependency 'leaks', or server dependencies which mistakenly are leaked into validation type code. --- .github/workflows/run-tests-on-push.yml | 42 + tests/conftest.py | 289 +--- tests/conftest_optional.py | 301 ++++ tests/helpers/constants.py | 15 +- tests/helpers/util.py | 290 ---- tests/helpers/util/access_key.py | 47 + tests/helpers/util/collection.py | 21 + tests/helpers/util/common.py | 16 + tests/helpers/util/contributor.py | 22 + tests/helpers/util/exceptions.py | 2 + tests/helpers/util/experiment.py | 22 + tests/helpers/util/license.py | 16 + tests/helpers/util/score_set.py | 57 + tests/helpers/util/user.py | 30 + tests/helpers/util/variant.py | 99 ++ tests/lib/test_authentication.py | 66 +- tests/lib/test_score_set.py | 84 +- tests/routers/conftest.py | 38 +- tests/routers/test_access_keys.py | 11 +- tests/routers/test_collections.py | 118 +- tests/routers/test_experiments.py | 319 +++-- tests/routers/test_hgvs.py | 12 +- tests/routers/test_licenses.py | 6 + tests/routers/test_permissions.py | 40 +- tests/routers/test_score_set.py | 1250 ++++++++--------- tests/routers/test_statistics.py | 109 +- tests/routers/test_target_gene.py | 89 +- tests/routers/test_users.py | 8 +- tests/validation/dataframe/conftest.py | 43 + tests/validation/dataframe/test_column.py | 272 ++++ tests/validation/dataframe/test_dataframe.py | 414 ++++++ tests/validation/dataframe/test_variant.py | 893 ++++++++++++ tests/validation/test_dataframe.py | 1121 --------------- tests/view_models/test_experiment.py | 59 +- .../test_external_gene_identifiers.py | 26 +- .../test_publication_identifier.py | 14 +- tests/view_models/test_score_set.py | 115 +- tests/view_models/test_user.py | 7 +- tests/view_models/test_wild_type_sequence.py | 6 +- tests/worker/conftest.py | 11 +- tests/worker/test_jobs.py | 34 +- 41 files changed, 3547 insertions(+), 2887 deletions(-) create mode 100644 tests/conftest_optional.py delete mode 100644 tests/helpers/util.py create mode 100644 tests/helpers/util/access_key.py create mode 100644 tests/helpers/util/collection.py create mode 100644 tests/helpers/util/common.py create mode 100644 tests/helpers/util/contributor.py create mode 100644 tests/helpers/util/exceptions.py create mode 100644 tests/helpers/util/experiment.py create mode 100644 tests/helpers/util/license.py create mode 100644 
tests/helpers/util/score_set.py create mode 100644 tests/helpers/util/user.py create mode 100644 tests/helpers/util/variant.py create mode 100644 tests/validation/dataframe/conftest.py create mode 100644 tests/validation/dataframe/test_column.py create mode 100644 tests/validation/dataframe/test_dataframe.py create mode 100644 tests/validation/dataframe/test_variant.py delete mode 100644 tests/validation/test_dataframe.py diff --git a/.github/workflows/run-tests-on-push.yml b/.github/workflows/run-tests-on-push.yml index 965ddfb3..1044f5bd 100644 --- a/.github/workflows/run-tests-on-push.yml +++ b/.github/workflows/run-tests-on-push.yml @@ -6,6 +6,20 @@ env: LOG_CONFIG: test jobs: + run-tests-3_9-core-dependencies: + runs-on: ubuntu-20.04 + name: Pytest on Python 3.9 / Ubuntu 20.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.9" + cache: 'pip' + - run: pip install --upgrade pip + - run: pip install poetry + - run: poetry install --with dev + - run: poetry run pytest tests/ + run-tests-3_9: runs-on: ubuntu-20.04 name: Pytest on Python 3.9 / Ubuntu 20.04 @@ -20,6 +34,20 @@ jobs: - run: poetry install --with dev --extras server - run: poetry run pytest tests/ --show-capture=stdout --cov=src + run-tests-3_10-core-dependencies: + runs-on: ubuntu-latest + name: Pytest on Python 3.10 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: 'pip' + - run: pip install --upgrade pip + - run: pip install poetry + - run: poetry install --with dev + - run: poetry run pytest tests/ + run-tests-3_10: runs-on: ubuntu-latest name: Pytest on Python 3.10 @@ -34,6 +62,20 @@ jobs: - run: poetry install --with dev --extras server - run: poetry run pytest tests/ --show-capture=stdout --cov=src + run-tests-3_11-core-dependencies: + runs-on: ubuntu-latest + name: Pytest on Python 3.11 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: 'pip' + - run: pip install --upgrade pip + - run: pip install poetry + - run: poetry install --with dev + - run: poetry run pytest tests/ + run-tests-3_11: runs-on: ubuntu-latest name: Pytest on Python 3.11 diff --git a/tests/conftest.py b/tests/conftest.py index e5d55a32..c5a2ef9e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,38 +1,24 @@ -import os import logging # noqa: F401 import sys -from concurrent import futures -from inspect import getsourcefile -from os.path import abspath -from unittest.mock import patch -import cdot.hgvs.dataproviders import email_validator import pytest -import pytest_asyncio import pytest_postgresql -from arq import ArqRedis -from arq.worker import Worker -from fakeredis import FakeServer -from fakeredis.aioredis import FakeConnection -from fastapi.testclient import TestClient -from httpx import AsyncClient -from redis.asyncio.connection import ConnectionPool from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker from sqlalchemy.pool import NullPool from mavedb.db.base import Base -from mavedb.deps import get_db, get_worker, hgvs_data_provider -from mavedb.lib.authentication import UserData, get_current_user -from mavedb.lib.authorization import require_current_user -from mavedb.models.user import User -from mavedb.server_main import app -from mavedb.worker.jobs import create_variants_for_score_set, map_variants_for_score_set, variant_mapper_manager sys.path.append(".") -from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_USER +# 
Attempt to import optional top level fixtures. If the modules they depend on are not installed, +# we won't have access to our full fixture suite and only a limited subset of tests can be run. +try: + from tests.conftest_optional import * # noqa: F401, F403 + +except ModuleNotFoundError: + pass # needs the pytest_postgresql plugin installed assert pytest_postgresql.factories @@ -61,264 +47,3 @@ def session(postgresql): finally: session.close() Base.metadata.drop_all(bind=engine) - - -@pytest.fixture -def data_provider(): - """ - To provide the transcript for the FASTA file without a network request, use: - - ``` - from helpers.utils.constants import TEST_CDOT_TRANSCRIPT - from unittest.mock import patch - import cdot.hgvs.dataproviders - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): - ... - ``` - """ - - this_file_dir = os.path.dirname(abspath(getsourcefile(lambda: 0))) - test_fasta_file = os.path.join(this_file_dir, "helpers/data/refseq.NM_001637.3.fasta") - - data_provider = cdot.hgvs.dataproviders.RESTDataProvider( - seqfetcher=cdot.hgvs.dataproviders.ChainedSeqFetcher( - cdot.hgvs.dataproviders.FastaSeqFetcher(test_fasta_file), - # Include normal seqfetcher to fall back on mocked requests (or expose test shortcomings via socket connection attempts). - cdot.hgvs.dataproviders.SeqFetcher(), - ) - ) - - yield data_provider - - -@pytest_asyncio.fixture -async def arq_redis(): - """ - If the `enqueue_job` method of the ArqRedis object is not mocked and you need to run worker - processes from within a test client, it can only be run within the `httpx.AsyncClient` object. - The `fastapi.testclient.TestClient` object does not provide sufficient support for invocations - of asynchronous events. Note that any tests using the worker directly should be marked as async: - - ``` - @pytest.mark.asyncio - async def some_test_with_worker(async_client, arq_redis): - ... - ``` - - You can mock the `enqueue_job` method with: - - ``` - from unittest.mock import patch - def some_test(client, arq_redis): - with patch.object(ArqRedis, "enqueue_job", return_value=None) as worker_queue: - - # Enqueue a job directly - worker_queue.enqueue_job(some_job) - - # Hit an endpoint which enqueues a job - client.post("/some/endpoint/that/invokes/the/worker") - - # Ensure at least one job was queued - worker_queue.assert_called() - ``` - """ - redis_ = ArqRedis( - connection_pool=ConnectionPool( - server=FakeServer(), - connection_class=FakeConnection, - ) - ) - await redis_.flushall() - try: - yield redis_ - finally: - await redis_.aclose(close_connection_pool=True) - - -@pytest_asyncio.fixture() -async def arq_worker(data_provider, session, arq_redis): - """ - Run worker tasks in the test environment by including it as a fixture in a test, - enqueueing a job on the ArqRedis object, and then running the worker. See the arq_redis - fixture for limitations about running worker jobs from within a TestClient object. 
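(Editorial sketch, not part of the patch itself.) The conditional import of `conftest_optional` above, combined with guarded imports in the server-dependent test modules, is what lets a core-only install (`poetry install --with dev`, without `--extras server`) still collect and run the validation tests. A minimal sketch of the guard pattern this patch applies in `tests/lib/`; the module names are the ones used later in this diff:

```
# Sketch only: mirrors the importorskip guard used in tests/lib/test_authentication.py.
# Under a core-only install these modules are missing, so pytest records the whole file
# as skipped instead of failing at collection time.
import pytest

arq = pytest.importorskip("arq")
cdot = pytest.importorskip("cdot")
fastapi = pytest.importorskip("fastapi")

# Server-only imports are safe to perform once the guards above have passed.
from mavedb.lib.authentication import get_current_user  # noqa: E402
```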
- - ``` - async def worker_test(arq_redis, arq_worker): - await arq_redis.enqueue_job('some_job') - await arq_worker.async_run() - await arq_worker.run_check() - ``` - """ - - async def on_startup(ctx): - pass - - async def on_job(ctx): - ctx["db"] = session - ctx["hdp"] = data_provider - ctx["state"] = {} - ctx["pool"] = futures.ProcessPoolExecutor() - - worker_ = Worker( - functions=[create_variants_for_score_set, map_variants_for_score_set, variant_mapper_manager], - redis_pool=arq_redis, - burst=True, - poll_delay=0, - on_startup=on_startup, - on_job_start=on_job, - ) - # `fakeredis` does not support `INFO` - with patch("arq.worker.log_redis_info"): - try: - yield worker_ - finally: - await worker_.close() - - -@pytest.fixture -def standalone_worker_context(session, data_provider, arq_redis): - yield { - "db": session, - "hdp": data_provider, - "state": {}, - "job_id": "test_job", - "redis": arq_redis, - "pool": futures.ProcessPoolExecutor(), - } - - -@pytest.fixture() -def app_(session, data_provider, arq_redis): - def override_get_db(): - try: - yield session - finally: - session.close() - - async def override_get_worker(): - yield arq_redis - - def override_current_user(): - default_user = session.query(User).filter(User.username == TEST_USER["username"]).one_or_none() - yield UserData(default_user, default_user.roles) - - def override_require_user(): - default_user = session.query(User).filter(User.username == TEST_USER["username"]).one_or_none() - yield UserData(default_user, default_user.roles) - - def override_hgvs_data_provider(): - yield data_provider - - app.dependency_overrides[get_db] = override_get_db - app.dependency_overrides[get_worker] = override_get_worker - app.dependency_overrides[get_current_user] = override_current_user - app.dependency_overrides[require_current_user] = override_require_user - app.dependency_overrides[hgvs_data_provider] = override_hgvs_data_provider - - yield app - - -@pytest.fixture() -def anonymous_app_overrides(session, data_provider, arq_redis): - def override_get_db(): - try: - yield session - finally: - session.close() - - async def override_get_worker(): - yield arq_redis - - def override_current_user(): - yield None - - def override_hgvs_data_provider(): - yield data_provider - - anonymous_overrides = { - get_db: override_get_db, - get_worker: override_get_worker, - get_current_user: override_current_user, - require_current_user: require_current_user, - hgvs_data_provider: override_hgvs_data_provider, - } - - yield anonymous_overrides - - -@pytest.fixture() -def extra_user_app_overrides(session, data_provider, arq_redis): - def override_get_db(): - try: - yield session - finally: - session.close() - - async def override_get_worker(): - yield arq_redis - - def override_current_user(): - default_user = session.query(User).filter(User.username == EXTRA_USER["username"]).one_or_none() - yield UserData(default_user, default_user.roles) - - def override_require_user(): - default_user = session.query(User).filter(User.username == EXTRA_USER["username"]).one_or_none() - yield UserData(default_user, default_user.roles) - - def override_hgvs_data_provider(): - yield data_provider - - anonymous_overrides = { - get_db: override_get_db, - get_worker: override_get_worker, - get_current_user: override_current_user, - require_current_user: require_current_user, - hgvs_data_provider: override_hgvs_data_provider, - } - - yield anonymous_overrides - - -@pytest.fixture() -def admin_app_overrides(session, data_provider, arq_redis): - def override_get_db(): - 
try: - yield session - finally: - session.close() - - async def override_get_worker(): - yield arq_redis - - def override_current_user(): - admin_user = session.query(User).filter(User.username == ADMIN_USER["username"]).one_or_none() - yield UserData(admin_user, admin_user.roles) - - def override_require_user(): - admin_user = session.query(User).filter(User.username == ADMIN_USER["username"]).one_or_none() - yield UserData(admin_user, admin_user.roles) - - def override_hgvs_data_provider(): - yield data_provider - - admin_overrides = { - get_db: override_get_db, - get_worker: override_get_worker, - get_current_user: override_current_user, - require_current_user: override_require_user, - hgvs_data_provider: override_hgvs_data_provider, - } - - yield admin_overrides - - -@pytest.fixture -def client(app_): - with TestClient(app=app_, base_url="http://testserver") as tc: - yield tc - - -@pytest_asyncio.fixture -async def async_client(app_): - async with AsyncClient(app=app_, base_url="http://testserver") as ac: - yield ac diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py new file mode 100644 index 00000000..e0f35fc4 --- /dev/null +++ b/tests/conftest_optional.py @@ -0,0 +1,301 @@ +import os +from concurrent import futures +from inspect import getsourcefile +from posixpath import abspath + +import cdot.hgvs.dataproviders +import pytest +import pytest_asyncio +from fastapi.testclient import TestClient +from httpx import AsyncClient +from unittest.mock import patch + +from mavedb.lib.authentication import UserData, get_current_user +from mavedb.lib.authorization import require_current_user +from mavedb.models.user import User +from mavedb.server_main import app +from mavedb.deps import get_db, get_worker, hgvs_data_provider +from arq.worker import Worker +from mavedb.worker.jobs import create_variants_for_score_set, map_variants_for_score_set, variant_mapper_manager + +from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_USER + +#################################################################################################### +# REDIS +#################################################################################################### + + +# Defer imports of redis and arq to support cases where validation tests are called with only core dependencies installed. +@pytest_asyncio.fixture +async def arq_redis(): + """ + If the `enqueue_job` method of the ArqRedis object is not mocked and you need to run worker + processes from within a test client, it can only be run within the `httpx.AsyncClient` object. + The `fastapi.testclient.TestClient` object does not provide sufficient support for invocations + of asynchronous events. Note that any tests using the worker directly should be marked as async: + + ``` + @pytest.mark.asyncio + async def some_test_with_worker(async_client, arq_redis): + ... 
+ ``` + + You can mock the `enqueue_job` method with: + + ``` + from unittest.mock import patch + def some_test(client, arq_redis): + with patch.object(ArqRedis, "enqueue_job", return_value=None) as worker_queue: + + # Enqueue a job directly + worker_queue.enqueue_job(some_job) + + # Hit an endpoint which enqueues a job + client.post("/some/endpoint/that/invokes/the/worker") + + # Ensure at least one job was queued + worker_queue.assert_called() + ``` + """ + from arq import ArqRedis + from fakeredis import FakeServer + from fakeredis.aioredis import FakeConnection + from redis.asyncio.connection import ConnectionPool + + redis_ = ArqRedis( + connection_pool=ConnectionPool( + server=FakeServer(), + connection_class=FakeConnection, + ) + ) + await redis_.flushall() + try: + yield redis_ + finally: + await redis_.aclose(close_connection_pool=True) + + +@pytest_asyncio.fixture() +async def arq_worker(data_provider, session, arq_redis): + """ + Run worker tasks in the test environment by including it as a fixture in a test, + enqueueing a job on the ArqRedis object, and then running the worker. See the arq_redis + fixture for limitations about running worker jobs from within a TestClient object. + + ``` + async def worker_test(arq_redis, arq_worker): + await arq_redis.enqueue_job('some_job') + await arq_worker.async_run() + await arq_worker.run_check() + ``` + """ + + async def on_startup(ctx): + pass + + async def on_job(ctx): + ctx["db"] = session + ctx["hdp"] = data_provider + ctx["state"] = {} + ctx["pool"] = futures.ProcessPoolExecutor() + + worker_ = Worker( + functions=[create_variants_for_score_set, map_variants_for_score_set, variant_mapper_manager], + redis_pool=arq_redis, + burst=True, + poll_delay=0, + on_startup=on_startup, + on_job_start=on_job, + ) + # `fakeredis` does not support `INFO` + with patch("arq.worker.log_redis_info"): + try: + yield worker_ + finally: + await worker_.close() + + +@pytest.fixture +def standalone_worker_context(session, data_provider, arq_redis): + yield { + "db": session, + "hdp": data_provider, + "state": {}, + "job_id": "test_job", + "redis": arq_redis, + "pool": futures.ProcessPoolExecutor(), + } + + +#################################################################################################### +# FASTA DATA PROVIDER +#################################################################################################### + + +@pytest.fixture +def data_provider(): + """ + To provide the transcript for the FASTA file without a network request, use: + + ``` + from helpers.utils.constants import TEST_CDOT_TRANSCRIPT + from unittest.mock import patch + import cdot.hgvs.dataproviders + with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + ... + ``` + """ + this_file_dir = os.path.dirname(abspath(getsourcefile(lambda: 0))) + test_fasta_file = os.path.join(this_file_dir, "helpers/data/refseq.NM_001637.3.fasta") + + data_provider = cdot.hgvs.dataproviders.RESTDataProvider( + seqfetcher=cdot.hgvs.dataproviders.ChainedSeqFetcher( + cdot.hgvs.dataproviders.FastaSeqFetcher(test_fasta_file), + # Include normal seqfetcher to fall back on mocked requests (or expose test shortcomings via socket connection attempts). 
+ cdot.hgvs.dataproviders.SeqFetcher(), + ) + ) + + yield data_provider + + +#################################################################################################### +# FASTAPI CLIENT +#################################################################################################### + + +@pytest.fixture() +def app_(session, data_provider, arq_redis): + def override_get_db(): + try: + yield session + finally: + session.close() + + async def override_get_worker(): + yield arq_redis + + def override_current_user(): + default_user = session.query(User).filter(User.username == TEST_USER["username"]).one_or_none() + yield UserData(default_user, default_user.roles) + + def override_require_user(): + default_user = session.query(User).filter(User.username == TEST_USER["username"]).one_or_none() + yield UserData(default_user, default_user.roles) + + def override_hgvs_data_provider(): + yield data_provider + + app.dependency_overrides[get_db] = override_get_db + app.dependency_overrides[get_worker] = override_get_worker + app.dependency_overrides[get_current_user] = override_current_user + app.dependency_overrides[require_current_user] = override_require_user + app.dependency_overrides[hgvs_data_provider] = override_hgvs_data_provider + + yield app + + +@pytest.fixture() +def anonymous_app_overrides(session, data_provider, arq_redis): + def override_get_db(): + try: + yield session + finally: + session.close() + + async def override_get_worker(): + yield arq_redis + + def override_current_user(): + yield None + + def override_hgvs_data_provider(): + yield data_provider + + anonymous_overrides = { + get_db: override_get_db, + get_worker: override_get_worker, + get_current_user: override_current_user, + require_current_user: require_current_user, + hgvs_data_provider: override_hgvs_data_provider, + } + + yield anonymous_overrides + + +@pytest.fixture() +def extra_user_app_overrides(session, data_provider, arq_redis): + def override_get_db(): + try: + yield session + finally: + session.close() + + async def override_get_worker(): + yield arq_redis + + def override_current_user(): + default_user = session.query(User).filter(User.username == EXTRA_USER["username"]).one_or_none() + yield UserData(default_user, default_user.roles) + + def override_require_user(): + default_user = session.query(User).filter(User.username == EXTRA_USER["username"]).one_or_none() + yield UserData(default_user, default_user.roles) + + def override_hgvs_data_provider(): + yield data_provider + + anonymous_overrides = { + get_db: override_get_db, + get_worker: override_get_worker, + get_current_user: override_current_user, + require_current_user: override_require_user, + hgvs_data_provider: override_hgvs_data_provider, + } + + yield anonymous_overrides + + +@pytest.fixture() +def admin_app_overrides(session, data_provider, arq_redis): + def override_get_db(): + try: + yield session + finally: + session.close() + + async def override_get_worker(): + yield arq_redis + + def override_current_user(): + admin_user = session.query(User).filter(User.username == ADMIN_USER["username"]).one_or_none() + yield UserData(admin_user, admin_user.roles) + + def override_require_user(): + admin_user = session.query(User).filter(User.username == ADMIN_USER["username"]).one_or_none() + yield UserData(admin_user, admin_user.roles) + + def override_hgvs_data_provider(): + yield data_provider + + admin_overrides = { + get_db: override_get_db, + get_worker: override_get_worker, + get_current_user: override_current_user, + 
require_current_user: override_require_user, + hgvs_data_provider: override_hgvs_data_provider, + } + + yield admin_overrides + + +@pytest.fixture +def client(app_): + with TestClient(app=app_, base_url="http://testserver") as tc: + yield tc + + +@pytest_asyncio.fixture +async def async_client(app_): + async with AsyncClient(app=app_, base_url="http://testserver") as ac: + yield ac diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index 03abc856..ede0ef6b 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -417,16 +417,16 @@ { "name": "TEST1", "category": "protein_coding", - "external_identifiers": [], "target_sequence": { "sequence_type": "dna", "sequence": "ACGTTT", - "reference": { - "id": 1, - "short_name": "Name", - "organism_name": "Organism", - "creation_date": date.today().isoformat(), - "modification_date": date.today().isoformat(), + "taxonomy": { + "tax_id": TEST_TAXONOMY["tax_id"], + "organism_name": TEST_TAXONOMY["organism_name"], + "common_name": TEST_TAXONOMY["common_name"], + "rank": TEST_TAXONOMY["rank"], + "id": TEST_TAXONOMY["id"], + "url": TEST_TAXONOMY["url"], }, }, } @@ -554,7 +554,6 @@ { "name": "TEST2", "category": "protein_coding", - "external_identifiers": [], "target_accession": {"accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE}, } ], diff --git a/tests/helpers/util.py b/tests/helpers/util.py deleted file mode 100644 index 6a005e3a..00000000 --- a/tests/helpers/util.py +++ /dev/null @@ -1,290 +0,0 @@ -from copy import deepcopy -from unittest.mock import patch - -import cdot.hgvs.dataproviders -import jsonschema -from arq import ArqRedis -from sqlalchemy import select -from sqlalchemy.exc import NoResultFound - -from mavedb.lib.score_sets import columns_for_dataset, create_variants, create_variants_data, csv_data_to_df -from mavedb.lib.validation.dataframe import validate_and_standardize_dataframe_pair -from mavedb.models.contributor import Contributor -from mavedb.models.enums.processing_state import ProcessingState -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.models.license import License -from mavedb.models.target_gene import TargetGene -from mavedb.models.user import User -from mavedb.models.variant import Variant -from mavedb.view_models.collection import Collection -from mavedb.view_models.experiment import Experiment, ExperimentCreate -from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate -from tests.helpers.constants import ( - EXTRA_USER, - TEST_CDOT_TRANSCRIPT, - TEST_COLLECTION, - TEST_MINIMAL_ACC_SCORESET, - TEST_MINIMAL_EXPERIMENT, - TEST_MINIMAL_PRE_MAPPED_METADATA, - TEST_MINIMAL_POST_MAPPED_METADATA, - TEST_MINIMAL_SEQ_SCORESET, - TEST_MINIMAL_MAPPED_VARIANT, -) - - -def add_contributor(db, urn, model, orcid_id: str, given_name: str, family_name: str): - """Without making an API call, add a new contributor to the record (experiment or score set) with given urn and model.""" - item = db.query(model).filter(model.urn == urn).one_or_none() - assert item is not None - - try: - contributor = db.execute(select(Contributor).where(Contributor.orcid_id == orcid_id)).one() - except NoResultFound: - contributor = Contributor(orcid_id=orcid_id, given_name=given_name, family_name=family_name) - db.add(contributor) - - item.contributors = [contributor] - db.add(item) - db.commit() - - -def change_ownership(db, urn, model): - """Change the 
ownership of the record with given urn and model to the extra user.""" - item = db.query(model).filter(model.urn == urn).one_or_none() - assert item is not None - extra_user = db.query(User).filter(User.username == EXTRA_USER["username"]).one_or_none() - assert extra_user is not None - item.created_by_id = extra_user.id - item.modified_by_id = extra_user.id - db.add(item) - db.commit() - - -def change_to_inactive_license(db, urn): - """Change the license of the score set with given urn to an inactive license.""" - item = db.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == urn).one_or_none() - assert item is not None - license = db.query(License).filter(License.active.is_(False)).first() - assert license is not None - item.license_id = license.id - db.add(item) - db.commit() - - -def create_collection(client, update=None): - collection_payload = deepcopy(TEST_COLLECTION) - if update is not None: - collection_payload.update(update) - - response = client.post("/api/v1/collections/", json=collection_payload) - assert response.status_code == 200, "Could not create collection." - - response_data = response.json() - jsonschema.validate(instance=response_data, schema=Collection.schema()) - return response_data - - -def create_experiment(client, update=None): - experiment_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) - if update is not None: - experiment_payload.update(update) - jsonschema.validate(instance=experiment_payload, schema=ExperimentCreate.schema()) - - response = client.post("/api/v1/experiments/", json=experiment_payload) - assert response.status_code == 200, "Could not create experiment." - - response_data = response.json() - jsonschema.validate(instance=response_data, schema=Experiment.schema()) - return response_data - - -def create_seq_score_set(client, experiment_urn, update=None): - score_set_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) - if experiment_urn is not None: - score_set_payload["experimentUrn"] = experiment_urn - if update is not None: - score_set_payload.update(update) - jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.schema()) - - response = client.post("/api/v1/score-sets/", json=score_set_payload) - assert ( - response.status_code == 200 - ), f"Could not create sequence based score set (no variants) within experiment {experiment_urn}" - - response_data = response.json() - jsonschema.validate(instance=response_data, schema=ScoreSet.schema()) - return response_data - - -def create_acc_score_set(client, experiment_urn, update=None): - score_set_payload = deepcopy(TEST_MINIMAL_ACC_SCORESET) - if experiment_urn is not None: - score_set_payload["experimentUrn"] = experiment_urn - if update is not None: - score_set_payload.update(update) - jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.schema()) - - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): - response = client.post("/api/v1/score-sets/", json=score_set_payload) - - assert ( - response.status_code == 200 - ), f"Could not create accession based score set (no variants) within experiment {experiment_urn}" - - response_data = response.json() - jsonschema.validate(instance=response_data, schema=ScoreSet.schema()) - return response_data - - -def mock_worker_variant_insertion(client, db, data_provider, score_set, scores_csv_path, counts_csv_path): - with ( - open(scores_csv_path, "rb") as score_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as worker_queue, - ): - files = {"scores_file": 
(scores_csv_path.name, score_file, "rb")} - - if counts_csv_path is not None: - counts_file = open(counts_csv_path, "rb") - files["counts_file"] = (counts_csv_path.name, counts_file, "rb") - else: - counts_file = None - - response = client.post(f"/api/v1/score-sets/{score_set['urn']}/variants/data", files=files) - - # Assert we have mocked a job being added to the queue, and that the request succeeded. The - # response value here isn't important- we will add variants to the score set manually. - worker_queue.assert_called_once() - assert response.status_code == 200 - - if counts_file is not None: - counts_file.close() - - # Reopen files since their buffers are consumed while mocking the variant data post request. - with open(scores_csv_path, "rb") as score_file: - score_df = csv_data_to_df(score_file) - - if counts_csv_path is not None: - with open(counts_csv_path, "rb") as counts_file: - counts_df = csv_data_to_df(counts_file) - else: - counts_df = None - - # Insert variant manually, worker jobs are tested elsewhere separately. - item = db.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set["urn"])).one_or_none() - assert item is not None - - scores, counts = validate_and_standardize_dataframe_pair(score_df, counts_df, item.target_genes, data_provider) - variants = create_variants_data(scores, counts, None) - num_variants = create_variants(db, item, variants) - assert num_variants == 3 - - item.processing_state = ProcessingState.success - item.dataset_columns = { - "score_columns": columns_for_dataset(scores), - "count_columns": columns_for_dataset(counts), - } - - db.add(item) - db.commit() - - return client.get(f"/api/v1/score-sets/{score_set['urn']}").json() - - -def create_mapped_variants_for_score_set(db, score_set_urn): - score_set = db.scalar(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)) - targets = db.scalars(select(TargetGene).where(TargetGene.score_set_id == score_set.id)) - variants = db.scalars(select(Variant).where(Variant.score_set_id == score_set.id)).all() - - for variant in variants: - mv = MappedVariant(**TEST_MINIMAL_MAPPED_VARIANT, variant_id=variant.id) - db.add(mv) - - for target in targets: - target.pre_mapped_metadata = TEST_MINIMAL_PRE_MAPPED_METADATA - target.post_mapped_metadata = TEST_MINIMAL_POST_MAPPED_METADATA - db.add(target) - - score_set.mapping_state = MappingState.complete - db.commit() - return - - -def create_seq_score_set_with_variants( - client, db, data_provider, experiment_urn, scores_csv_path, update=None, counts_csv_path=None -): - score_set = create_seq_score_set(client, experiment_urn, update) - score_set = mock_worker_variant_insertion(client, db, data_provider, score_set, scores_csv_path, counts_csv_path) - - assert ( - score_set["numVariants"] == 3 - ), f"Could not create sequence based score set with variants within experiment {experiment_urn}" - - jsonschema.validate(instance=score_set, schema=ScoreSet.schema()) - return score_set - - -def create_acc_score_set_with_variants( - client, db, data_provider, experiment_urn, scores_csv_path, update=None, counts_csv_path=None -): - score_set = create_acc_score_set(client, experiment_urn, update) - score_set = mock_worker_variant_insertion(client, db, data_provider, score_set, scores_csv_path, counts_csv_path) - - assert ( - score_set["numVariants"] == 3 - ), f"Could not create sequence based score set with variants within experiment {experiment_urn}" - - jsonschema.validate(instance=score_set, schema=ScoreSet.schema()) - return score_set - - -def 
publish_score_set(client, score_set_urn): - with patch.object(ArqRedis, "enqueue_job", return_value=None) as worker_queue: - response = client.post(f"/api/v1/score-sets/{score_set_urn}/publish") - assert response.status_code == 200, f"Could not publish score set {score_set_urn}" - worker_queue.assert_called_once() - - response_data = response.json() - jsonschema.validate(instance=response_data, schema=ScoreSet.schema()) - return response_data - - -def create_api_key_for_current_user(client): - response = client.post("api/v1/users/me/access-keys") - assert response.status_code == 200 - return response.json()["keyId"] - - -def create_admin_key_for_current_user(client): - response = client.post("api/v1/users/me/access-keys/admin") - assert response.status_code == 200 - return response.json()["keyId"] - - -def mark_user_inactive(session, username): - user = session.query(User).where(User.username == username).one() - user.is_active = False - - session.add(user) - session.commit() - session.refresh(user) - - return user - - -async def awaitable_exception(): - return Exception() - - -def update_expected_response_for_created_resources(expected_response, created_experiment, created_score_set): - expected_response.update({"urn": created_score_set["urn"]}) - expected_response["experiment"].update( - { - "urn": created_experiment["urn"], - "experimentSetUrn": created_experiment["experimentSetUrn"], - "scoreSetUrns": [created_score_set["urn"]], - } - ) - - return expected_response diff --git a/tests/helpers/util/access_key.py b/tests/helpers/util/access_key.py new file mode 100644 index 00000000..3058a24c --- /dev/null +++ b/tests/helpers/util/access_key.py @@ -0,0 +1,47 @@ +import secrets + +from sqlalchemy import select +from sqlalchemy.orm import Session +from fastapi.testclient import TestClient + +from mavedb.models.access_key import AccessKey +from mavedb.models.user import User +from mavedb.models.enums.user_role import UserRole + +from mavedb.routers.access_keys import generate_key_pair + + +def create_api_key_for_user(db: Session, username: str) -> str: + user = db.scalars(select(User).where(User.username == username)).one() + private_key, public_key = generate_key_pair() + + item = AccessKey(user=user, key_id=secrets.token_urlsafe(32), public_key=public_key) + db.add(item) + db.commit() + db.refresh(item) + + return item.key_id + + +def create_admin_key_for_user(db: Session, username: str) -> str: + user = db.scalars(select(User).where(User.username == username)).one() + private_key, public_key = generate_key_pair() + + item = AccessKey(user=user, key_id=secrets.token_urlsafe(32), public_key=public_key, role=UserRole.admin) + db.add(item) + db.commit() + db.refresh(item) + + return item.public_key + + +def create_api_key_for_current_user(client: TestClient) -> str: + response = client.post("api/v1/users/me/access-keys") + assert response.status_code == 200 + return response.json()["keyId"] + + +def create_admin_key_for_current_user(client: TestClient) -> str: + response = client.post("api/v1/users/me/access-keys/admin") + assert response.status_code == 200 + return response.json()["keyId"] diff --git a/tests/helpers/util/collection.py b/tests/helpers/util/collection.py new file mode 100644 index 00000000..e2cec1c1 --- /dev/null +++ b/tests/helpers/util/collection.py @@ -0,0 +1,21 @@ +import jsonschema +from copy import deepcopy +from typing import Any, Dict, Optional + +from mavedb.view_models.collection import Collection + +from tests.helpers.constants import TEST_COLLECTION +from 
fastapi.testclient import TestClient + + +def create_collection(client: TestClient, update: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + collection_payload = deepcopy(TEST_COLLECTION) + if update is not None: + collection_payload.update(update) + + response = client.post("/api/v1/collections/", json=collection_payload) + assert response.status_code == 200, "Could not create collection." + + response_data = response.json() + jsonschema.validate(instance=response_data, schema=Collection.schema()) + return response_data diff --git a/tests/helpers/util/common.py b/tests/helpers/util/common.py new file mode 100644 index 00000000..d88f4a38 --- /dev/null +++ b/tests/helpers/util/common.py @@ -0,0 +1,16 @@ +from typing import Dict, Any + + +def update_expected_response_for_created_resources( + expected_response: Dict[str, Any], created_experiment: Dict[str, Any], created_score_set: Dict[str, Any] +) -> Dict[str, Any]: + expected_response.update({"urn": created_score_set["urn"]}) + expected_response["experiment"].update( + { + "urn": created_experiment["urn"], + "experimentSetUrn": created_experiment["experimentSetUrn"], + "scoreSetUrns": [created_score_set["urn"]], + } + ) + + return expected_response diff --git a/tests/helpers/util/contributor.py b/tests/helpers/util/contributor.py new file mode 100644 index 00000000..7ca05598 --- /dev/null +++ b/tests/helpers/util/contributor.py @@ -0,0 +1,22 @@ +from sqlalchemy.orm.exc import NoResultFound +from sqlalchemy import select +from sqlalchemy.orm import Session +from typing import Any + +from mavedb.models.contributor import Contributor + + +def add_contributor(db: Session, urn: str, model: Any, orcid_id: str, given_name: str, family_name: str) -> None: + """Without making an API call, add a new contributor to the record (experiment or score set) with given urn and model.""" + item = db.query(model).filter(model.urn == urn).one_or_none() + assert item is not None + + try: + contributor = db.execute(select(Contributor).where(Contributor.orcid_id == orcid_id)).one() + except NoResultFound: + contributor = Contributor(orcid_id=orcid_id, given_name=given_name, family_name=family_name) + db.add(contributor) + + item.contributors = [contributor] + db.add(item) + db.commit() diff --git a/tests/helpers/util/exceptions.py b/tests/helpers/util/exceptions.py new file mode 100644 index 00000000..bb5a906c --- /dev/null +++ b/tests/helpers/util/exceptions.py @@ -0,0 +1,2 @@ +async def awaitable_exception() -> Exception: + return Exception() diff --git a/tests/helpers/util/experiment.py b/tests/helpers/util/experiment.py new file mode 100644 index 00000000..c130c076 --- /dev/null +++ b/tests/helpers/util/experiment.py @@ -0,0 +1,22 @@ +import jsonschema +from copy import deepcopy +from typing import Any, Dict, Optional + +from mavedb.view_models.experiment import Experiment, ExperimentCreate + +from tests.helpers.constants import TEST_MINIMAL_EXPERIMENT +from fastapi.testclient import TestClient + + +def create_experiment(client: TestClient, update: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + experiment_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) + if update is not None: + experiment_payload.update(update) + jsonschema.validate(instance=experiment_payload, schema=ExperimentCreate.schema()) + + response = client.post("/api/v1/experiments/", json=experiment_payload) + assert response.status_code == 200, "Could not create experiment." 
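(Editorial sketch, not part of the patch itself.) With the old tests/helpers/util.py split into per-concern modules, a router test now composes the helpers explicitly. A rough usage sketch, assuming the `client` fixture from conftest_optional.py above; any database seeding fixture a real test would also need is omitted for brevity:

```
# Illustrative only: the split helpers compose the same way the old monolithic
# tests/helpers/util.py did internally.
from tests.helpers.util.experiment import create_experiment
from tests.helpers.util.score_set import create_seq_score_set


def test_score_set_is_attached_to_its_experiment(client):
    experiment = create_experiment(client)
    score_set = create_seq_score_set(client, experiment["urn"])
    assert score_set["experiment"]["urn"] == experiment["urn"]
```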
+ + response_data = response.json() + jsonschema.validate(instance=response_data, schema=Experiment.schema()) + return response_data diff --git a/tests/helpers/util/license.py b/tests/helpers/util/license.py new file mode 100644 index 00000000..895b8a99 --- /dev/null +++ b/tests/helpers/util/license.py @@ -0,0 +1,16 @@ +from sqlalchemy.orm import Session +from mavedb.models.license import License +from mavedb.models.score_set import ScoreSet + + +def change_to_inactive_license(db: Session, urn: str) -> None: + """Change the license of the score set with given urn to an inactive license.""" + item = db.query(ScoreSet).filter(ScoreSet.urn == urn).one_or_none() + assert item is not None + + license = db.query(License).filter(License.active.is_(False)).first() + assert license is not None + + item.license_id = license.id + db.add(item) + db.commit() diff --git a/tests/helpers/util/score_set.py b/tests/helpers/util/score_set.py new file mode 100644 index 00000000..1cc61e54 --- /dev/null +++ b/tests/helpers/util/score_set.py @@ -0,0 +1,57 @@ +import cdot.hgvs.dataproviders +import jsonschema +from copy import deepcopy +from unittest.mock import patch +from typing import Any, Dict, Optional + +from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate + +from tests.helpers.constants import TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_SEQ_SCORESET, TEST_CDOT_TRANSCRIPT +from fastapi.testclient import TestClient + + +def create_seq_score_set( + client: TestClient, experiment_urn: Optional[str], update: Optional[Dict[str, Any]] = None +) -> Dict[str, Any]: + score_set_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + if experiment_urn is not None: + score_set_payload["experimentUrn"] = experiment_urn + if update is not None: + score_set_payload.update(update) + jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.schema()) + + response = client.post("/api/v1/score-sets/", json=score_set_payload) + assert response.status_code == 200, "Could not create sequence based score set" + + response_data = response.json() + jsonschema.validate(instance=response_data, schema=ScoreSet.schema()) + return response_data + + +def create_acc_score_set( + client: TestClient, experiment_urn: Optional[str], update: Optional[Dict[str, Any]] = None +) -> Dict[str, Any]: + score_set_payload = deepcopy(TEST_MINIMAL_ACC_SCORESET) + if experiment_urn is not None: + score_set_payload["experimentUrn"] = experiment_urn + if update is not None: + score_set_payload.update(update) + + jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.schema()) + + with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + response = client.post("/api/v1/score-sets/", json=score_set_payload) + + assert response.status_code == 200, "Could not create accession based score set" + + response_data = response.json() + jsonschema.validate(instance=response_data, schema=ScoreSet.schema()) + return response_data + + +def publish_score_set(client: TestClient, score_set_urn: str) -> Dict[str, Any]: + response = client.post(f"/api/v1/score-sets/{score_set_urn}/publish") + assert response.status_code == 200, f"Could not publish score set {score_set_urn}" + + response_data = response.json() + return response_data diff --git a/tests/helpers/util/user.py b/tests/helpers/util/user.py new file mode 100644 index 00000000..b0ffab54 --- /dev/null +++ b/tests/helpers/util/user.py @@ -0,0 +1,30 @@ +from typing import Any + +from sqlalchemy.orm import Session + +from 
mavedb.models.user import User + +from tests.helpers.constants import EXTRA_USER + + +def mark_user_inactive(session: Session, username: str) -> User: + user = session.query(User).where(User.username == username).one() + user.is_active = False + + session.add(user) + session.commit() + session.refresh(user) + + return user + + +def change_ownership(db: Session, urn: str, model: Any) -> None: + """Change the ownership of the record with given urn and model to the extra user.""" + item = db.query(model).filter(model.urn == urn).one_or_none() + assert item is not None + extra_user = db.query(User).filter(User.username == EXTRA_USER["username"]).one_or_none() + assert extra_user is not None + item.created_by_id = extra_user.id + item.modified_by_id = extra_user.id + db.add(item) + db.commit() diff --git a/tests/helpers/util/variant.py b/tests/helpers/util/variant.py new file mode 100644 index 00000000..57bcd436 --- /dev/null +++ b/tests/helpers/util/variant.py @@ -0,0 +1,99 @@ +from typing import Optional + +from arq import ArqRedis +from cdot.hgvs.dataproviders import RESTDataProvider +from fastapi.testclient import TestClient +from sqlalchemy.orm import Session +from sqlalchemy import select +from unittest.mock import patch + +from mavedb.lib.score_sets import create_variants, create_variants_data, csv_data_to_df +from mavedb.lib.validation.dataframe.dataframe import validate_and_standardize_dataframe_pair +from mavedb.models.enums.processing_state import ProcessingState +from mavedb.models.enums.mapping_state import MappingState +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.score_set import ScoreSet +from mavedb.models.target_gene import TargetGene +from mavedb.models.variant import Variant + +from tests.helpers.constants import ( + TEST_MINIMAL_MAPPED_VARIANT, + TEST_MINIMAL_PRE_MAPPED_METADATA, + TEST_MINIMAL_POST_MAPPED_METADATA, +) + + +def mock_worker_variant_insertion( + client: TestClient, + db: Session, + data_provider: RESTDataProvider, + score_set: dict, + scores_csv_path: str, + counts_csv_path: Optional[str] = None, +) -> None: + with ( + open(scores_csv_path, "rb") as score_file, + patch.object(ArqRedis, "enqueue_job", return_value=None) as worker_queue, + ): + files = {"scores_file": (scores_csv_path.name, score_file, "rb")} + + if counts_csv_path is not None: + counts_file = open(counts_csv_path, "rb") + files["counts_file"] = (counts_csv_path.name, counts_file, "rb") + else: + counts_file = None + + response = client.post(f"/api/v1/score-sets/{score_set['urn']}/variants/data", files=files) + + # Assert we have mocked a job being added to the queue, and that the request succeeded. The + # response value here isn't important- we will add variants to the score set manually. + worker_queue.assert_called_once() + assert response.status_code == 200 + + if counts_file is not None: + counts_file.close() + + # Reopen files since their buffers are consumed while mocking the variant data post request. + with open(scores_csv_path, "rb") as score_file: + score_df = csv_data_to_df(score_file) + + if counts_csv_path is not None: + with open(counts_csv_path, "rb") as counts_file: + counts_df = csv_data_to_df(counts_file) + else: + counts_df = None + + # Insert variant manually, worker jobs are tested elsewhere separately. 
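+    # (Editorial note, not part of the original patch: the validate/standardize,
+    # create_variants_data, and create_variants calls below mirror the pipeline the arq
+    # worker job runs in production; they are inlined here so these tests do not depend
+    # on a live worker.)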
+ item = db.scalars(select(ScoreSet).where(ScoreSet.urn == score_set["urn"])).one_or_none() + assert item is not None + + scores, counts = validate_and_standardize_dataframe_pair(score_df, counts_df, item.target_genes, data_provider) + variants = create_variants_data(scores, counts, None) + num_variants = create_variants(db, item, variants) + assert num_variants == 3 + + item.processing_state = ProcessingState.success + + db.add(item) + db.commit() + + return client.get(f"api/v1/score-sets/{score_set['urn']}").json() + + +def create_mapped_variants_for_score_set(db, score_set_urn): + score_set = db.scalar(select(ScoreSet).where(ScoreSet.urn == score_set_urn)) + targets = db.scalars(select(TargetGene).where(TargetGene.score_set_id == score_set.id)) + variants = db.scalars(select(Variant).where(Variant.score_set_id == score_set.id)).all() + + for variant in variants: + mv = MappedVariant(**TEST_MINIMAL_MAPPED_VARIANT, variant_id=variant.id) + db.add(mv) + + for target in targets: + target.pre_mapped_metadata = TEST_MINIMAL_PRE_MAPPED_METADATA + target.post_mapped_metadata = TEST_MINIMAL_POST_MAPPED_METADATA + db.add(target) + + score_set.mapping_state = MappingState.complete + db.commit() + return diff --git a/tests/lib/test_authentication.py b/tests/lib/test_authentication.py index d0c1aa0d..53427193 100644 --- a/tests/lib/test_authentication.py +++ b/tests/lib/test_authentication.py @@ -1,74 +1,62 @@ -from unittest.mock import patch +# ruff: noqa: E402 import pytest -from fastapi import HTTPException +from unittest.mock import patch + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") from mavedb.lib.authentication import get_current_user, get_current_user_data_from_api_key from mavedb.models.enums.user_role import UserRole from mavedb.models.user import User from tests.helpers.constants import ADMIN_USER, ADMIN_USER_DECODED_JWT, TEST_USER, TEST_USER_DECODED_JWT -from tests.helpers.util import create_api_key_for_current_user, mark_user_inactive + +from tests.helpers.util.access_key import create_api_key_for_user +from tests.helpers.util.user import mark_user_inactive @pytest.mark.asyncio -async def test_get_current_user_data_from_key_valid_token(session, setup_lib_db, client): - access_key = create_api_key_for_current_user(client) +async def test_get_current_user_data_from_key_valid_token(session, setup_lib_db): + access_key = create_api_key_for_user(session, TEST_USER["username"]) user_data = await get_current_user_data_from_api_key(session, access_key) assert user_data.user.username == TEST_USER["username"] - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio -async def test_get_current_user_data_from_key_invalid_token(session, setup_lib_db, client): - access_key = create_api_key_for_current_user(client) +async def test_get_current_user_data_from_key_invalid_token(session, setup_lib_db): + access_key = create_api_key_for_user(session, TEST_USER["username"]) user_data = await get_current_user_data_from_api_key(session, f"invalid_{access_key}") assert user_data is None - # Some lingering db transaction holds this test open unless it is explicitly closed. 
- session.commit() - @pytest.mark.asyncio -async def test_get_current_user_data_from_key_nonetype_token(session, setup_lib_db, client): - create_api_key_for_current_user(client) +async def test_get_current_user_data_from_key_nonetype_token(session, setup_lib_db): + create_api_key_for_user(session, TEST_USER["username"]) user_data = await get_current_user_data_from_api_key(session, None) assert user_data is None - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio -async def test_get_current_user_via_api_key(session, setup_lib_db, client): - access_key = create_api_key_for_current_user(client) +async def test_get_current_user_via_api_key(session, setup_lib_db): + access_key = create_api_key_for_user(session, TEST_USER["username"]) user_data = await get_current_user_data_from_api_key(session, access_key) user_data = await get_current_user(user_data, None, session, None) assert user_data.user.username == TEST_USER["username"] - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio async def test_get_current_user_via_token_payload(session, setup_lib_db): user_data = await get_current_user(None, TEST_USER_DECODED_JWT, session, None) assert user_data.user.username == TEST_USER["username"] - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio async def test_get_current_user_no_api_no_jwt(session, setup_lib_db): user_data = await get_current_user(None, None, session, None) assert user_data is None - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio async def test_get_current_user_no_username(session, setup_lib_db): @@ -79,9 +67,6 @@ async def test_get_current_user_no_username(session, setup_lib_db): user_data = await get_current_user(None, jwt_without_sub, session, None) assert user_data is None - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio @pytest.mark.parametrize("with_email", [True, False]) @@ -106,9 +91,6 @@ async def test_get_current_user_nonexistent_user(session, setup_lib_db, with_ema # Ensure one user record is in the database session.query(User).filter(User.username == new_user_jwt["sub"]).one() - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio async def test_get_current_user_user_is_inactive(session, setup_lib_db): @@ -117,9 +99,6 @@ async def test_get_current_user_user_is_inactive(session, setup_lib_db): assert user_data is None - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio async def test_get_current_user_set_active_roles(session, setup_lib_db): @@ -128,19 +107,13 @@ async def test_get_current_user_set_active_roles(session, setup_lib_db): assert user_data.user.username == ADMIN_USER["username"] assert UserRole.admin in user_data.active_roles - # Some lingering db transaction holds this test open unless it is explicitly closed. 
- session.commit() - @pytest.mark.asyncio async def test_get_current_user_user_with_invalid_role_membership(session, setup_lib_db): - with pytest.raises(HTTPException) as exc_info: + with pytest.raises(Exception) as exc_info: await get_current_user(None, TEST_USER_DECODED_JWT, session, "admin") assert "This user is not a member of the requested acting role." in str(exc_info.value.detail) - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio async def test_get_current_user_user_extraneous_roles(session, setup_lib_db): @@ -148,6 +121,3 @@ async def test_get_current_user_user_extraneous_roles(session, setup_lib_db): assert user_data.user.username == TEST_USER["username"] assert user_data.active_roles == [] - - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() diff --git a/tests/lib/test_score_set.py b/tests/lib/test_score_set.py index d95ad6f1..e61c902f 100644 --- a/tests/lib/test_score_set.py +++ b/tests/lib/test_score_set.py @@ -1,3 +1,5 @@ +# ruff: noqa: E402 + import io import numpy as np @@ -5,6 +7,10 @@ import pytest from sqlalchemy import select +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.lib.score_sets import ( HGVSColumns, columns_for_dataset, @@ -19,10 +25,17 @@ null_values_list, required_score_column, ) +from mavedb.models.experiment import Experiment +from mavedb.models.license import License from mavedb.models.score_set import ScoreSet +from mavedb.models.target_accession import TargetAccession +from mavedb.models.target_gene import TargetGene +from mavedb.models.target_sequence import TargetSequence +from mavedb.models.taxonomy import Taxonomy from mavedb.models.variant import Variant -from tests.helpers.constants import TEST_SAVED_SCORESET_RANGE -from tests.helpers.util import create_acc_score_set, create_experiment, create_seq_score_set +from tests.helpers.constants import TEST_SAVED_SCORESET_RANGE, TEST_EXPERIMENT, TEST_ACC_SCORESET, TEST_SEQ_SCORESET +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.score_set import create_seq_score_set def test_columns_for_dataset_no_dataset(): @@ -264,12 +277,37 @@ def test_create_variants_data_scores_and_counts_mismatched_lengths(): create_variants_data(scores_df, counts_df) -def test_create_variants_seq_score_set(setup_lib_db, client, session): - experiment = create_experiment(client) - score_set = create_seq_score_set(client, experiment["urn"]) - score_set = session.scalars(select(ScoreSet)).first() - variant_data = create_variants_data(BASE_VARIANTS_SCORE_DF) +def test_create_variants_seq_score_set(setup_lib_db, session): + experiment = Experiment(**TEST_EXPERIMENT, extra_metadata={}) + session.add(experiment) + session.commit() + session.refresh(experiment) + + target_sequences = [ + TargetSequence(**{**seq["target_sequence"], **{"taxonomy": session.scalars(select(Taxonomy)).first()}}) + for seq in TEST_SEQ_SCORESET["target_genes"] + ] + target_genes = [ + TargetGene(**{**gene, **{"target_sequence": target_sequences[idx]}}) + for idx, gene in enumerate(TEST_SEQ_SCORESET["target_genes"]) + ] + + score_set = ScoreSet( + **{ + **TEST_SEQ_SCORESET, + **{ + "experiment_id": experiment.id, + "target_genes": target_genes, + "extra_metadata": {}, + "license": session.scalars(select(License)).first(), + }, + } + ) + session.add(score_set) + session.commit() + session.refresh(score_set) + variant_data 
= create_variants_data(BASE_VARIANTS_SCORE_DF) num_variants = create_variants( session, score_set, @@ -287,12 +325,34 @@ def test_create_variants_seq_score_set(setup_lib_db, client, session): session.commit() -def test_create_variants_acc_score_set(setup_lib_db, client, session): - experiment = create_experiment(client) - score_set = create_acc_score_set(client, experiment["urn"]) - score_set = session.scalars(select(ScoreSet)).first() - variant_data = create_variants_data(BASE_VARIANTS_SCORE_DF) +def test_create_variants_acc_score_set(setup_lib_db, session): + experiment = Experiment(**TEST_EXPERIMENT, extra_metadata={}) + session.add(experiment) + session.commit() + session.refresh(experiment) + + target_accessions = [TargetAccession(**seq["target_accession"]) for seq in TEST_ACC_SCORESET["target_genes"]] + target_genes = [ + TargetGene(**{**gene, **{"target_accession": target_accessions[idx]}}) + for idx, gene in enumerate(TEST_ACC_SCORESET["target_genes"]) + ] + score_set = ScoreSet( + **{ + **TEST_ACC_SCORESET, + **{ + "experiment_id": experiment.id, + "target_genes": target_genes, + "extra_metadata": {}, + "license": session.scalars(select(License)).first(), + }, + } + ) + session.add(score_set) + session.commit() + session.refresh(score_set) + + variant_data = create_variants_data(BASE_VARIANTS_SCORE_DF) num_variants = create_variants( session, score_set, diff --git a/tests/routers/conftest.py b/tests/routers/conftest.py index 591c4e3e..761705bc 100644 --- a/tests/routers/conftest.py +++ b/tests/routers/conftest.py @@ -1,23 +1,20 @@ from pathlib import Path from shutil import copytree -from unittest.mock import patch -import cdot.hgvs.dataproviders import pytest from mavedb.models.controlled_keyword import ControlledKeyword from mavedb.models.contributor import Contributor from mavedb.models.enums.user_role import UserRole -from mavedb.models.published_variant import PublishedVariantsMV from mavedb.models.license import License from mavedb.models.role import Role from mavedb.models.taxonomy import Taxonomy from mavedb.models.user import User + from tests.helpers.constants import ( ADMIN_USER, EXTRA_USER, EXTRA_CONTRIBUTOR, - TEST_CDOT_TRANSCRIPT, TEST_DB_KEYWORDS, TEST_LICENSE, TEST_INACTIVE_LICENSE, @@ -25,13 +22,6 @@ TEST_TAXONOMY, TEST_USER, ) -from tests.helpers.util import ( - create_acc_score_set_with_variants, - create_experiment, - create_seq_score_set_with_variants, - create_mapped_variants_for_score_set, - publish_score_set, -) @pytest.fixture @@ -60,32 +50,6 @@ def data_files(tmp_path): return tmp_path / "data" -# Fixtures for setting up score sets on which to calculate statistics. -# Adds an experiment and score set to the database, then publishes the score set. 
-@pytest.fixture -def setup_acc_scoreset(setup_router_db, session, data_provider, client, data_files): - experiment = create_experiment(client) - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): - score_set = create_acc_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores_acc.csv" - ) - publish_score_set(client, score_set["urn"]) - - -@pytest.fixture -def setup_seq_scoreset(setup_router_db, session, data_provider, client, data_files): - experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) - create_mapped_variants_for_score_set(session, score_set["urn"]) - publish_score_set(client, score_set["urn"]) - - # Note that we have not created indexes for this view when it is generated via metadata. This differs - # from the database created via alembic, which does create indexes. - PublishedVariantsMV.refresh(session, False) - - @pytest.fixture def mock_publication_fetch(request, requests_mock): """ diff --git a/tests/routers/test_access_keys.py b/tests/routers/test_access_keys.py index 4e266a0f..836dad6d 100644 --- a/tests/routers/test_access_keys.py +++ b/tests/routers/test_access_keys.py @@ -1,9 +1,18 @@ +# ruff: noqa: E402 + +import pytest + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.models.access_key import AccessKey from mavedb.models.enums.user_role import UserRole from mavedb.models.user import User + from tests.helpers.constants import EXTRA_USER from tests.helpers.dependency_overrider import DependencyOverrider -from tests.helpers.util import create_admin_key_for_current_user, create_api_key_for_current_user +from tests.helpers.util.access_key import create_admin_key_for_current_user, create_api_key_for_current_user def test_create_user_access_key(client, setup_router_db, session): diff --git a/tests/routers/test_collections.py b/tests/routers/test_collections.py index 3fae0d91..ce6a1ef4 100644 --- a/tests/routers/test_collections.py +++ b/tests/routers/test_collections.py @@ -1,12 +1,20 @@ +# ruff: noqa: E402 + import re from copy import deepcopy +from unittest.mock import patch import jsonschema import pytest +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.lib.validation.urn_re import MAVEDB_COLLECTION_URN_RE from mavedb.models.enums.contribution_role import ContributionRole from mavedb.view_models.collection import Collection + from tests.helpers.constants import ( EXTRA_USER, TEST_USER, @@ -14,12 +22,10 @@ TEST_COLLECTION_RESPONSE, ) from tests.helpers.dependency_overrider import DependencyOverrider -from tests.helpers.util import ( - create_collection, - create_experiment, - create_seq_score_set_with_variants, - publish_score_set, -) +from tests.helpers.util.collection import create_collection +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.score_set import create_seq_score_set, publish_score_set +from tests.helpers.util.variant import mock_worker_variant_insertion def test_create_private_collection(client, setup_router_db): @@ -224,10 +230,14 @@ def test_admin_can_add_experiment_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = 
create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) client.post(f"/api/v1/collections/{collection['urn']}/admins", json={"orcid_id": EXTRA_USER["username"]}) @@ -278,10 +288,14 @@ def test_editor_can_add_experiment_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) client.post(f"/api/v1/collections/{collection['urn']}/editors", json={"orcid_id": EXTRA_USER["username"]}) @@ -326,10 +340,14 @@ def test_viewer_cannot_add_experiment_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) client.post(f"/api/v1/collections/{collection['urn']}/viewers", json={"orcid_id": EXTRA_USER["username"]}) @@ -349,10 +367,14 @@ def test_unauthorized_user_cannot_add_experiment_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) @@ 
-370,10 +392,14 @@ def test_anonymous_cannot_add_experiment_to_collection( session, client, data_provider, data_files, setup_router_db, anonymous_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) @@ -391,10 +417,14 @@ def test_admin_can_add_score_set_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) client.post(f"/api/v1/collections/{collection['urn']}/admins", json={"orcid_id": EXTRA_USER["username"]}) @@ -444,10 +474,14 @@ def test_editor_can_add_score_set_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) client.post(f"/api/v1/collections/{collection['urn']}/editors", json={"orcid_id": EXTRA_USER["username"]}) @@ -491,10 +525,14 @@ def test_viewer_cannot_add_score_set_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, 
unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) client.post(f"/api/v1/collections/{collection['urn']}/viewers", json={"orcid_id": EXTRA_USER["username"]}) @@ -513,10 +551,14 @@ def test_unauthorized_user_cannot_add_score_set_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) @@ -533,10 +575,14 @@ def test_anonymous_cannot_add_score_set_to_collection( session, client, data_provider, data_files, setup_router_db, anonymous_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) diff --git a/tests/routers/test_experiments.py b/tests/routers/test_experiments.py index 199cd2b7..6908a0ab 100644 --- a/tests/routers/test_experiments.py +++ b/tests/routers/test_experiments.py @@ -1,3 +1,5 @@ +# ruff: noqa: E402 + import re from copy import deepcopy from datetime import date @@ -8,12 +10,17 @@ import requests import requests_mock +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.lib.validation.urn_re import MAVEDB_TMP_URN_RE from mavedb.models.experiment import Experiment as ExperimentDbModel from mavedb.models.experiment_set import ExperimentSet as ExperimentSetDbModel from mavedb.models.score_set import ScoreSet as ScoreSetDbModel from mavedb.view_models.experiment import Experiment, ExperimentCreate from mavedb.view_models.orcid import OrcidUser + from tests.helpers.constants import ( EXTRA_USER, TEST_BIORXIV_IDENTIFIER, @@ -31,14 +38,11 @@ TEST_USER, ) from tests.helpers.dependency_overrider import DependencyOverrider -from tests.helpers.util import ( - add_contributor, - change_ownership, - create_experiment, - create_seq_score_set, - create_seq_score_set_with_variants, - publish_score_set, -) +from tests.helpers.util.contributor import add_contributor +from tests.helpers.util.user import change_ownership +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.score_set import create_seq_score_set, publish_score_set +from tests.helpers.util.variant import mock_worker_variant_insertion def test_test_minimal_experiment_is_valid(): @@ -500,11 +504,15 @@ 
def test_admin_can_update_other_users_private_experiment_set(session, client, ad def test_can_update_own_public_experiment_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + response_data = create_experiment( client, {"experimentSetUrn": published_score_set["experiment"]["experimentSetUrn"], "title": "Second Experiment"}, @@ -515,10 +523,15 @@ def test_can_update_own_public_experiment_set(session, data_provider, client, se def test_cannot_update_other_users_public_experiment_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + published_experiment_set_urn = published_score_set["experiment"]["experimentSetUrn"] change_ownership(session, published_experiment_set_urn, ExperimentSetDbModel) experiment_post_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) @@ -533,10 +546,15 @@ def test_anonymous_cannot_update_others_user_public_experiment_set( session, data_provider, client, anonymous_app_overrides, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + published_experiment_set_urn = published_score_set["experiment"]["experimentSetUrn"] experiment_post_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) experiment_post_payload.update({"experimentSetUrn": published_experiment_set_urn, "title": "Second Experiment"}) @@ -553,10 +571,14 @@ def test_admin_can_update_other_users_public_experiment_set( session, data_provider, client, admin_app_overrides, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = 
create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() with DependencyOverrider(admin_app_overrides): response_data = create_experiment( @@ -1007,47 +1029,61 @@ def test_search_my_experiments(session, client, setup_router_db): def test_search_meta_analysis_experiment(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, score_set["urn"]) - meta_score_set = create_seq_score_set_with_variants( + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set["urn"]]}, ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" + ) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called_once() - meta_score_set = publish_score_set(client, meta_score_set["urn"]) score_set_refresh = (client.get(f"/api/v1/score-sets/{score_set['urn']}")).json() search_payload = {"metaAnalysis": True} response = client.post("/api/v1/me/experiments/search", json=search_payload) assert response.status_code == 200 response_data = response.json() - assert any(item["urn"] == meta_score_set["experiment"]["urn"] for item in response_data) + assert any(item["urn"] == published_meta_score_set["experiment"]["urn"] for item in response_data) assert all(item["urn"] != score_set_refresh["experiment"]["urn"] for item in response_data) def test_search_exclude_meta_analysis_experiment(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, score_set["urn"]) - meta_score_set = create_seq_score_set_with_variants( + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", update={"title": "Test Meta Analysis", 
"metaAnalyzesScoreSetUrns": [score_set["urn"]]}, ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" + ) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called_once() - meta_score_set = publish_score_set(client, meta_score_set["urn"]) score_set_refresh = (client.get(f"/api/v1/score-sets/{score_set['urn']}")).json() search_payload = {"metaAnalysis": False} response = client.post("/api/v1/me/experiments/search", json=search_payload) @@ -1059,14 +1095,17 @@ def test_search_exclude_meta_analysis_experiment(session, data_provider, client, def test_search_score_sets_for_experiments(session, client, setup_router_db, data_files, data_provider): experiment = create_experiment(client) - score_set_pub = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + # make the unpublished score set owned by some other user. This shouldn't appear in the results. score_set_unpub = create_seq_score_set(client, experiment["urn"], update={"title": "Unpublished Score Set"}) - published_score_set = publish_score_set(client, score_set_pub["urn"]) change_ownership(session, score_set_unpub["urn"], ScoreSetDbModel) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + # On score set publication, the experiment will get a new urn experiment_urn = published_score_set["experiment"]["urn"] response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets") @@ -1080,10 +1119,15 @@ def test_owner_searches_score_sets_with_unpublished_superseding_score_sets_for_e session, client, setup_router_db, data_files, data_provider ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] @@ -1103,10 +1147,15 @@ def test_non_owner_searches_score_sets_with_unpublished_superseding_score_sets_f session, client, setup_router_db, data_files, data_provider ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / 
"scores.csv" ) - published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] @@ -1127,22 +1176,28 @@ def test_owner_searches_published_superseding_score_sets_for_experiments( session, client, setup_router_db, data_files, data_provider ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, unpublished_score_set["urn"]) - superseding_score_set = create_seq_score_set_with_variants( - client, - session, - data_provider, - published_score_set["experiment"]["urn"], - data_files / "scores.csv", - update={"supersededScoreSetUrn": published_score_set["urn"]}, - ) - published_superseding_score_set = publish_score_set(client, superseding_score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + # On score set publication, the experiment will get a new urn experiment_urn = published_score_set["experiment"]["urn"] + superseding_score_set = create_seq_score_set( + client, experiment_urn, update={"supersededScoreSetUrn": published_score_set["urn"]} + ) + superseding_score_set = mock_worker_variant_insertion( + client, session, data_provider, superseding_score_set, data_files / "scores.csv" + ) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_superseding_score_set = publish_score_set(client, superseding_score_set["urn"]) + worker_queue.assert_called_once() + response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets") assert response.status_code == 200 assert len(response.json()) == 1 @@ -1153,24 +1208,31 @@ def test_non_owner_searches_published_superseding_score_sets_for_experiments( session, client, setup_router_db, data_files, data_provider ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, unpublished_score_set["urn"]) - superseding_score_set = create_seq_score_set_with_variants( - client, - session, - data_provider, - published_score_set["experiment"]["urn"], - data_files / "scores.csv", - update={"supersededScoreSetUrn": published_score_set["urn"]}, + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + # On score set 
publication, the experiment will get a new urn + experiment_urn = published_score_set["experiment"]["urn"] + superseding_score_set = create_seq_score_set( + client, experiment_urn, update={"supersededScoreSetUrn": published_score_set["urn"]} + ) + superseding_score_set = mock_worker_variant_insertion( + client, session, data_provider, superseding_score_set, data_files / "scores.csv" ) - published_superseding_score_set = publish_score_set(client, superseding_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_superseding_score_set = publish_score_set(client, superseding_score_set["urn"]) + worker_queue.assert_called_once() + change_ownership(session, published_score_set["urn"], ScoreSetDbModel) change_ownership(session, published_superseding_score_set["urn"], ScoreSetDbModel) - # On score set publication, the experiment will get a new urn - experiment_urn = published_score_set["experiment"]["urn"] + response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets") assert response.status_code == 200 assert len(response.json()) == 1 @@ -1179,12 +1241,11 @@ def test_non_owner_searches_published_superseding_score_sets_for_experiments( def test_search_score_sets_for_contributor_experiments(session, client, setup_router_db, data_files, data_provider): experiment = create_experiment(client) - score_set_pub = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + # make the unpublished score set owned by some other user. This shouldn't appear in the results. score_set_unpub = create_seq_score_set(client, experiment["urn"], update={"title": "Unpublished Score Set"}) - published_score_set = publish_score_set(client, score_set_pub["urn"]) change_ownership(session, score_set_unpub["urn"], ScoreSetDbModel) add_contributor( session, @@ -1195,6 +1256,10 @@ def test_search_score_sets_for_contributor_experiments(session, client, setup_ro TEST_USER["last_name"], ) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + # On score set publication, the experiment will get a new urn experiment_urn = published_score_set["experiment"]["urn"] response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets") @@ -1207,12 +1272,14 @@ def test_search_score_sets_for_contributor_experiments(session, client, setup_ro def test_search_score_sets_for_my_experiments(session, client, setup_router_db, data_files, data_provider): experiment = create_experiment(client) - score_set_pub = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + # The unpublished score set is for the current user, so it should show up in results. 
score_set_unpub = create_seq_score_set(client, experiment["urn"], update={"title": "Unpublished Score Set"}) - published_score_set = publish_score_set(client, score_set_pub["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() # On score set publication, the experiment will get a new urn experiment_urn = published_score_set["experiment"]["urn"] @@ -1278,13 +1345,18 @@ def test_anonymous_cannot_delete_other_users_published_experiment( session, data_provider, client, setup_router_db, data_files, anonymous_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - publish_score_set(client, score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + experiment_urn = score_set["experiment"]["urn"] with DependencyOverrider(anonymous_app_overrides): - del_response = client.delete(f"/api/v1/experiments/{experiment['urn']}") + del_response = client.delete(f"/api/v1/experiments/{experiment_urn}") assert del_response.status_code == 401 del_response_data = del_response.json() @@ -1300,11 +1372,16 @@ def test_can_delete_own_private_experiment(session, client, setup_router_db): def test_cannot_delete_own_published_experiment(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) - experiment_urn = published_score_set["experiment"]["urn"] + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + experiment_urn = score_set["experiment"]["urn"] del_response = client.delete(f"/api/v1/experiments/{experiment_urn}") assert del_response.status_code == 403 @@ -1340,21 +1417,25 @@ def test_contributor_cannot_delete_other_users_published_experiment( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) - experiment = published_score_set["experiment"] - change_ownership(session, experiment["urn"], ExperimentDbModel) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, 
unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + experiment_urn = score_set["experiment"]["urn"] + change_ownership(session, experiment_urn, ExperimentDbModel) add_contributor( session, - experiment["urn"], + experiment_urn, ExperimentDbModel, TEST_USER["username"], TEST_USER["first_name"], TEST_USER["last_name"], ) - del_response = client.delete(f"/api/v1/experiments/{experiment['urn']}") + del_response = client.delete(f"/api/v1/experiments/{experiment_urn}") assert del_response.status_code == 403 @@ -1363,13 +1444,18 @@ def test_admin_can_delete_other_users_published_experiment( session, data_provider, client, setup_router_db, data_files, admin_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) - experiment = published_score_set["experiment"] + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + experiment_urn = score_set["experiment"]["urn"] with DependencyOverrider(admin_app_overrides): - del_response = client.delete(f"/api/v1/experiments/{experiment['urn']}") + del_response = client.delete(f"/api/v1/experiments/{experiment_urn}") assert del_response.status_code == 200 @@ -1384,10 +1470,15 @@ def test_can_add_experiment_to_own_private_experiment_set(session, client, setup def test_can_add_experiment_to_own_public_experiment_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + test_experiment = deepcopy(TEST_MINIMAL_EXPERIMENT) test_experiment.update({"experimentSetUrn": published_score_set["experiment"]["experimentSetUrn"]}) response = client.post("/api/v1/experiments/", json=test_experiment) @@ -1416,10 +1507,15 @@ def test_contributor_can_add_experiment_to_others_public_experiment_set( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + 
worker_queue.assert_called_once() + change_ownership(session, published_score_set["urn"], ScoreSetDbModel) change_ownership(session, published_score_set["experiment"]["urn"], ExperimentDbModel) change_ownership(session, published_score_set["experiment"]["experimentSetUrn"], ExperimentSetDbModel) @@ -1454,10 +1550,15 @@ def test_cannot_add_experiment_to_others_public_experiment_set( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + experiment_set_urn = published_score_set["experiment"]["experimentSetUrn"] change_ownership(session, published_score_set["urn"], ScoreSetDbModel) change_ownership(session, published_score_set["experiment"]["urn"], ExperimentDbModel) diff --git a/tests/routers/test_hgvs.py b/tests/routers/test_hgvs.py index f59e5c27..9a19f709 100644 --- a/tests/routers/test_hgvs.py +++ b/tests/routers/test_hgvs.py @@ -1,8 +1,14 @@ +# ruff: noqa: E402 + from unittest.mock import patch -import cdot.hgvs.dataproviders +import pytest import requests_mock -from hgvs.exceptions import HGVSDataNotAvailableError + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") +hgvs = pytest.importorskip("hgvs") from tests.helpers.constants import TEST_CDOT_TRANSCRIPT, VALID_ACCESSION, VALID_GENE @@ -29,7 +35,7 @@ def test_hgvs_fetch_valid(client, setup_router_db): def test_hgvs_fetch_invalid(client, setup_router_db): with patch.object( - cdot.hgvs.dataproviders.ChainedSeqFetcher, "fetch_seq", side_effect=HGVSDataNotAvailableError() + cdot.hgvs.dataproviders.ChainedSeqFetcher, "fetch_seq", side_effect=hgvs.exceptions.HGVSDataNotAvailableError() ) as p: response = client.get(f"/api/v1/hgvs/fetch/{SMALL_ACCESSION}") p.assert_called_once() diff --git a/tests/routers/test_licenses.py b/tests/routers/test_licenses.py index 97c487a3..4d09a11d 100644 --- a/tests/routers/test_licenses.py +++ b/tests/routers/test_licenses.py @@ -1,5 +1,11 @@ +# ruff: noqa: E402 + import pytest +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from tests.helpers.constants import TEST_LICENSE from tests.helpers.dependency_overrider import DependencyOverrider diff --git a/tests/routers/test_permissions.py b/tests/routers/test_permissions.py index ef8bebb1..6b79b81d 100644 --- a/tests/routers/test_permissions.py +++ b/tests/routers/test_permissions.py @@ -1,15 +1,22 @@ +# ruff: noqa: E402 + +from unittest.mock import patch +import pytest + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.models.experiment import Experiment as ExperimentDbModel from mavedb.models.experiment_set import ExperimentSet as ExperimentSetDbModel from mavedb.models.score_set import ScoreSet as ScoreSetDbModel + from tests.helpers.constants import TEST_USER -from tests.helpers.util import ( - 
add_contributor, - change_ownership, - create_experiment, - create_seq_score_set, - create_seq_score_set_with_variants, - publish_score_set, -) +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.contributor import add_contributor +from tests.helpers.util.user import change_ownership +from tests.helpers.util.score_set import create_seq_score_set, publish_score_set +from tests.helpers.util.variant import mock_worker_variant_insertion # Test check_authorization function @@ -171,13 +178,18 @@ def test_get_true_permission_from_others_public_experiment_add_score_set_check( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set_1["urn"]) - pub_experiment_urn = published_score_set["experiment"]["urn"] - change_ownership(session, pub_experiment_urn, ExperimentDbModel) - response = client.get(f"/api/v1/permissions/user-is-permitted/experiment/{pub_experiment_urn}/add_score_set") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + published_experiment_urn = published_score_set["experiment"]["urn"] + change_ownership(session, published_experiment_urn, ExperimentDbModel) + response = client.get(f"/api/v1/permissions/user-is-permitted/experiment/{published_experiment_urn}/add_score_set") assert response.status_code == 200 assert response.json() diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 67c26b27..bb91a3dd 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -1,3 +1,5 @@ +# ruff: noqa: E402 + import re from copy import deepcopy from datetime import date @@ -5,10 +7,13 @@ import jsonschema import pytest -from arq import ArqRedis from humps import camelize from sqlalchemy import select +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.lib.validation.urn_re import MAVEDB_TMP_URN_RE, MAVEDB_SCORE_SET_URN_RE, MAVEDB_EXPERIMENT_URN_RE from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.experiment import Experiment as ExperimentDbModel @@ -16,6 +21,7 @@ from mavedb.models.variant import Variant as VariantDbModel from mavedb.view_models.orcid import OrcidUser from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate + from tests.helpers.constants import ( EXTRA_USER, EXTRA_LICENSE, @@ -38,15 +44,13 @@ TEST_SAVED_SCORE_CALIBRATION, ) from tests.helpers.dependency_overrider import DependencyOverrider -from tests.helpers.util import ( - add_contributor, - change_ownership, - change_to_inactive_license, - create_experiment, - create_seq_score_set, - create_seq_score_set_with_variants, - update_expected_response_for_created_resources, -) +from tests.helpers.util.contributor import add_contributor +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.license import change_to_inactive_license +from tests.helpers.util.score_set import create_seq_score_set, 
publish_score_set +from tests.helpers.util.common import update_expected_response_for_created_resources +from tests.helpers.util.user import change_ownership +from tests.helpers.util.variant import mock_worker_variant_insertion ######################################################################################################################## @@ -304,15 +308,12 @@ def test_can_update_score_set_supporting_data_after_publication( data_files, ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publication_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert publication_response.status_code == 200 - queue.assert_called_once() - published_score_set = publication_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() published_urn = published_score_set["urn"] response = client.get(f"/api/v1/score-sets/{published_urn}") @@ -369,15 +370,12 @@ def test_cannot_update_score_set_target_data_after_publication( client, setup_router_db, attribute, expected_response_data, updated_data, session, data_provider, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publication_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert publication_response.status_code == 200 - queue.assert_called_once() - published_score_set = publication_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() published_urn = published_score_set["urn"] response = client.get(f"/api/v1/score-sets/{published_urn}") @@ -535,7 +533,7 @@ def test_add_score_set_variants_scores_only_endpoint(client, setup_router_db, da scores_csv_path = data_files / "scores.csv" with ( open(scores_csv_path, "rb") as scores_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -561,7 +559,7 @@ def test_add_score_set_variants_scores_and_counts_endpoint(session, client, setu with ( open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -588,7 +586,7 @@ def test_add_score_set_variants_scores_only_endpoint_utf8_encoded(client, setup_ scores_csv_path = data_files / "scores_utf8_encoded.csv" with ( open(scores_csv_path, "rb") as 
scores_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -614,7 +612,7 @@ def test_add_score_set_variants_scores_and_counts_endpoint_utf8_encoded(session, with ( open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -706,7 +704,7 @@ def test_contributor_can_add_scores_to_other_user_score_set(session, client, set with ( open(scores_csv_path, "rb") as scores_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -762,7 +760,7 @@ def test_contributor_can_add_scores_and_counts_to_other_user_score_set(session, with ( open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -813,7 +811,7 @@ def test_admin_can_add_scores_to_other_user_score_set( with ( open(scores_csv_path, "rb") as scores_file, DependencyOverrider(admin_app_overrides), - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -839,7 +837,7 @@ def test_admin_can_add_scores_and_counts_to_other_user_score_set(session, client with ( open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -867,26 +865,23 @@ def test_admin_can_add_scores_and_counts_to_other_user_score_set(session, client def test_publish_score_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publication_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert publication_response.status_code == 200 - queue.assert_called_once() - response_data = publication_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(response_data["urn"]), re.Match) - assert isinstance(MAVEDB_EXPERIMENT_URN_RE.fullmatch(response_data["experiment"]["urn"]), re.Match) + assert 
isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_score_set["urn"]), re.Match) + assert isinstance(MAVEDB_EXPERIMENT_URN_RE.fullmatch(published_score_set["experiment"]["urn"]), re.Match) expected_response = update_expected_response_for_created_resources( - deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), response_data["experiment"], response_data + deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), published_score_set["experiment"], published_score_set ) expected_response["experiment"].update({"publishedDate": date.today().isoformat()}) expected_response.update( { - "urn": response_data["urn"], + "urn": published_score_set["urn"], "publishedDate": date.today().isoformat(), "numVariants": 3, "private": False, @@ -894,10 +889,10 @@ def test_publish_score_set(session, data_provider, client, setup_router_db, data "processingState": ProcessingState.success.name, } ) - assert sorted(expected_response.keys()) == sorted(response_data.keys()) + assert sorted(expected_response.keys()) == sorted(published_score_set.keys()) # refresh score set to post worker state - score_set = (client.get(f"/api/v1/score-sets/{response_data['urn']}")).json() + score_set = (client.get(f"/api/v1/score-sets/{published_score_set['urn']}")).json() for key in expected_response: assert (key, expected_response[key]) == (key, score_set[key]) @@ -909,27 +904,18 @@ def test_publish_score_set(session, data_provider, client, setup_router_db, data def test_publish_multiple_score_sets(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 1"} - ) - score_set_2 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 2"} - ) - score_set_3 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 3"} - ) + score_set_1 = create_seq_score_set(client, experiment["urn"]) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment["urn"]) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") + score_set_3 = create_seq_score_set(client, experiment["urn"]) + score_set_3 = mock_worker_variant_insertion(client, session, data_provider, score_set_3, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - pub_score_set_1_response = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") - assert pub_score_set_1_response.status_code == 200 - pub_score_set_2_response = client.post(f"/api/v1/score-sets/{score_set_2['urn']}/publish") - assert pub_score_set_2_response.status_code == 200 - pub_score_set_3_response = client.post(f"/api/v1/score-sets/{score_set_3['urn']}/publish") - assert pub_score_set_3_response.status_code == 200 - queue.assert_called() - pub_score_set_1_data = pub_score_set_1_response.json() - pub_score_set_2_data = pub_score_set_2_response.json() - pub_score_set_3_data = pub_score_set_3_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + pub_score_set_1_data = publish_score_set(client, score_set_1["urn"]) + pub_score_set_2_data = publish_score_set(client, score_set_2["urn"]) + 
pub_score_set_3_data = publish_score_set(client, score_set_3["urn"]) + worker_queue.assert_called() assert pub_score_set_1_data["urn"] == "urn:mavedb:00000001-a-1" assert pub_score_set_1_data["title"] == score_set_1["title"] @@ -959,10 +945,10 @@ def test_cannot_publish_score_set_without_variants(client, setup_router_db): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") assert response.status_code == 422 - queue.assert_not_called() + worker_queue.assert_not_called() response_data = response.json() assert "cannot publish score set without variant scores" in response_data["detail"] @@ -970,15 +956,15 @@ def test_cannot_publish_score_set_without_variants(client, setup_router_db): def test_cannot_publish_other_user_private_score_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + change_ownership(session, score_set["urn"], ScoreSetDbModel) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") assert response.status_code == 404 - queue.assert_not_called() + worker_queue.assert_not_called() response_data = response.json() assert f"score set with URN '{score_set['urn']}' not found" in response_data["detail"] @@ -988,13 +974,12 @@ def test_anonymous_cannot_publish_user_private_score_set( session, data_provider, client, setup_router_db, data_files, anonymous_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") with ( DependencyOverrider(anonymous_app_overrides), - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") assert response.status_code == 401 @@ -1006,9 +991,8 @@ def test_anonymous_cannot_publish_user_private_score_set( def test_contributor_can_publish_other_users_score_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") change_ownership(session, score_set["urn"], ScoreSetDbModel) add_contributor( session, @@ -1019,22 +1003,20 @@ def test_contributor_can_publish_other_users_score_set(session, data_provider, c TEST_USER["last_name"], ) - with 
patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - response_data = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - assert response_data["urn"] == "urn:mavedb:00000001-a-1" - assert response_data["experiment"]["urn"] == "urn:mavedb:00000001-a" + assert published_score_set["urn"] == "urn:mavedb:00000001-a-1" + assert published_score_set["experiment"]["urn"] == "urn:mavedb:00000001-a" expected_response = update_expected_response_for_created_resources( - deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), response_data["experiment"], response_data + deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), published_score_set["experiment"], published_score_set ) expected_response["experiment"].update({"publishedDate": date.today().isoformat()}) expected_response.update( { - "urn": response_data["urn"], + "urn": published_score_set["urn"], "publishedDate": date.today().isoformat(), "numVariants": 3, "private": False, @@ -1062,10 +1044,10 @@ def test_contributor_can_publish_other_users_score_set(session, data_provider, c "firstName": EXTRA_USER["first_name"], "lastName": EXTRA_USER["last_name"], } - assert sorted(expected_response.keys()) == sorted(response_data.keys()) + assert sorted(expected_response.keys()) == sorted(published_score_set.keys()) # refresh score set to post worker state - score_set = (client.get(f"/api/v1/score-sets/{response_data['urn']}")).json() + score_set = (client.get(f"/api/v1/score-sets/{published_score_set['urn']}")).json() for key in expected_response: assert (key, expected_response[key]) == (key, score_set[key]) @@ -1079,11 +1061,13 @@ def test_admin_cannot_publish_other_user_private_score_set( session, data_provider, client, admin_app_overrides, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with DependencyOverrider(admin_app_overrides), patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: + with ( + DependencyOverrider(admin_app_overrides), + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + ): response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") assert response.status_code == 404 queue.assert_not_called() @@ -1099,57 +1083,49 @@ def test_admin_cannot_publish_other_user_private_score_set( def test_create_single_score_set_meta_analysis(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - score_set = response.json() + with 
patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - meta_score_set = create_seq_score_set_with_variants( + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set["urn"]]}, + update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [published_score_set["urn"]]}, + ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" ) - score_set_refresh = (client.get(f"/api/v1/score-sets/{score_set['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == [score_set["urn"]] - assert score_set_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] + published_score_set_refresh = (client.get(f"/api/v1/score-sets/{published_score_set['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == [published_score_set_refresh["urn"]] + assert published_score_set_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] assert isinstance(MAVEDB_TMP_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) def test_publish_single_score_set_meta_analysis(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - score_set = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - meta_score_set = create_seq_score_set_with_variants( + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set["urn"]]}, ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" + ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_response = client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish") - assert meta_response.status_code == 200 - queue.assert_called_once() - meta_score_set = meta_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called_once() assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) assert meta_score_set["urn"] == "urn:mavedb:00000001-0-1" @@ -1159,42 +1135,38 @@ def test_multiple_score_set_meta_analysis_single_experiment( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 1"} - ) - score_set_2 = 
create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 2"} - ) + score_set_1 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response_1 = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") - assert response_1.status_code == 200 - response_2 = client.post(f"/api/v1/score-sets/{score_set_2['urn']}/publish") - assert response_2.status_code == 200 - queue.assert_called() - score_set_1 = response_1.json() - score_set_2 = response_2.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + published_score_set_2 = publish_score_set(client, score_set_2["urn"]) + worker_queue.assert_called() - meta_score_set = create_seq_score_set_with_variants( + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set_1["urn"], score_set_2["urn"]]}, + update={ + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_1["urn"], published_score_set_2["urn"]], + }, + ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" ) - score_set_1_refresh = (client.get(f"/api/v1/score-sets/{score_set_1['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([score_set_1["urn"], score_set_2["urn"]]) - assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_response = client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish") - assert meta_response.status_code == 200 - queue.assert_called_once() - meta_score_set = meta_response.json() + published_score_set_1_refresh = (client.get(f"/api/v1/score-sets/{published_score_set_1['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([published_score_set_1["urn"], published_score_set_2["urn"]]) + assert published_score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) - assert meta_score_set["urn"] == "urn:mavedb:00000001-0-1" + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called_once() + + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_meta_score_set["urn"]), re.Match) + assert published_meta_score_set["urn"] == "urn:mavedb:00000001-0-1" def test_multiple_score_set_meta_analysis_multiple_experiment_sets( @@ -1202,42 +1174,39 @@ def test_multiple_score_set_meta_analysis_multiple_experiment_sets( ): experiment_1 = create_experiment(client, {"title": "Experiment 1"}) experiment_2 = create_experiment(client, {"title": "Experiment 2"}) - score_set_1 = create_seq_score_set_with_variants( - client, 
session, data_provider, experiment_1["urn"], data_files / "scores.csv", update={"title": "Score Set 1"} - ) - score_set_2 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_2["urn"], data_files / "scores.csv", update={"title": "Score Set 2"} - ) + score_set_1 = create_seq_score_set(client, experiment_1["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment_2["urn"], update={"title": "Score Set 2"}) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response_1 = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") - assert response_1.status_code == 200 - response_2 = client.post(f"/api/v1/score-sets/{score_set_2['urn']}/publish") - assert response_2.status_code == 200 - queue.assert_called() - score_set_1 = response_1.json() - score_set_2 = response_2.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + published_score_set_2 = publish_score_set(client, score_set_2["urn"]) + worker_queue.assert_called() - meta_score_set = create_seq_score_set_with_variants( + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set_1["urn"], score_set_2["urn"]]}, + update={ + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_1["urn"], published_score_set_2["urn"]], + }, + ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" + ) + published_score_set_1_refresh = (client.get(f"/api/v1/score-sets/{published_score_set_1['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted( + [published_score_set_1["urn"], published_score_set_2["urn"]] ) - score_set_1_refresh = (client.get(f"/api/v1/score-sets/{score_set_1['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([score_set_1["urn"], score_set_2["urn"]]) - assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] + assert published_score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_response = client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish") - assert meta_response.status_code == 200 - queue.assert_called_once() - meta_score_set = meta_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called_once() - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) - assert meta_score_set["urn"] == "urn:mavedb:00000003-0-1" + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_meta_score_set["urn"]), re.Match) + assert published_meta_score_set["urn"] == "urn:mavedb:00000003-0-1" def test_multiple_score_set_meta_analysis_multiple_experiments( @@ -1247,42 +1216,39 @@ def test_multiple_score_set_meta_analysis_multiple_experiments( experiment_2 = create_experiment( client, {"title": "Experiment 2", 
"experimentSetUrn": experiment_1["experimentSetUrn"]} ) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv", update={"title": "Score Set 1"} - ) - score_set_2 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_2["urn"], data_files / "scores.csv", update={"title": "Score Set 2"} - ) + score_set_1 = create_seq_score_set(client, experiment_1["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment_2["urn"], update={"title": "Score Set 2"}) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response_1 = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") - assert response_1.status_code == 200 - response_2 = client.post(f"/api/v1/score-sets/{score_set_2['urn']}/publish") - assert response_2.status_code == 200 - queue.assert_called() - score_set_1 = response_1.json() - score_set_2 = response_2.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + published_score_set_2 = publish_score_set(client, score_set_2["urn"]) + worker_queue.assert_called() - meta_score_set = create_seq_score_set_with_variants( + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set_1["urn"], score_set_2["urn"]]}, + update={ + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_1["urn"], published_score_set_2["urn"]], + }, + ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" + ) + published_score_set_1_refresh = (client.get(f"/api/v1/score-sets/{published_score_set_1['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted( + [published_score_set_1["urn"], published_score_set_2["urn"]] ) - score_set_1_refresh = (client.get(f"/api/v1/score-sets/{score_set_1['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([score_set_1["urn"], score_set_2["urn"]]) - assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] + assert published_score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_response = client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish") - assert meta_response.status_code == 200 - queue.assert_called_once() - meta_score_set = meta_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called_once() - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) - assert meta_score_set["urn"] == "urn:mavedb:00000001-0-1" + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_meta_score_set["urn"]), re.Match) + assert published_meta_score_set["urn"] == "urn:mavedb:00000001-0-1" def test_multiple_score_set_meta_analysis_multiple_experiment_sets_different_score_sets( @@ -1290,133 +1256,117 
@@ def test_multiple_score_set_meta_analysis_multiple_experiment_sets_different_sco ): experiment_1 = create_experiment(client, {"title": "Experiment 1"}) experiment_2 = create_experiment(client, {"title": "Experiment 2"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Exp 1 Score Set 1"}, + + score_set_1_1 = create_seq_score_set(client, experiment_1["urn"], update={"title": "Score Set 1 exp 1"}) + score_set_1_1 = mock_worker_variant_insertion( + client, session, data_provider, score_set_1_1, data_files / "scores.csv" ) - score_set_1_2 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Exp 1 Score Set 2"}, + score_set_2_1 = create_seq_score_set(client, experiment_1["urn"], update={"title": "Score Set 2 exp 1"}) + score_set_2_1 = mock_worker_variant_insertion( + client, session, data_provider, score_set_2_1, data_files / "scores.csv" ) - score_set_2_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_2["urn"], - data_files / "scores.csv", - update={"title": "Exp 2 Score Set 1"}, + score_set_1_2 = create_seq_score_set(client, experiment_2["urn"], update={"title": "Score Set 1 exp 2 "}) + score_set_1_2 = mock_worker_variant_insertion( + client, session, data_provider, score_set_1_2, data_files / "scores.csv" ) - score_set_2_2 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_2["urn"], - data_files / "scores.csv", - update={"title": "Exp 2 Score Set 2"}, - ) - - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response_1_1 = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert response_1_1.status_code == 200 - response_1_2 = client.post(f"/api/v1/score-sets/{score_set_1_2['urn']}/publish") - assert response_1_2.status_code == 200 - response_2_1 = client.post(f"/api/v1/score-sets/{score_set_2_1['urn']}/publish") - assert response_2_1.status_code == 200 - response_2_2 = client.post(f"/api/v1/score-sets/{score_set_2_2['urn']}/publish") - assert response_2_2.status_code == 200 - queue.assert_called() - score_set_1_1 = response_1_1.json() - score_set_1_2 = response_1_2.json() - score_set_2_1 = response_2_1.json() - score_set_2_2 = response_2_2.json() - - meta_score_set_1 = create_seq_score_set_with_variants( + score_set_2_2 = create_seq_score_set(client, experiment_2["urn"], update={"title": "Score Set 2 exp 2"}) + score_set_2_2 = mock_worker_variant_insertion( + client, session, data_provider, score_set_2_2, data_files / "scores.csv" + ) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1_1 = publish_score_set(client, score_set_1_1["urn"]) + published_score_set_1_2 = publish_score_set(client, score_set_1_2["urn"]) + published_score_set_2_1 = publish_score_set(client, score_set_2_1["urn"]) + published_score_set_2_2 = publish_score_set(client, score_set_2_2["urn"]) + worker_queue.assert_called() + + meta_score_set_1 = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", update={ - "title": "Test Meta Analysis 1-1 2-1", - "metaAnalyzesScoreSetUrns": [score_set_1_1["urn"], score_set_2_1["urn"]], + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_1_1["urn"], published_score_set_1_2["urn"]], }, ) - score_set_1_1_refresh = 
(client.get(f"/api/v1/score-sets/{score_set_1_1['urn']}")).json() - assert meta_score_set_1["metaAnalyzesScoreSetUrns"] == sorted([score_set_1_1["urn"], score_set_2_1["urn"]]) - assert score_set_1_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set_1["urn"]] - meta_score_set_2 = create_seq_score_set_with_variants( + meta_score_set_1 = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set_1, data_files / "scores.csv" + ) + + published_score_set_1_1_refresh = (client.get(f"/api/v1/score-sets/{published_score_set_1_1['urn']}")).json() + assert meta_score_set_1["metaAnalyzesScoreSetUrns"] == sorted( + [published_score_set_1_1["urn"], published_score_set_1_2["urn"]] + ) + assert published_score_set_1_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set_1["urn"]] + + meta_score_set_2 = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", update={ - "title": "Test Meta Analysis 1-2 2-2", - "metaAnalyzesScoreSetUrns": [score_set_1_2["urn"], score_set_2_2["urn"]], + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_2_1["urn"], published_score_set_2_2["urn"]], }, ) + meta_score_set_2 = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set_2, data_files / "scores.csv" + ) + published_score_set_2_1_refresh = (client.get(f"/api/v1/score-sets/{published_score_set_2_1['urn']}")).json() + assert meta_score_set_2["metaAnalyzesScoreSetUrns"] == sorted( + [published_score_set_2_1["urn"], published_score_set_2_2["urn"]] + ) + assert published_score_set_2_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set_2["urn"]] - meta_score_set_3 = create_seq_score_set_with_variants( + meta_score_set_3 = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", update={ - "title": "Test Meta Analysis 1-1 2-2", - "metaAnalyzesScoreSetUrns": [score_set_1_1["urn"], score_set_2_2["urn"]], + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_1_1["urn"], published_score_set_2_2["urn"]], }, ) + meta_score_set_3 = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set_3, data_files / "scores.csv" + ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_score_set_1 = (client.post(f"/api/v1/score-sets/{meta_score_set_1['urn']}/publish")).json() - assert meta_score_set_1["urn"] == "urn:mavedb:00000003-0-1" - meta_score_set_2 = (client.post(f"/api/v1/score-sets/{meta_score_set_2['urn']}/publish")).json() - assert meta_score_set_2["urn"] == "urn:mavedb:00000003-0-2" - meta_score_set_3 = (client.post(f"/api/v1/score-sets/{meta_score_set_3['urn']}/publish")).json() - assert meta_score_set_3["urn"] == "urn:mavedb:00000003-0-3" - queue.assert_called() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_meta_score_set_1 = publish_score_set(client, meta_score_set_1["urn"]) + published_meta_score_set_2 = publish_score_set(client, meta_score_set_2["urn"]) + published_meta_score_set_3 = publish_score_set(client, meta_score_set_3["urn"]) + worker_queue.assert_called() - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set_1["urn"]), re.Match) - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set_2["urn"]), re.Match) - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set_3["urn"]), re.Match) + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_meta_score_set_1["urn"]), re.Match) + assert 
isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_meta_score_set_2["urn"]), re.Match) + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_meta_score_set_3["urn"]), re.Match) + assert published_meta_score_set_1["urn"] == "urn:mavedb:00000003-0-1" + assert published_meta_score_set_2["urn"] == "urn:mavedb:00000003-0-2" + assert published_meta_score_set_3["urn"] == "urn:mavedb:00000003-0-3" def test_cannot_add_score_set_to_meta_analysis_experiment(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set_1 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - score_set_1 = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + worker_queue.assert_called() - meta_score_set_1 = create_seq_score_set_with_variants( + meta_score_set_1 = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set_1["urn"]]}, + update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [published_score_set_1["urn"]]}, + ) + meta_score_set_1 = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set_1, data_files / "scores.csv" ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_score_set_1 = (client.post(f"/api/v1/score-sets/{meta_score_set_1['urn']}/publish")).json() - assert meta_score_set_1["urn"] == "urn:mavedb:00000001-0-1" - queue.assert_called() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + meta_score_set_1 = publish_score_set(client, meta_score_set_1["urn"]) + worker_queue.assert_called() assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set_1["urn"]), re.Match) + assert meta_score_set_1["urn"] == "urn:mavedb:00000001-0-1" + score_set_2 = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_2["experimentUrn"] = meta_score_set_1["experiment"]["urn"] jsonschema.validate(instance=score_set_2, schema=ScoreSetCreate.schema()) @@ -1431,29 +1381,27 @@ def test_create_single_score_set_meta_analysis_to_others_score_set( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - score_set = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + 
worker_queue.assert_called() - change_ownership(session, score_set["urn"], ScoreSetDbModel) - meta_score_set = create_seq_score_set_with_variants( + change_ownership(session, published_score_set["urn"], ScoreSetDbModel) + + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set["urn"]]}, + update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [published_score_set["urn"]]}, + ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" ) - score_set_refresh = (client.get(f"/api/v1/score-sets/{score_set['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == [score_set["urn"]] - assert score_set_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] + published_score_set_refresh = (client.get(f"/api/v1/score-sets/{published_score_set['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == [published_score_set["urn"]] + assert published_score_set_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] assert isinstance(MAVEDB_TMP_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) @@ -1461,40 +1409,38 @@ def test_multiple_score_set_meta_analysis_single_experiment_with_different_creat session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 1"} - ) - score_set_2 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 2"} - ) + score_set_1 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response_1 = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") - assert response_1.status_code == 200 - response_2 = client.post(f"/api/v1/score-sets/{score_set_2['urn']}/publish") - assert response_2.status_code == 200 - queue.assert_called() - score_set_1 = response_1.json() - score_set_2 = response_2.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + published_score_set_2 = publish_score_set(client, score_set_2["urn"]) + worker_queue.assert_called() - change_ownership(session, score_set_2["urn"], ScoreSetDbModel) - meta_score_set = create_seq_score_set_with_variants( + change_ownership(session, published_score_set_2["urn"], ScoreSetDbModel) + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set_1["urn"], score_set_2["urn"]]}, + update={ + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_1["urn"], published_score_set_2["urn"]], + }, + ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, 
meta_score_set, data_files / "scores.csv" ) - score_set_1_refresh = (client.get(f"/api/v1/score-sets/{score_set_1['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([score_set_1["urn"], score_set_2["urn"]]) - assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_response = client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish") - assert meta_response.status_code == 200 - queue.assert_called_once() - meta_score_set = meta_response.json() + published_score_set_1_refresh = (client.get(f"/api/v1/score-sets/{published_score_set_1['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted( + [published_score_set_1["urn"], published_score_set_2["urn"]] + ) + assert published_score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called() assert meta_score_set["urn"] == "urn:mavedb:00000001-0-1" assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) @@ -1505,39 +1451,41 @@ def test_multiple_score_set_meta_analysis_multiple_experiment_sets_with_differen ): experiment_1 = create_experiment(client, {"title": "Experiment 1"}) experiment_2 = create_experiment(client, {"title": "Experiment 2"}) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv", update={"title": "Score Set 1"} + score_set_1 = create_seq_score_set(client, experiment_1["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment_2["urn"], update={"title": "Score Set 2"}) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + published_score_set_2 = publish_score_set(client, score_set_2["urn"]) + worker_queue.assert_called() + + change_ownership(session, published_score_set_2["urn"], ScoreSetDbModel) + meta_score_set = create_seq_score_set( + client, + None, + update={ + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_1["urn"], published_score_set_2["urn"]], + }, ) - score_set_2 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_2["urn"], data_files / "scores.csv", update={"title": "Score Set 2"} + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_1 = (client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish")).json() - score_set_2 = (client.post(f"/api/v1/score-sets/{score_set_2['urn']}/publish")).json() - queue.assert_called() - - change_ownership(session, score_set_2["urn"], ScoreSetDbModel) - meta_score_set = create_seq_score_set_with_variants( - client, - session, - data_provider, - None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set_1["urn"], score_set_2["urn"]]}, + published_score_set_1_refresh = 
(client.get(f"/api/v1/score-sets/{published_score_set_1['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted( + [published_score_set_1["urn"], published_score_set_2["urn"]] ) - score_set_1_refresh = (client.get(f"/api/v1/score-sets/{score_set_1['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([score_set_1["urn"], score_set_2["urn"]]) - assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] + assert published_score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_response = client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish") - assert meta_response.status_code == 200 - queue.assert_called_once() - meta_score_set = meta_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called() - assert meta_score_set["urn"] == "urn:mavedb:00000003-0-1" - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) + assert published_meta_score_set["urn"] == "urn:mavedb:00000003-0-1" + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_meta_score_set["urn"]), re.Match) ######################################################################################################################## @@ -1546,15 +1494,9 @@ def test_multiple_score_set_meta_analysis_multiple_experiment_sets_with_differen def test_search_private_score_sets_no_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") search_payload = {"text": "fnord"} response = client.post("/api/v1/me/score-sets/search", json=search_payload) @@ -1563,61 +1505,49 @@ def test_search_private_score_sets_no_match(session, data_provider, client, setu def test_search_private_score_sets_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Fnord Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Test Fnord Score Set"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") search_payload = {"text": "fnord"} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["title"] == score_set_1_1["title"] + assert response.json()[0]["title"] == score_set["title"] def test_search_private_score_sets_urn_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client) - score_set_1_1 = 
create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - search_payload = {"urn": score_set_1_1["urn"]} + search_payload = {"urn": score_set["urn"]} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["urn"] == score_set_1_1["urn"] + assert response.json()[0]["urn"] == score_set["urn"] # There is space in the end of test urn. The search result returned nothing before. def test_search_private_score_sets_urn_with_space_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) - urn_with_space = score_set_1_1["urn"] + " " + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + urn_with_space = score_set["urn"] + " " search_payload = {"urn": urn_with_space} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["urn"] == score_set_1_1["urn"] + assert response.json()[0]["urn"] == score_set["urn"] def test_search_others_private_score_sets_no_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) - change_ownership(session, score_set_1_1["urn"], ScoreSetDbModel) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + change_ownership(session, score_set["urn"], ScoreSetDbModel) + search_payload = {"text": "fnord"} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 @@ -1625,16 +1555,11 @@ def test_search_others_private_score_sets_no_match(session, data_provider, clien def test_search_others_private_score_sets_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Fnord Score Set"}, - ) - change_ownership(session, score_set_1_1["urn"], ScoreSetDbModel) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + change_ownership(session, score_set["urn"], 
ScoreSetDbModel) search_payload = {"text": "fnord"} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 @@ -1642,12 +1567,12 @@ def test_search_others_private_score_sets_match(session, data_provider, client, def test_search_others_private_score_sets_urn_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) - change_ownership(session, score_set_1_1["urn"], ScoreSetDbModel) - search_payload = {"urn": score_set_1_1["urn"]} + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + change_ownership(session, score_set["urn"], ScoreSetDbModel) + + search_payload = {"urn": score_set["urn"]} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 0 @@ -1657,12 +1582,12 @@ def test_search_others_private_score_sets_urn_match(session, data_provider, clie def test_search_others_private_score_sets_urn_with_space_match( session, data_provider, client, setup_router_db, data_files ): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) - change_ownership(session, score_set_1_1["urn"], ScoreSetDbModel) - urn_with_space = score_set_1_1["urn"] + " " + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + change_ownership(session, score_set["urn"], ScoreSetDbModel) + + urn_with_space = score_set["urn"] + " " search_payload = {"urn": urn_with_space} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 @@ -1670,20 +1595,13 @@ def test_search_others_private_score_sets_urn_with_space_match( def test_search_public_score_sets_no_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() search_payload = {"text": "fnord"} response = client.post("/api/v1/score-sets/search", json=search_payload) @@ -1692,38 +1610,29 @@ def test_search_public_score_sets_no_match(session, data_provider, 
client, setup def test_search_public_score_sets_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Fnord Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Test Fnord Score Set"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() search_payload = {"text": "fnord"} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["title"] == score_set_1_1["title"] + assert response.json()[0]["title"] == score_set["title"] def test_search_public_score_sets_urn_with_space_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - published_score_set = score_set_response.json() - assert score_set_response.status_code == 200 - queue.assert_called_once() + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() urn_with_space = published_score_set["urn"] + " " search_payload = {"urn": urn_with_space} @@ -1734,23 +1643,16 @@ def test_search_public_score_sets_urn_with_space_match(session, data_provider, c def test_search_others_public_score_sets_no_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", 
return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + change_ownership(session, published_score_set["urn"], ScoreSetDbModel) - publish_score_set = score_set_response.json() - change_ownership(session, publish_score_set["urn"], ScoreSetDbModel) search_payload = {"text": "fnord"} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 @@ -1758,65 +1660,52 @@ def test_search_others_public_score_sets_no_match(session, data_provider, client def test_search_others_public_score_sets_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Fnord Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Test Fnord Score Set"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + change_ownership(session, published_score_set["urn"], ScoreSetDbModel) + assert session.query(ScoreSetDbModel).filter_by(urn=published_score_set["urn"]).one() - publish_score_set = score_set_response.json() - change_ownership(session, publish_score_set["urn"], ScoreSetDbModel) - assert session.query(ScoreSetDbModel).filter_by(urn=publish_score_set["urn"]).one() search_payload = {"text": "fnord"} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["title"] == publish_score_set["title"] + assert response.json()[0]["title"] == published_score_set["title"] def test_search_others_public_score_sets_urn_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - publish_score_set = score_set_response.json() - change_ownership(session, publish_score_set["urn"], ScoreSetDbModel) - search_payload = {"urn": score_set_1_1["urn"]} + change_ownership(session, 
published_score_set["urn"], ScoreSetDbModel) + search_payload = {"urn": score_set["urn"]} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["urn"] == publish_score_set["urn"] + assert response.json()[0]["urn"] == published_score_set["urn"] def test_search_others_public_score_sets_urn_with_space_match( session, data_provider, client, setup_router_db, data_files ): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - published_score_set = score_set_response.json() change_ownership(session, published_score_set["urn"], ScoreSetDbModel) urn_with_space = published_score_set["urn"] + " " search_payload = {"urn": urn_with_space} @@ -1829,46 +1718,41 @@ def test_search_others_public_score_sets_urn_with_space_match( def test_search_private_score_sets_not_showing_public_score_set( session, data_provider, client, setup_router_db, data_files ): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) - score_set_1_2 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set_1 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, score_set_1["urn"]) + worker_queue.assert_called_once() search_payload = {"published": False} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["urn"] == score_set_1_2["urn"] + assert response.json()[0]["urn"] == score_set_2["urn"] def test_search_public_score_sets_not_showing_private_score_set( session, data_provider, client, setup_router_db, data_files ): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - 
client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) - create_seq_score_set_with_variants(client, session, data_provider, experiment_1["urn"], data_files / "scores.csv") + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set_1 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + worker_queue.assert_called_once() - published_score_set = score_set_response.json() search_payload = {"published": True} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["urn"] == published_score_set["urn"] + assert response.json()[0]["urn"] == published_score_set_1["urn"] ######################################################################################################################## @@ -1880,9 +1764,8 @@ def test_anonymous_cannot_delete_other_users_private_scoreset( session, data_provider, client, setup_router_db, data_files, anonymous_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") with DependencyOverrider(anonymous_app_overrides): response = client.delete(f"/api/v1/score-sets/{score_set['urn']}") @@ -1895,18 +1778,15 @@ def test_anonymous_cannot_delete_other_users_published_scoreset( session, data_provider, client, setup_router_db, data_files, anonymous_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - response_data = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() with DependencyOverrider(anonymous_app_overrides): - del_response = client.delete(f"/api/v1/score-sets/{response_data['urn']}") + del_response = client.delete(f"/api/v1/score-sets/{published_score_set['urn']}") assert del_response.status_code == 401 del_response_data = del_response.json() @@ -1915,9 +1795,8 @@ def 
test_anonymous_cannot_delete_other_users_published_scoreset( def test_can_delete_own_private_scoreset(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") response = client.delete(f"/api/v1/score-sets/{score_set['urn']}") @@ -1926,30 +1805,26 @@ def test_can_delete_own_private_scoreset(session, data_provider, client, setup_r def test_cannot_delete_own_published_scoreset(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - response_data = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - del_response = client.delete(f"/api/v1/score-sets/{response_data['urn']}") + del_response = client.delete(f"/api/v1/score-sets/{published_score_set['urn']}") assert del_response.status_code == 403 del_response_data = del_response.json() - assert f"insufficient permissions for URN '{response_data['urn']}'" in del_response_data["detail"] + assert f"insufficient permissions for URN '{published_score_set['urn']}'" in del_response_data["detail"] def test_contributor_can_delete_other_users_private_scoreset( session, data_provider, client, setup_router_db, data_files, admin_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") change_ownership(session, score_set["urn"], ScoreSetDbModel) add_contributor( session, @@ -1969,9 +1844,8 @@ def test_admin_can_delete_other_users_private_scoreset( session, data_provider, client, setup_router_db, data_files, admin_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") with DependencyOverrider(admin_app_overrides): response = client.delete(f"/api/v1/score-sets/{score_set['urn']}") @@ -1983,20 +1857,16 @@ def test_admin_can_delete_other_users_published_scoreset( session, data_provider, client, setup_router_db, data_files, admin_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / 
"scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - response_data = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() with DependencyOverrider(admin_app_overrides): - del_response = client.delete(f"/api/v1/score-sets/{response_data['urn']}") - - assert del_response.status_code == 200 + del_response = client.delete(f"/api/v1/score-sets/{published_score_set['urn']}") + assert del_response.status_code == 200 ######################################################################################################################## @@ -2026,33 +1896,32 @@ def test_cannot_add_score_set_to_others_private_experiment(session, client, setu def test_can_add_score_set_to_own_public_experiment(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set_1 = create_seq_score_set(client, experiment["urn"]) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - pub_score_set_1 = (client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish")).json() - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + worker_queue.assert_called_once() score_set_2 = deepcopy(TEST_MINIMAL_SEQ_SCORESET) - score_set_2["experimentUrn"] = pub_score_set_1["experiment"]["urn"] + score_set_2["experimentUrn"] = published_score_set_1["experiment"]["urn"] response = client.post("/api/v1/score-sets/", json=score_set_2) assert response.status_code == 200 def test_can_add_score_set_to_others_public_experiment(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set_1 = create_seq_score_set(client, experiment["urn"]) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - pub_score_set_1 = (client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish")).json() - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set_1["urn"]) + worker_queue.assert_called_once() - change_ownership(session, pub_score_set_1["experiment"]["urn"], ExperimentDbModel) + published_experiment_urn = published_score_set["experiment"]["urn"] + change_ownership(session, published_experiment_urn, ExperimentDbModel) score_set_2 = deepcopy(TEST_MINIMAL_SEQ_SCORESET) - score_set_2["experimentUrn"] = pub_score_set_1["experiment"]["urn"] + score_set_2["experimentUrn"] = 
published_experiment_urn response = client.post("/api/v1/score-sets/", json=score_set_2) assert response.status_code == 200 @@ -2078,25 +1947,25 @@ def test_contributor_can_add_score_set_to_others_public_experiment( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - published_score_set = (client.post(f"/api/v1/score-sets/{score_set['urn']}/publish")).json() - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - change_ownership(session, published_score_set["experiment"]["urn"], ExperimentDbModel) + published_experiment_urn = published_score_set["experiment"]["urn"] + change_ownership(session, published_experiment_urn, ExperimentDbModel) add_contributor( session, - published_score_set["experiment"]["urn"], + published_experiment_urn, ExperimentDbModel, TEST_USER["username"], TEST_USER["first_name"], TEST_USER["last_name"], ) score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) - score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] + score_set_post_payload["experimentUrn"] = published_experiment_urn response = client.post("/api/v1/score-sets/", json=score_set_post_payload) assert response.status_code == 200 @@ -2138,15 +2007,13 @@ def test_can_modify_metadata_for_score_set_with_inactive_license(session, client def test_create_superseding_score_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publish_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - published_score_set = publish_score_set_response.json() score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] @@ -2156,15 +2023,15 @@ def test_create_superseding_score_set(session, data_provider, client, setup_rout def test_can_view_unpublished_superseding_score_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = 
mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish") - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() - published_score_set = publish_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] @@ -2182,15 +2049,14 @@ def test_cannot_view_others_unpublished_superseding_score_set( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish") - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() - published_score_set = publish_score_set_response.json() score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] @@ -2208,30 +2074,24 @@ def test_cannot_view_others_unpublished_superseding_score_set( def test_can_view_others_published_superseding_score_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish") - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() - published_score_set = publish_score_set_response.json() - superseding_score_set = create_seq_score_set_with_variants( - client, - session, - data_provider, - published_score_set["experiment"]["urn"], - data_files / "scores.csv", - update={"supersededScoreSetUrn": published_score_set["urn"]}, - ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - published_superseding_score_set_response = client.post( - f"/api/v1/score-sets/{superseding_score_set['urn']}/publish" - ) - assert 
publish_score_set_response.status_code == 200 - queue.assert_called_once() - published_superseding_score_set = published_superseding_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + superseding_score_set = create_seq_score_set( + client, published_score_set["experiment"]["urn"], update={"supersededScoreSetUrn": published_score_set["urn"]} + ) + superseding_score_set = mock_worker_variant_insertion( + client, session, data_provider, superseding_score_set, data_files / "scores.csv" + ) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_superseding_score_set = publish_score_set(client, superseding_score_set["urn"]) + worker_queue.assert_called_once() change_ownership(session, published_superseding_score_set["urn"], ScoreSetDbModel) @@ -2248,14 +2108,14 @@ def test_show_correct_score_set_version_with_superseded_score_set_to_its_owner( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish") - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() - published_score_set = publish_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] @@ -2268,6 +2128,11 @@ def test_show_correct_score_set_version_with_superseded_score_set_to_its_owner( assert score_set["urn"] == superseding_score_set["urn"] +######################################################################################################################## +# Score Calibrations +######################################################################################################################## + + def test_anonymous_user_cannot_add_score_calibrations_to_score_set(client, setup_router_db, anonymous_app_overrides): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) @@ -2367,18 +2232,15 @@ def test_upload_a_non_utf8_file(session, client, setup_router_db, data_files): # Test file doesn't have hgvs_splice so its values are all NA. 
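These tests now call a shared publish_score_set helper in place of the inline publish requests shown in the removed lines. The helper's implementation is not part of this diff; a minimal sketch consistent with the calls it replaces (the name and endpoint come from the surrounding code, the body is an assumption) would be:

    def publish_score_set(client, urn: str) -> dict:
        # Publish the score set through the API and return the published record,
        # failing immediately on a non-200 response.
        response = client.post(f"/api/v1/score-sets/{urn}/publish")
        assert response.status_code == 200
        return response.json()

Centralizing this step leaves each test's patch.object(arq.ArqRedis, "enqueue_job", ...) block responsible only for asserting that publication enqueued exactly one worker job.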
def test_download_scores_file(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publish_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() - publish_score_set = publish_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() download_scores_csv_response = client.get( - f"/api/v1/score-sets/{publish_score_set['urn']}/scores?drop_na_columns=true" + f"/api/v1/score-sets/{published_score_set['urn']}/scores?drop_na_columns=true" ) assert download_scores_csv_response.status_code == 200 download_scores_csv = download_scores_csv_response.text @@ -2391,22 +2253,16 @@ def test_download_scores_file(session, data_provider, client, setup_router_db, d def test_download_counts_file(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment["urn"], - scores_csv_path=data_files / "scores.csv", - counts_csv_path=data_files / "counts.csv", + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion( + client, session, data_provider, score_set, data_files / "scores.csv", data_files / "counts.csv" ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publish_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() - publish_score_set = publish_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() download_counts_csv_response = client.get( - f"/api/v1/score-sets/{publish_score_set['urn']}/counts?drop_na_columns=true" + f"/api/v1/score-sets/{published_score_set['urn']}/counts?drop_na_columns=true" ) assert download_counts_csv_response.status_code == 200 download_counts_csv = download_counts_csv_response.text diff --git a/tests/routers/test_statistics.py b/tests/routers/test_statistics.py index 279147e6..a26f349e 100644 --- a/tests/routers/test_statistics.py +++ b/tests/routers/test_statistics.py @@ -1,8 +1,14 @@ -from unittest.mock import patch +# ruff: noqa: E402 -import cdot.hgvs.dataproviders import pytest from humps import camelize +from unittest.mock import patch + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + +from mavedb.models.published_variant import PublishedVariantsMV from tests.helpers.constants import ( TEST_BIORXIV_IDENTIFIER, @@ -14,12 +20,9 @@ TEST_PUBMED_IDENTIFIER, VALID_GENE, ) -from tests.helpers.util import ( - create_acc_score_set_with_variants, - create_experiment, - create_seq_score_set_with_variants, - publish_score_set, -) +from 
tests.helpers.util.score_set import publish_score_set, create_acc_score_set, create_seq_score_set +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.variant import mock_worker_variant_insertion, create_mapped_variants_for_score_set TARGET_ACCESSION_FIELDS = ["accession", "assembly", "gene"] TARGET_SEQUENCE_FIELDS = ["sequence", "sequence-type"] @@ -36,6 +39,41 @@ } +# Fixtures for setting up score sets on which to calculate statistics. +# Adds an experiment and score set to the database, then publishes the score set. +@pytest.fixture +def setup_acc_scoreset(setup_router_db, session, data_provider, client, data_files): + experiment = create_experiment(client) + with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + score_set = create_acc_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion( + client, session, data_provider, score_set, data_files / "scores_acc.csv" + ) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + +@pytest.fixture +def setup_seq_scoreset(setup_router_db, session, data_provider, client, data_files): + experiment = create_experiment(client) + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" + ) + create_mapped_variants_for_score_set(session, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + # Note that we have not created indexes for this view when it is generated via metadata. This differs + # from the database created via alembic, which does create indexes. 
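+    # The refresh must also come after the publish step above: the statistics queries
+    # in these tests only see variants that were present in the view at its last refresh.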
+ PublishedVariantsMV.refresh(session, False) + session.commit() + + def assert_statistic(desired_field_value, response): """ Each statistic test must check that the response code was 200, @@ -206,21 +244,20 @@ def test_target_gene_identifier_statistiscs( with patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT ): - score_set = create_acc_score_set_with_variants( - client, - session, - data_provider, - experiment["urn"], - data_files / "scores_acc.csv", - {"targetGenes": [target]}, + unpublished_score_set = create_acc_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores_acc.csv" ) elif "targetSequence" in target: - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", {"targetGenes": [target]} + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - publish_score_set(client, score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() response = client.get(f"/api/v1/statistics/target/gene/{field_value}") desired_field_value = EXTERNAL_IDENTIFIERS[field_value]["identifier"]["identifier"] @@ -278,11 +315,14 @@ def test_record_publication_identifier_statistics( # updates. Folding these more complex setup steps into a fixture is more trouble than it's worth. record_update = {"primaryPublicationIdentifiers": [mocked_publication]} experiment = create_experiment(client, record_update) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", record_update + unpublished_score_set = create_seq_score_set(client, experiment["urn"], record_update) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - publish_score_set(client, score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() response = client.get(f"/api/v1/statistics/record/{model_value}/publication-identifiers") @@ -308,11 +348,14 @@ def test_record_keyword_statistics(session, data_provider, client, setup_router_ # Create experiment and score set resources. The fixtures are more useful for the simple cases that don't need scoreset / experiment # updates. Folding these more complex setup steps into a fixture is more trouble than it's worth. 
experiment = create_experiment(client, record_update) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"], record_update) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - publish_score_set(client, score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() response = client.get("/api/v1/statistics/record/experiment/keywords") desired_field_values = ["SaCas9", "Endogenous locus library method", "Base editor", "Other"] @@ -330,11 +373,14 @@ def test_record_doi_identifier_statistics(session, data_provider, client, setup_ # Create experiment and score set resources. The fixtures are more useful for the simple cases that don't need scoreset / experiment # updates. Folding these more complex setup steps into a fixture is more trouble than it's worth. experiment = create_experiment(client, record_update) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", record_update + unpublished_score_set = create_seq_score_set(client, experiment["urn"], record_update) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - publish_score_set(client, score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() response = client.get(f"/api/v1/statistics/record/{model_value}/doi-identifiers") desired_field_value = record_update["doiIdentifiers"][0]["identifier"] @@ -353,11 +399,14 @@ def test_record_raw_read_identifier_statistics( # Create experiment and score set resources. The fixtures are more useful for the simple cases that don't need scoreset / experiment # updates. Folding these more complex setup steps into a fixture is more trouble than it's worth. 
experiment = create_experiment(client, record_update) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", record_update + unpublished_score_set = create_seq_score_set(client, experiment["urn"], record_update) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - publish_score_set(client, score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() response = client.get(f"/api/v1/statistics/record/{model_value}/raw-read-identifiers") desired_field_value = record_update["rawReadIdentifiers"][0]["identifier"] diff --git a/tests/routers/test_target_gene.py b/tests/routers/test_target_gene.py index 4a607101..281c5265 100644 --- a/tests/routers/test_target_gene.py +++ b/tests/routers/test_target_gene.py @@ -1,21 +1,22 @@ +# ruff: noqa: E402 +import pytest + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from tests.helpers.util import ( - change_ownership, - create_experiment, - create_seq_score_set_with_variants, -) + +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.user import change_ownership +from tests.helpers.util.score_set import create_seq_score_set +from tests.helpers.util.variant import mock_worker_variant_insertion def test_search_my_target_genes_no_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") search_payload = {"text": "NONEXISTENT"} response = client.post("/api/v1/me/target-genes/search", json=search_payload) @@ -24,15 +25,9 @@ def test_search_my_target_genes_no_match(session, data_provider, client, setup_r def test_search_my_target_genes_no_match_on_other_user(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") change_ownership(session, score_set["urn"], ScoreSetDbModel) search_payload = {"text": "TEST1"} @@ -42,15 +37,9 @@ def test_search_my_target_genes_no_match_on_other_user(session, data_provider, c def test_search_my_target_genes_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score 
Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") search_payload = {"text": "TEST1"} response = client.post("/api/v1/me/target-genes/search", json=search_payload) @@ -60,15 +49,9 @@ def test_search_my_target_genes_match(session, data_provider, client, setup_rout def test_search_target_genes_no_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") search_payload = {"text": "NONEXISTENT"} response = client.post("/api/v1/target-genes/search", json=search_payload) @@ -77,15 +60,9 @@ def test_search_target_genes_no_match(session, data_provider, client, setup_rout def test_search_target_genes_match_on_other_user(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") change_ownership(session, score_set["urn"], ScoreSetDbModel) search_payload = {"text": "TEST1"} @@ -96,15 +73,9 @@ def test_search_target_genes_match_on_other_user(session, data_provider, client, def test_search_target_genes_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") search_payload = {"text": "TEST1"} response = client.post("/api/v1/target-genes/search", json=search_payload) diff --git a/tests/routers/test_users.py b/tests/routers/test_users.py index 8110ed72..bae66fbc 100644 --- a/tests/routers/test_users.py +++ b/tests/routers/test_users.py @@ -1,10 +1,16 @@ -from unittest import mock +# ruff: noqa: E402 import pytest +from unittest import mock + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") from mavedb.lib.authentication import get_current_user from mavedb.lib.authorization import require_current_user from mavedb.models.enums.user_role import UserRole + from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_USER, camelize from tests.helpers.dependency_overrider import DependencyOverrider diff --git a/tests/validation/dataframe/conftest.py 
b/tests/validation/dataframe/conftest.py new file mode 100644 index 00000000..8e4596db --- /dev/null +++ b/tests/validation/dataframe/conftest.py @@ -0,0 +1,43 @@ +import pandas as pd +import pytest +from unittest import mock, TestCase + +from mavedb.lib.validation.constants.general import ( + hgvs_nt_column, + hgvs_pro_column, + hgvs_splice_column, + required_score_column, +) +from tests.helpers.constants import TEST_CDOT_TRANSCRIPT + + +@pytest.fixture +def mocked_data_provider_class_attr(request): + """ + Sets the `human_data_provider` attribute on the class from the requesting + test context to the `data_provider` fixture. This allows fixture use across + the `unittest.TestCase` class. + """ + data_provider = mock.Mock() + data_provider._get_transcript.return_value = TEST_CDOT_TRANSCRIPT + request.cls.mocked_human_data_provider = data_provider + + +# Special DF Test Case that contains dummy data for tests below +@pytest.mark.usefixtures("mocked_data_provider_class_attr") +class DfTestCase(TestCase): + def setUp(self): + self.dataframe = pd.DataFrame( + { + hgvs_nt_column: ["g.1A>G", "g.1A>T"], + hgvs_splice_column: ["c.1A>G", "c.1A>T"], + hgvs_pro_column: ["p.Met1Val", "p.Met1Leu"], + required_score_column: [1.0, 2.0], + "extra": [12.0, 3.0], + "count1": [3.0, 5.0], + "count2": [9, 10], + "extra2": ["pathogenic", "benign"], + "mixed_types": ["test", 1.0], + "null_col": [None, None], + } + ) diff --git a/tests/validation/dataframe/test_column.py b/tests/validation/dataframe/test_column.py new file mode 100644 index 00000000..a11da0bb --- /dev/null +++ b/tests/validation/dataframe/test_column.py @@ -0,0 +1,272 @@ +from unittest import TestCase +from unittest.mock import Mock +import pandas as pd + +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.lib.validation.constants.general import ( + hgvs_nt_column, + hgvs_pro_column, + hgvs_splice_column, + required_score_column, +) +from mavedb.lib.validation.dataframe.column import ( + construct_target_sequence_mappings, + infer_column_type, + generate_variant_prefixes, + validate_data_column, + validate_hgvs_column_properties, + validate_variant_formatting, + validate_variant_column, +) + +from tests.validation.dataframe.conftest import DfTestCase + + +class TestInferColumnType(TestCase): + def test_floats(self): + test_data = pd.Series([12.0, 1.0, -0.012, 5.75]) + self.assertEqual(infer_column_type(test_data), "numeric") + + def test_ints(self): + test_data = pd.Series([12, 1, 0, -5]) + self.assertEqual(infer_column_type(test_data), "numeric") + + def test_floats_with_na(self): + test_data = pd.Series([12.0, 1.0, None, -0.012, 5.75]) + self.assertEqual(infer_column_type(test_data), "numeric") + + def test_ints_with_na(self): + test_data = pd.Series([12, 1, None, 0, -5]) + self.assertEqual(infer_column_type(test_data), "numeric") + + def test_convertable_strings(self): + test_data = pd.Series(["12.5", 1.25, "0", "-5"]) + self.assertEqual(infer_column_type(test_data), "numeric") + + def test_strings(self): + test_data = pd.Series(["hello", "test", "suite", "123abc"]) + self.assertEqual(infer_column_type(test_data), "string") + + def test_strings_with_na(self): + test_data = pd.Series(["hello", "test", None, "suite", "123abc"]) + self.assertEqual(infer_column_type(test_data), "string") + + def test_mixed(self): + test_data = pd.Series(["hello", 12.123, -75, "123abc"]) + self.assertEqual(infer_column_type(test_data), "mixed") + + def test_mixed_with_na(self): + test_data = pd.Series(["hello", None, 12.123, -75, "123abc"]) 
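+        # The None entry is skipped; the remaining mix of strings and numbers still
+        # classifies the column as "mixed".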
+ self.assertEqual(infer_column_type(test_data), "mixed") + + def test_all_na(self): + test_data = pd.Series([None] * 5) + self.assertEqual(infer_column_type(test_data), "empty") + + +class TestValidateVariantFormatting(TestCase): + def setUp(self) -> None: + super().setUp() + + self.valid = pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column) + self.inconsistent = pd.Series(["g.1A>G", "c.1A>T"], name=hgvs_nt_column) + self.valid_prefixes = ["g."] + self.invalid_prefixes = ["c."] + self.valid_target = ["single_target"] + + self.valid_multi = pd.Series(["test1:g.1A>G", "test2:g.1A>T"], name=hgvs_nt_column) + self.invalid_multi = pd.Series(["test3:g.1A>G", "test3:g.1A>T"], name=hgvs_nt_column) + self.inconsistent_multi = pd.Series(["test1:g.1A>G", "test2:c.1A>T"], name=hgvs_nt_column) + self.valid_targets = ["test1", "test2"] + + def test_single_target_valid_variants(self): + validate_variant_formatting(self.valid, self.valid_prefixes, self.valid_target, False) + + def test_single_target_inconsistent_variants(self): + with self.assertRaises(ValidationError): + validate_variant_formatting(self.inconsistent, self.valid_prefixes, self.valid_target, False) + + def test_single_target_invalid_prefixes(self): + with self.assertRaises(ValidationError): + validate_variant_formatting(self.valid, self.invalid_prefixes, self.valid_target, False) + + def test_multi_target_valid_variants(self): + validate_variant_formatting(self.valid_multi, self.valid_prefixes, self.valid_targets, True) + + def test_multi_target_inconsistent_variants(self): + with self.assertRaises(ValidationError): + validate_variant_formatting(self.inconsistent_multi, self.valid_prefixes, self.valid_targets, True) + + def test_multi_target_invalid_prefixes(self): + with self.assertRaises(ValidationError): + validate_variant_formatting(self.valid_multi, self.invalid_prefixes, self.valid_targets, True) + + def test_multi_target_lacking_full_coords(self): + with self.assertRaises(ValidationError): + validate_variant_formatting(self.valid, self.valid_prefixes, self.valid_targets, True) + + def test_multi_target_invalid_accessions(self): + with self.assertRaises(ValidationError): + validate_variant_formatting(self.invalid_multi, self.valid_prefixes, self.valid_targets, True) + + +class TestValidateVariantColumn(DfTestCase): + def setUp(self): + super().setUp() + + def test_invalid_column_type_index(self): + with self.assertRaises(ValidationError): + validate_variant_column(self.dataframe[required_score_column], True) + + def test_invalid_column_type(self): + with self.assertRaises(ValidationError): + validate_variant_column(self.dataframe[required_score_column], False) + + def test_null_values_type_index(self): + self.dataframe.iloc[1, self.dataframe.columns.get_loc(hgvs_nt_column)] = pd.NA + with self.assertRaises(ValidationError): + validate_variant_column(self.dataframe.iloc[0, :], True) + + def test_null_values_type(self): + self.dataframe.iloc[1, self.dataframe.columns.get_loc(hgvs_nt_column)] = pd.NA + validate_variant_column(self.dataframe[hgvs_nt_column], False) + + def test_nonunique_values_index(self): + self.dataframe["dup_col"] = ["p.Met1Leu", "p.Met1Leu"] + with self.assertRaises(ValidationError): + validate_variant_column(self.dataframe["dup_col"], True) + + def test_nonunique_values(self): + self.dataframe["dup_col"] = ["p.Met1Leu", "p.Met1Leu"] + validate_variant_column(self.dataframe["dup_col"], False) + + def test_variant_column_is_valid(self): + validate_variant_column(self.dataframe[hgvs_nt_column], True) + + +class 
TestGenerateVariantPrefixes(DfTestCase): + def setUp(self): + super().setUp() + + self.nt_prefixes = ["c.", "n.", "g.", "m.", "o."] + self.splice_prefixes = ["c.", "n."] + self.pro_prefixes = ["p."] + + def test_nt_prefixes(self): + prefixes = generate_variant_prefixes(self.dataframe[hgvs_nt_column]) + assert prefixes == self.nt_prefixes + + def test_pro_prefixes(self): + prefixes = generate_variant_prefixes(self.dataframe[hgvs_pro_column]) + assert prefixes == self.pro_prefixes + + def test_splice_prefixes(self): + prefixes = generate_variant_prefixes(self.dataframe[hgvs_splice_column]) + assert prefixes == self.splice_prefixes + + def test_unrecognized_column_prefixes(self): + with self.assertRaises(ValueError): + generate_variant_prefixes(self.dataframe["extra"]) + + +class TestValidateDataColumn(DfTestCase): + def test_valid(self): + validate_data_column(self.dataframe[required_score_column]) + + def test_null_column(self): + self.dataframe[required_score_column] = None + with self.assertRaises(ValidationError): + validate_data_column(self.dataframe[required_score_column]) + + def test_missing_data(self): + self.dataframe.loc[0, "extra"] = None + validate_data_column(self.dataframe["extra"]) + + def test_force_numeric(self): + with self.assertRaises(ValidationError): + validate_data_column(self.dataframe["extra2"], force_numeric=True) + + def test_mixed_types_invalid(self): + with self.assertRaises(ValidationError): + validate_data_column(self.dataframe["mixed_types"]) + + +class TestValidateHgvsColumnProperties(TestCase): + def setUp(self): + self.dna_observed = ["dna"] + self.protein_observed = ["protein"] + self.mixed_observed = ["dna", "protein"] + + def test_valid_dna_column(self): + column = pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column) + validate_hgvs_column_properties(column, self.dna_observed) + + def test_invalid_dna_column(self): + column = pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column) + with self.assertRaises(ValueError): + validate_hgvs_column_properties(column, self.protein_observed) + + def test_valid_splice_column(self): + column = pd.Series(["c.1-2A>G", "c.1-2A>T"], name=hgvs_splice_column) + validate_hgvs_column_properties(column, self.mixed_observed) + + def test_valid_protein_column(self): + column = pd.Series(["p.Met1Leu", "p.Met1Val"], name=hgvs_pro_column) + validate_hgvs_column_properties(column, self.mixed_observed) + + def test_invalid_column_name(self): + column = pd.Series(["x.1A>G", "x.1A>T"], name="invalid_column") + with self.assertRaises(ValueError): + validate_hgvs_column_properties(column, self.mixed_observed) + + +class TestConstructTargetSequenceMappings(TestCase): + def setUp(self): + mock_seq1, mock_seq2, mock_seq3 = Mock(), Mock(), Mock() + mock_seq1.sequence = "ATGCGT" + mock_seq1.sequence_type = "dna" + mock_seq2.sequence = "MR" + mock_seq2.sequence_type = "protein" + mock_seq3.sequence = None + mock_seq3.sequence_type = "dna" + + self.targets = { + "target1": mock_seq1, + "target2": mock_seq2, + "target3": mock_seq3, + } + + def test_nt_column(self): + column = pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column) + expected = { + "target1": "ATGCGT", + "target2": "MR", + "target3": None, + } + result = construct_target_sequence_mappings(column, self.targets) + self.assertEqual(result, expected) + + def test_splice_column(self): + column = pd.Series(["c.1-2A>G", "c.1-2A>T"], name=hgvs_splice_column) + expected = { + "target1": None, + "target2": None, + "target3": None, + } + result = construct_target_sequence_mappings(column, 
self.targets) + self.assertEqual(result, expected) + + def test_pro_column(self): + column = pd.Series(["p.Met1Leu", "p.Met1Val"], name=hgvs_pro_column) + expected = { + "target1": "MR", + "target2": "MR", + "target3": None, + } + result = construct_target_sequence_mappings(column, self.targets) + self.assertEqual(result, expected) + + def test_invalid_column_name(self): + column = pd.Series(["x.1A>G", "x.1A>T"], name="invalid_column") + with self.assertRaises(ValueError): + construct_target_sequence_mappings(column, self.targets) diff --git a/tests/validation/dataframe/test_dataframe.py b/tests/validation/dataframe/test_dataframe.py new file mode 100644 index 00000000..4bca6f2f --- /dev/null +++ b/tests/validation/dataframe/test_dataframe.py @@ -0,0 +1,414 @@ +import itertools +from unittest import TestCase + +import numpy as np +import pandas as pd +import pytest + +from mavedb.lib.validation.constants.general import ( + hgvs_nt_column, + hgvs_pro_column, + hgvs_splice_column, + required_score_column, +) +from mavedb.lib.validation.dataframe.dataframe import ( + choose_dataframe_index_column, + sort_dataframe_columns, + standardize_dataframe, + validate_and_standardize_dataframe_pair, + validate_column_names, + validate_hgvs_prefix_combinations, + validate_no_null_rows, + validate_variant_columns_match, +) +from mavedb.lib.validation.exceptions import ValidationError +from tests.validation.dataframe.conftest import DfTestCase + + +class TestSortDataframeColumns(DfTestCase): + def test_preserve_sorted(self): + sorted_df = sort_dataframe_columns(self.dataframe) + pd.testing.assert_frame_equal(self.dataframe, sorted_df) + + def test_sort_dataframe(self): + sorted_df = sort_dataframe_columns( + self.dataframe[ + [ + hgvs_splice_column, + "extra", + "count1", + hgvs_pro_column, + required_score_column, + hgvs_nt_column, + "count2", + "extra2", + "mixed_types", + "null_col", + ] + ] + ) + pd.testing.assert_frame_equal(self.dataframe, sorted_df) + + def test_sort_dataframe_is_case_insensitive(self): + self.dataframe = self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()}) + sorted_df = sort_dataframe_columns(self.dataframe) + pd.testing.assert_frame_equal(self.dataframe, sorted_df) + + def test_sort_dataframe_preserves_extras_order(self): + sorted_df = sort_dataframe_columns( + self.dataframe[ + [ + hgvs_splice_column, + "count2", + hgvs_pro_column, + required_score_column, + hgvs_nt_column, + "count1", + "extra2", + "extra", + "mixed_types", + ] + ] + ) + pd.testing.assert_frame_equal( + self.dataframe[ + [ + hgvs_nt_column, + hgvs_splice_column, + hgvs_pro_column, + required_score_column, + "count2", + "count1", + "extra2", + "extra", + "mixed_types", + ] + ], + sorted_df, + ) + + +class TestStandardizeDataframe(DfTestCase): + def test_preserve_standardized(self): + standardized_df = standardize_dataframe(self.dataframe) + pd.testing.assert_frame_equal(self.dataframe, standardized_df) + + def test_standardize_changes_case_variants(self): + standardized_df = standardize_dataframe(self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()})) + pd.testing.assert_frame_equal(self.dataframe, standardized_df) + + def test_standardice_changes_case_scores(self): + standardized_df = standardize_dataframe( + self.dataframe.rename(columns={required_score_column: required_score_column.title()}) + ) + pd.testing.assert_frame_equal(self.dataframe, standardized_df) + + def test_standardize_preserves_extras_case(self): + standardized_df = 
standardize_dataframe(self.dataframe.rename(columns={"extra": "extra".upper()})) + pd.testing.assert_frame_equal(self.dataframe.rename(columns={"extra": "extra".upper()}), standardized_df) + + def test_standardize_sorts_columns(self): + standardized_df = standardize_dataframe( + self.dataframe.loc[ + :, + [ + hgvs_splice_column, + "count2", + hgvs_pro_column, + required_score_column, + hgvs_nt_column, + "count1", + "extra", + ], + ] + ) + pd.testing.assert_frame_equal( + self.dataframe[ + [ + hgvs_nt_column, + hgvs_splice_column, + hgvs_pro_column, + required_score_column, + "count2", + "count1", + "extra", + ] + ], + standardized_df, + ) + + +class TestValidateStandardizeDataFramePair(DfTestCase): + def test_no_targets(self): + with self.assertRaises(ValueError): + validate_and_standardize_dataframe_pair( + self.dataframe, counts_df=None, targets=[], hdp=self.mocked_human_data_provider + ) + + # TODO: Add additional DataFrames. Realistically, if other unit tests pass this function is ok + + +class TestNullRows(DfTestCase): + def test_null_row(self): + self.dataframe.iloc[1, :] = None + with self.assertRaises(ValidationError): + validate_no_null_rows(self.dataframe) + + def test_valid(self): + validate_no_null_rows(self.dataframe) + + def test_only_hgvs_row(self): + self.dataframe.loc[1, [required_score_column, "extra", "count1", "count2"]] = None + validate_no_null_rows(self.dataframe) + + +class TestColumnNames(DfTestCase): + def test_only_two_kinds_of_dataframe(self): + with self.assertRaises(ValueError): + validate_column_names(self.dataframe, kind="score2") + + def test_score_df_has_score_column(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.drop([required_score_column], axis=1), kind="scores") + + def test_count_df_lacks_score_column(self): + validate_column_names(self.dataframe.drop([required_score_column], axis=1), kind="counts") + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe, kind="counts") + + def test_count_df_has_score_column(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe, kind="counts") + + def test_df_with_only_scores(self): + validate_column_names(self.dataframe[[hgvs_pro_column, required_score_column]], kind="scores") + + def test_count_df_must_have_data(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe[[hgvs_nt_column, hgvs_pro_column]], kind="counts") + + def test_just_hgvs_nt(self): + validate_column_names(self.dataframe.drop([hgvs_pro_column, hgvs_splice_column], axis=1), kind="scores") + validate_column_names( + self.dataframe.drop([hgvs_pro_column, hgvs_splice_column, required_score_column], axis=1), kind="counts" + ) + + def test_just_hgvs_pro(self): + validate_column_names(self.dataframe.drop([hgvs_nt_column, hgvs_splice_column], axis=1), kind="scores") + validate_column_names( + self.dataframe.drop([hgvs_nt_column, hgvs_splice_column, required_score_column], axis=1), kind="counts" + ) + + def test_just_hgvs_pro_and_nt(self): + validate_column_names(self.dataframe.drop([hgvs_splice_column], axis=1), kind="scores") + validate_column_names(self.dataframe.drop([hgvs_splice_column, required_score_column], axis=1), kind="counts") + + def test_hgvs_splice_must_have_pro_and_nt_both_absent(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1), kind="scores") + + def test_hgvs_splice_must_have_pro_and_nt_nt_absent(self): + with 
self.assertRaises(ValidationError): + validate_column_names(self.dataframe.drop([hgvs_nt_column], axis=1), kind="scores") + + def test_hgvs_splice_must_have_pro_and_nt_pro_absent(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.drop([hgvs_pro_column], axis=1), kind="scores") + + def test_hgvs_splice_must_have_pro_and_nt_and_scores(self): + with self.assertRaises(ValidationError): + validate_column_names( + self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, required_score_column], axis=1), kind="counts" + ) + + def test_hgvs_splice_must_have_pro_and_nt_nt_scores_absent(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.drop([hgvs_nt_column, required_score_column], axis=1), kind="counts") + + def test_hgvs_splice_must_have_pro_and_nt_pro_scores_absent(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.drop([hgvs_pro_column, required_score_column], axis=1), kind="counts") + + def test_no_hgvs_column_scores(self): + with pytest.raises(ValidationError) as exc_info: + validate_column_names( + self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, hgvs_splice_column], axis=1), kind="scores" + ) + assert "dataframe does not define any variant columns" in str(exc_info.value) + + def test_no_hgvs_column_counts(self): + with pytest.raises(ValidationError) as exc_info: + validate_column_names( + self.dataframe.drop( + [hgvs_nt_column, hgvs_pro_column, hgvs_splice_column, required_score_column], axis=1 + ), + kind="counts", + ) + assert "dataframe does not define any variant columns" in str(exc_info.value) + + def test_validation_ignores_column_ordering_scores(self): + validate_column_names( + self.dataframe[[hgvs_nt_column, required_score_column, hgvs_pro_column, hgvs_splice_column]], kind="scores" + ) + validate_column_names(self.dataframe[[required_score_column, hgvs_nt_column, hgvs_pro_column]], kind="scores") + validate_column_names(self.dataframe[[hgvs_pro_column, required_score_column, hgvs_nt_column]], kind="scores") + + def test_validation_ignores_column_ordering_counts(self): + validate_column_names( + self.dataframe[[hgvs_nt_column, "count1", hgvs_pro_column, hgvs_splice_column, "count2"]], kind="counts" + ) + validate_column_names(self.dataframe[["count1", "count2", hgvs_nt_column, hgvs_pro_column]], kind="counts") + validate_column_names(self.dataframe[[hgvs_pro_column, "count1", "count2", hgvs_nt_column]], kind="counts") + + def test_validation_is_case_insensitive(self): + validate_column_names(self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()}), kind="scores") + validate_column_names( + self.dataframe.rename(columns={required_score_column: required_score_column.title()}), kind="scores" + ) + + def test_duplicate_hgvs_column_names_scores(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.rename(columns={hgvs_pro_column: hgvs_nt_column}), kind="scores") + + def test_duplicate_hgvs_column_names_counts(self): + with self.assertRaises(ValidationError): + validate_column_names( + self.dataframe.drop([required_score_column], axis=1).rename(columns={hgvs_pro_column: hgvs_nt_column}), + kind="counts", + ) + + def test_duplicate_score_column_names(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.rename(columns={"extra": required_score_column}), kind="scores") + + def test_duplicate_data_column_names_scores(self): + with self.assertRaises(ValidationError): + 
validate_column_names(self.dataframe.rename(columns={"count2": "count1"}), kind="scores") + + def test_duplicate_data_column_names_counts(self): + with self.assertRaises(ValidationError): + validate_column_names( + self.dataframe.drop([required_score_column], axis=1).rename(columns={"count2": "count1"}), kind="counts" + ) + + # Written without @pytest.mark.parametrize. See: https://pytest.org/en/7.4.x/how-to/unittest.html#pytest-features-in-unittest-testcase-subclasses + def test_invalid_column_names_scores(self): + invalid_values = [None, np.nan, "", " "] + for value in invalid_values: + with self.subTest(value=value): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.rename(columns={hgvs_splice_column: value}), kind="scores") + + def test_invalid_column_names_counts(self): + invalid_values = [None, np.nan, "", " "] + for value in invalid_values: + with self.subTest(value=value): + with self.assertRaises(ValidationError): + validate_column_names( + self.dataframe.drop([required_score_column], axis=1).rename( + columns={hgvs_splice_column: value} + ), + kind="counts", + ) + + def test_ignore_column_ordering_scores(self): + validate_column_names( + self.dataframe[[hgvs_splice_column, "extra", "count1", hgvs_pro_column, "score", hgvs_nt_column, "count2"]], + kind="scores", + ) + + def test_ignore_column_ordering_counts(self): + validate_column_names( + self.dataframe[[hgvs_splice_column, "extra", "count1", hgvs_pro_column, hgvs_nt_column, "count2"]], + kind="counts", + ) + + +class TestChooseDataframeIndexColumn(DfTestCase): + def setUp(self): + super().setUp() + + def test_nt_index_column(self): + index = choose_dataframe_index_column(self.dataframe) + assert index == hgvs_nt_column + + def test_pro_index_column(self): + index = choose_dataframe_index_column(self.dataframe.drop(hgvs_nt_column, axis=1)) + assert index == hgvs_pro_column + + def test_no_valid_index_column(self): + with self.assertRaises(ValidationError): + choose_dataframe_index_column(self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1)) + + +class TestValidateHgvsPrefixCombinations(TestCase): + def setUp(self): + self.valid_combinations = [ + ("g", "c", "p"), + ("m", "c", "p"), + ("o", "c", "p"), + ("g", "n", None), + ("m", "n", None), + ("o", "n", None), + ("n", None, None), + ("c", None, "p"), + (None, None, "p"), + (None, None, None), # valid for this validator, but a dataframe with no variants should be caught upstream + ] + self.invalid_combinations = [ + t + for t in itertools.product(("c", "n", "g", "m", "o", None), ("c", "n", None), ("p", None)) + if t not in self.valid_combinations + ] + + def test_valid_combinations(self): + for t in self.valid_combinations: + with self.subTest(t=t): + validate_hgvs_prefix_combinations(*t, True) + + def test_invalid_combinations(self): + for t in self.invalid_combinations: + with self.subTest(t=t): + with self.assertRaises(ValidationError): + validate_hgvs_prefix_combinations(*t, True) + + # TODO: biocommons.HGVS validation clashes here w/ our custom validators: + # n. prefix is the problematic one, for now. 
+ @pytest.mark.skip() + def test_invalid_combinations_biocommons(self): + for t in self.invalid_combinations: + with self.subTest(t=t): + with self.assertRaises(ValidationError): + validate_hgvs_prefix_combinations(*t, False) + + def test_invalid_combinations_value_error_nt(self): + with self.assertRaises(ValueError): + validate_hgvs_prefix_combinations("p", None, None, True) + + def test_invalid_combinations_value_error_nt_pro(self): + with self.assertRaises(ValueError): + validate_hgvs_prefix_combinations("c", None, "P", True) + + def test_invalid_combinations_value_error_splice(self): + with self.assertRaises(ValueError): + validate_hgvs_prefix_combinations("x", "c", "p", True) + + +class TestValidateVariantColumnsMatch(DfTestCase): + def test_same_df(self): + validate_variant_columns_match(self.dataframe, self.dataframe) + + def test_ignore_order(self): + validate_variant_columns_match(self.dataframe, self.dataframe.iloc[::-1]) + + def test_missing_column(self): + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, self.dataframe.drop(hgvs_nt_column, axis=1)) + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe.drop(hgvs_nt_column, axis=1), self.dataframe) + + def test_missing_variant(self): + df2 = self.dataframe.copy() + df2.loc[0, hgvs_pro_column] = None + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, df2) diff --git a/tests/validation/dataframe/test_variant.py b/tests/validation/dataframe/test_variant.py new file mode 100644 index 00000000..810780f4 --- /dev/null +++ b/tests/validation/dataframe/test_variant.py @@ -0,0 +1,893 @@ +import pytest +import pandas as pd +import unittest +from unittest.mock import Mock, patch + +from mavedb.lib.validation.constants.general import ( + hgvs_nt_column, + hgvs_pro_column, + hgvs_splice_column, +) +from mavedb.lib.validation.dataframe.variant import ( + validate_hgvs_transgenic_column, + validate_hgvs_genomic_column, + parse_genomic_variant, + parse_transgenic_variant, + validate_observed_sequence_types, + validate_hgvs_prefix_combinations, +) +from mavedb.lib.validation.exceptions import ValidationError + +from tests.helpers.constants import VALID_ACCESSION, TEST_CDOT_TRANSCRIPT +from tests.validation.dataframe.conftest import DfTestCase + + +try: + import hgvs # noqa: F401 + import cdot.hgvs.dataproviders # noqa: F401 + + HGVS_INSTALLED = True +except ModuleNotFoundError: + HGVS_INSTALLED = False + + +# Spoof the target sequence type +class NucleotideSequenceTestCase: + def __init__(self): + self.sequence = "ATG" + self.sequence_type = "dna" + + +class ProteinSequenceTestCase: + def __init__(self): + self.sequence = "MTG" + self.sequence_type = "protein" + + +class TestValidateTransgenicColumn(DfTestCase): + def setUp(self): + super().setUp() + + self.valid_hgvs_columns = [ + pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column), + pd.Series(["m.1A>G", "m.1A>T"], name=hgvs_nt_column), + pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_nt_column), + pd.Series(["n.1A>G", "n.1A>T"], name=hgvs_nt_column), + pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_splice_column), + pd.Series(["p.Met1Val", "p.Met1Leu"], name=hgvs_pro_column), + ] + + self.valid_hgvs_columns_nt_only = [ + pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column), + pd.Series(["m.1A>G", "m.1A>T"], name=hgvs_nt_column), + pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_nt_column), + pd.Series(["n.1A>G", "n.1A>T"], name=hgvs_nt_column), + ] + + self.valid_hgvs_columns_multi_target = 
[ + pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name=hgvs_nt_column), + pd.Series(["test_nt:m.1A>G", "test_nt:m.1A>T"], name=hgvs_nt_column), + pd.Series(["test_nt:c.1A>G", "test_nt:c.1A>T"], name=hgvs_nt_column), + pd.Series(["test_nt:n.1A>G", "test_nt:n.1A>T"], name=hgvs_nt_column), + pd.Series(["test_nt:c.1A>G", "test_pt:c.1A>T"], name=hgvs_splice_column), + pd.Series(["test_pt:p.Met1Val", "test_pt:p.Met1Leu"], name=hgvs_pro_column), + pd.Series(["test_nt:p.Met1Val", "test_pt:p.Met1Leu"], name=hgvs_pro_column), + pd.Series(["test_nt:p.Met1Val", "test_nt:p.Met1Leu"], name=hgvs_pro_column), + ] + + self.valid_hgvs_columns_nt_only_multi_target = [ + pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name=hgvs_nt_column), + pd.Series(["test_nt:m.1A>G", "test_nt:m.1A>T"], name=hgvs_nt_column), + pd.Series(["test_nt:c.1A>G", "test_nt:c.1A>T"], name=hgvs_nt_column), + pd.Series(["test_nt:n.1A>G", "test_nt:n.1A>T"], name=hgvs_nt_column), + ] + + self.valid_hgvs_columns_invalid_names = [ + pd.Series(["g.1A>G", "g.1A>T"], name="invalid_column_name"), + pd.Series(["p.Met1Val", "p.Met1Leu"], name="invalid_column_name"), + ] + + self.valid_hgvs_columns_invalid_names_multi_target = [ + pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name="invalid_column_name"), + pd.Series(["test_pt:p.Met1Val", "test_pt:p.Met1Leu"], name="invalid_column_name"), + ] + + self.valid_hgvs_columns_invalid_for_index = [ + # missing data + pd.Series(["c.1A>G", None], name=hgvs_nt_column), + pd.Series([None, "p.Met1Val"], name=hgvs_pro_column), + pd.Series([None, None], name=hgvs_nt_column), + pd.Series([None, None], name=hgvs_pro_column), + # duplicate rows + pd.Series(["c.1A>G", "c.1A>G"], name=hgvs_nt_column), + pd.Series(["p.Met1Val", "p.Met1Val"], name=hgvs_pro_column), + ] + + self.valid_hgvs_columns_invalid_for_index_multi_target = [ + # missing data + pd.Series(["test_nt:c.1A>G", None], name=hgvs_nt_column), + pd.Series([None, "test_pt:p.Met1Val"], name=hgvs_pro_column), + pd.Series([None, None], name=hgvs_nt_column), + pd.Series([None, None], name=hgvs_pro_column), + # duplicate rows + pd.Series(["test_nt:c.1A>G", "test_nt:c.1A>G"], name=hgvs_nt_column), + pd.Series(["test_nt:p.Met1Val", "test_nt:p.Met1Val"], name=hgvs_pro_column), + ] + + self.invalid_hgvs_columns_by_name = [ + pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_splice_column), + pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_pro_column), + pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_pro_column), + pd.Series(["n.1A>G", "n.1A>T"], name=hgvs_pro_column), + pd.Series(["p.Met1Val", "p.Met1Leu"], name=hgvs_nt_column), + ] + + self.invalid_hgvs_columns_by_name_multi_target = [ + pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name=hgvs_splice_column), + pd.Series(["test_pt:g.1A>G", "test_pt:g.1A>T"], name=hgvs_pro_column), + pd.Series(["test_nt:c.1A>G", "test_pt:c.1A>T"], name=hgvs_pro_column), + pd.Series(["test_nt:n.1A>G", "test_nt:n.1A>T"], name=hgvs_pro_column), + pd.Series(["test_nt:p.Met1Val", "test_nt:p.Met1Leu"], name=hgvs_nt_column), + pd.Series(["test_nt:p.Met1Val", "test_pt:p.Met1Leu"], name=hgvs_nt_column), + ] + + self.invalid_hgvs_columns_by_contents = [ + pd.Series(["r.1a>g", "r.1a>u"], name=hgvs_splice_column), # rna not allowed + pd.Series(["r.1a>g", "r.1a>u"], name=hgvs_nt_column), # rna not allowed + pd.Series(["c.1A>G", "c.5A>T"], name=hgvs_nt_column), # out of bounds for target + pd.Series(["c.1A>G", "_wt"], name=hgvs_nt_column), # old special variant + pd.Series(["p.Met1Leu", "_sy"], name=hgvs_pro_column), # old special variant + 
pd.Series(["n.1A>G", "c.1A>T"], name=hgvs_nt_column), # mixed prefix + pd.Series(["c.1A>G", "p.Met1Leu"], name=hgvs_pro_column), # mixed types/prefix + pd.Series(["c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_nt_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_splice_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_pro_column), # contains numeric + ] + + self.invalid_hgvs_columns_by_contents_multi_target = [ + pd.Series(["test_nt:r.1a>g", "test_nt:r.1a>u"], name=hgvs_splice_column), # rna not allowed + pd.Series(["test_nt:r.1a>g", "test_nt:r.1a>u"], name=hgvs_nt_column), # rna not allowed + pd.Series(["bad_label:r.1a>g", "test_nt:r.1a>u"], name=hgvs_nt_column), # invalid label + pd.Series(["test_nt:c.1A>G", "test_nt:c.5A>T"], name=hgvs_nt_column), # out of bounds for target + pd.Series(["test_nt:c.1A>G", "test_nt:_wt"], name=hgvs_nt_column), # old special variant + pd.Series(["test_pt:p.Met1Leu", "test_nt:_sy"], name=hgvs_pro_column), # old special variant + pd.Series(["test_nt:n.1A>G", "test_nt:c.1A>T"], name=hgvs_nt_column), # mixed prefix + pd.Series(["test_nt:c.1A>G", "test_pt:p.Met1Leu"], name=hgvs_pro_column), # mixed types/prefix + pd.Series(["test_pt:c.1A>G", "bad_label:p.Met1Leu"], name=hgvs_pro_column), # invalid label + pd.Series(["test_nt:c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_nt_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_splice_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_pro_column), # contains numeric + ] + + self.nt_sequence_test_case = NucleotideSequenceTestCase() + self.pt_sequence_test_case = ProteinSequenceTestCase() + + def test_valid_columns_single_target(self): + for column in self.valid_hgvs_columns: + with self.subTest(column=column): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + for column in self.valid_hgvs_columns_invalid_for_index: + with self.subTest(column=column): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + + def test_valid_columns_multi_target(self): + for column in self.valid_hgvs_columns_multi_target: + with self.subTest(column=column): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + for column in self.valid_hgvs_columns_invalid_for_index_multi_target: + with self.subTest(column=column): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + + # Test when supplied targets do not contain a DNA sequence (only valid for hgvs_nt col) + def test_valid_columns_invalid_supplied_targets(self): + for column in self.valid_hgvs_columns_nt_only: + with self.subTest(column=column): + with self.assertRaises(ValueError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_pt": self.pt_sequence_test_case}, # type: ignore + ) + + # Test when multiple supplied targets do not contain a DNA sequence (only valid for hgvs_nt col) + def test_valid_columns_invalid_supplied_targets_multi_target(self): + for column in self.valid_hgvs_columns_nt_only_multi_target: + with self.subTest(column=column): + with self.assertRaises(ValueError): + validate_hgvs_transgenic_column( + column, + 
is_index=True, + targets={"test_pt": self.pt_sequence_test_case, "test_pt_2": self.pt_sequence_test_case}, # type: ignore + ) + + def test_valid_columns_invalid_column_name(self): + for column in self.valid_hgvs_columns_invalid_names: + with self.subTest(column=column): + with self.assertRaises(ValueError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + + def test_valid_columns_invalid_column_name_multi_target(self): + for column in self.valid_hgvs_columns_invalid_names_multi_target: + with self.subTest(column=column): + with self.assertRaises(ValueError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + + def test_index_columns(self): + for column in self.valid_hgvs_columns: + with self.subTest(column=column): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + for column in self.valid_hgvs_columns_invalid_for_index: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + + def test_index_columns_multi_target(self): + for column in self.valid_hgvs_columns_multi_target: + with self.subTest(column=column): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + for column in self.valid_hgvs_columns_invalid_for_index_multi_target: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + + def test_invalid_column_values(self): + for column in self.invalid_hgvs_columns_by_contents: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + for column in self.invalid_hgvs_columns_by_contents: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + + def test_invalid_column_values_multi_target(self): + for column in self.invalid_hgvs_columns_by_contents_multi_target: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + for column in self.invalid_hgvs_columns_by_contents_multi_target: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + + def test_valid_column_values_wrong_column_name(self): + for column in self.invalid_hgvs_columns_by_name: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + 
for column in self.invalid_hgvs_columns_by_name: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + + def test_valid_column_values_wrong_column_name_multi_target(self): + for column in self.invalid_hgvs_columns_by_name: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + for column in self.invalid_hgvs_columns_by_name: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + + +# Spoof the accession type +class AccessionTestCase: + def __init__(self): + self.accession = VALID_ACCESSION + + +class GenomicColumnValidationTestCase(DfTestCase): + def setUp(self): + super().setUp() + + self.accession_test_case = AccessionTestCase() + + self.valid_hgvs_column = pd.Series( + [f"{VALID_ACCESSION}:c.1G>A", f"{VALID_ACCESSION}:c.2A>T"], name=hgvs_nt_column + ) + self.missing_data = pd.Series([f"{VALID_ACCESSION}:c.3T>G", None], name=hgvs_nt_column) + self.duplicate_data = pd.Series([f"{VALID_ACCESSION}:c.4A>G", f"{VALID_ACCESSION}:c.4A>G"], name=hgvs_nt_column) + + self.invalid_hgvs_columns_by_name = [ + pd.Series([f"{VALID_ACCESSION}:g.1A>G", f"{VALID_ACCESSION}:g.1A>T"], name=hgvs_splice_column), + pd.Series([f"{VALID_ACCESSION}:g.1A>G", f"{VALID_ACCESSION}:g.1A>T"], name=hgvs_pro_column), + pd.Series([f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.1A>T"], name=hgvs_pro_column), + pd.Series([f"{VALID_ACCESSION}:n.1A>G", f"{VALID_ACCESSION}:n.1A>T"], name=hgvs_pro_column), + pd.Series([f"{VALID_ACCESSION}:p.Met1Val", f"{VALID_ACCESSION}:p.Met1Leu"], name=hgvs_nt_column), + ] + + self.invalid_hgvs_columns_by_contents = [ + pd.Series( + [f"{VALID_ACCESSION}:r.1a>g", f"{VALID_ACCESSION}:r.1a>u"], name=hgvs_splice_column + ), # rna not allowed + pd.Series( + [f"{VALID_ACCESSION}:r.1a>g", f"{VALID_ACCESSION}:r.1a>u"], name=hgvs_nt_column + ), # rna not allowed + pd.Series([f"{VALID_ACCESSION}:c.1A>G", "_wt"], name=hgvs_nt_column), # old special variant + pd.Series([f"{VALID_ACCESSION}:p.Met1Leu", "_sy"], name=hgvs_pro_column), # old special variant + pd.Series([f"{VALID_ACCESSION}:n.1A>G", f"{VALID_ACCESSION}:c.1A>T"], name=hgvs_nt_column), # mixed prefix + pd.Series( + [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:p.Met1Leu"], name=hgvs_pro_column + ), # mixed types/prefix + pd.Series(["c.1A>G", "p.Met1Leu"], name=hgvs_pro_column), # variants should be fully qualified + pd.Series([f"{VALID_ACCESSION}:c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_nt_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_splice_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_pro_column), # contains numeric + ] + + self.invalid_hgvs_columns_by_contents_under_strict_validation = [ + pd.Series( + [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.5A>T"], name=hgvs_nt_column + ), # out of bounds for target + ] + + +class TestValidateHgvsGenomicColumn(GenomicColumnValidationTestCase): + # Identical behavior for installed/uninstalled HGVS + def test_valid_variant_invalid_missing_index(self): + with ( + 
self.assertRaises(ValidationError), + ): + validate_hgvs_genomic_column( + self.missing_data, + is_index=True, + targets=[self.accession_test_case], + hdp=self.mocked_human_data_provider, + ) # type: ignore + + # Identical behavior for installed/uninstalled HGVS + def test_valid_variant_invalid_duplicate_index(self): + with ( + self.assertRaises(ValidationError), + ): + validate_hgvs_genomic_column( + self.duplicate_data, + is_index=True, + targets=[self.accession_test_case], + hdp=self.mocked_human_data_provider, + ) # type: ignore + + +@unittest.skipUnless(HGVS_INSTALLED, "HGVS module not installed") +@pytest.fixture +def patched_data_provider_class_attr(request, data_provider): + """ + Sets the `human_data_provider` attribute on the class from the requesting + test context to the `data_provider` fixture. This allows fixture use across + the `unittest.TestCase` class. + """ + request.cls.patched_human_data_provider = data_provider + + +@unittest.skipUnless(HGVS_INSTALLED, "HGVS module not installed") +@pytest.mark.usefixtures("patched_data_provider_class_attr") +class TestValidateHgvsGenomicColumnHgvsInstalled(GenomicColumnValidationTestCase): + def test_valid_variant(self): + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + ): + validate_hgvs_genomic_column( + self.valid_hgvs_column, + is_index=False, + targets=[self.accession_test_case], + hdp=self.patched_human_data_provider, + ) # type: ignore + + def test_valid_variant_valid_missing(self): + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + ): + validate_hgvs_genomic_column( + self.missing_data, + is_index=False, + targets=[self.accession_test_case], + hdp=self.patched_human_data_provider, + ) # type: ignore + + def test_valid_variant_valid_duplicate(self): + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + ): + validate_hgvs_genomic_column( + self.missing_data, + is_index=False, + targets=[self.accession_test_case], + hdp=self.patched_human_data_provider, + ) # type: ignore + + def test_valid_variant_index(self): + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + ): + validate_hgvs_genomic_column( + self.valid_hgvs_column, + is_index=True, + targets=[self.accession_test_case], + hdp=self.patched_human_data_provider, + ) # type: ignore + + def test_invalid_column_values(self): + for column in ( + self.invalid_hgvs_columns_by_contents + self.invalid_hgvs_columns_by_contents_under_strict_validation + ): + with ( + self.subTest(column=column), + self.assertRaises(ValidationError), + patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + ), + ): + validate_hgvs_genomic_column( + column, + is_index=False, + targets=[self.accession_test_case], + hdp=self.patched_human_data_provider, # type: ignore + ) + for column in ( + self.invalid_hgvs_columns_by_contents + self.invalid_hgvs_columns_by_contents_under_strict_validation + ): + with ( + self.subTest(column=column), + self.assertRaises(ValidationError), + patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + ), + ): + validate_hgvs_genomic_column( + column, + is_index=True, + targets=[self.accession_test_case], + hdp=self.patched_human_data_provider, # type: ignore + ) + + def 
test_valid_column_values_wrong_column_name(self): + for column in self.invalid_hgvs_columns_by_name: + with ( + self.subTest(column=column), + self.assertRaises(ValidationError), + patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + ), + ): + validate_hgvs_genomic_column( + column, + is_index=False, + targets=[self.accession_test_case], + hdp=self.patched_human_data_provider, # type: ignore + ) + for column in self.invalid_hgvs_columns_by_name: + with ( + self.subTest(column=column), + self.assertRaises(ValidationError), + patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + ), + ): + validate_hgvs_genomic_column( + column, + is_index=True, + targets=[self.accession_test_case], + hdp=self.patched_human_data_provider, # type: ignore + ) + + # TODO: Test multiple targets + + +@unittest.skipIf(HGVS_INSTALLED, "HGVS module installed") +class TestValidateHgvsGenomicColumnHgvsNotInstalled(GenomicColumnValidationTestCase): + def test_valid_variant_strict_validation(self): + with self.assertRaises(ModuleNotFoundError): + validate_hgvs_genomic_column( + self.valid_hgvs_column, + is_index=False, + targets=[self.accession_test_case], + hdp=self.mocked_human_data_provider, + ) # type: ignore + + def test_valid_variant_limited_validation(self): + validate_hgvs_genomic_column( + self.valid_hgvs_column, is_index=False, targets=[self.accession_test_case], hdp=None + ) # type: ignore + + def test_valid_variant_valid_missing_strict_validation(self): + with self.assertRaises(ModuleNotFoundError): + validate_hgvs_genomic_column( + self.missing_data, + is_index=False, + targets=[self.accession_test_case], + hdp=self.mocked_human_data_provider, + ) # type: ignore + + def test_valid_variant_valid_missing_limited_validation(self): + validate_hgvs_genomic_column(self.missing_data, is_index=False, targets=[self.accession_test_case], hdp=None) # type: ignore + + def test_valid_variant_valid_duplicate_strict_validation(self): + with self.assertRaises(ModuleNotFoundError): + validate_hgvs_genomic_column( + self.missing_data, + is_index=False, + targets=[self.accession_test_case], + hdp=self.mocked_human_data_provider, + ) # type: ignore + + def test_valid_variant_valid_duplicate_limited_validation(self): + validate_hgvs_genomic_column(self.missing_data, is_index=False, targets=[self.accession_test_case], hdp=None) # type: ignore + + def test_valid_variant_index_strict_validation(self): + with self.assertRaises(ModuleNotFoundError): + validate_hgvs_genomic_column( + self.valid_hgvs_column, + is_index=True, + targets=[self.accession_test_case], + hdp=self.mocked_human_data_provider, + ) # type: ignore + + def test_valid_variant_index_limited_validation(self): + validate_hgvs_genomic_column( + self.valid_hgvs_column, is_index=True, targets=[self.accession_test_case], hdp=None + ) # type: ignore + + def test_invalid_column_values_strict_validation(self): + for column in ( + self.invalid_hgvs_columns_by_contents + self.invalid_hgvs_columns_by_contents_under_strict_validation + ): + with self.subTest(column=column), self.assertRaises((ValidationError, ModuleNotFoundError)): + validate_hgvs_genomic_column( + column, + is_index=False, + targets=[self.accession_test_case], + hdp=self.mocked_human_data_provider, # type: ignore + ) + for column in ( + self.invalid_hgvs_columns_by_contents + self.invalid_hgvs_columns_by_contents_under_strict_validation + ): + with self.subTest(column=column), 
self.assertRaises((ValidationError, ModuleNotFoundError)): + validate_hgvs_genomic_column( + column, + is_index=True, + targets=[self.accession_test_case], + hdp=self.mocked_human_data_provider, # type: ignore + ) + + def test_invalid_column_values_limited_validation(self): + for column in self.invalid_hgvs_columns_by_contents: + with self.subTest(column=column), self.assertRaises(ValidationError): + validate_hgvs_genomic_column( + column, + is_index=False, + targets=[self.accession_test_case], + hdp=None, # type: ignore + ) + for column in self.invalid_hgvs_columns_by_contents: + with self.subTest(column=column), self.assertRaises(ValidationError): + validate_hgvs_genomic_column( + column, + is_index=True, + targets=[self.accession_test_case], + hdp=None, # type: ignore + ) + for column in self.invalid_hgvs_columns_by_contents_under_strict_validation: + with self.subTest(column=column): + validate_hgvs_genomic_column( + column, + is_index=True, + targets=[self.accession_test_case], + hdp=None, # type: ignore + ) + + def test_valid_column_values_wrong_column_name_strict_validation(self): + for column in self.invalid_hgvs_columns_by_name: + with self.subTest(column=column), self.assertRaises(ValidationError): + validate_hgvs_genomic_column( + column, + is_index=False, + targets=[self.accession_test_case], + hdp=self.mocked_human_data_provider, # type: ignore + ) + for column in self.invalid_hgvs_columns_by_name: + with self.subTest(column=column), self.assertRaises(ValidationError): + validate_hgvs_genomic_column( + column, + is_index=True, + targets=[self.accession_test_case], + hdp=self.mocked_human_data_provider, # type: ignore + ) + + def test_valid_column_values_wrong_column_name_limited_validation(self): + for column in self.invalid_hgvs_columns_by_name: + with self.subTest(column=column), self.assertRaises(ValidationError): + validate_hgvs_genomic_column( + column, + is_index=False, + targets=[self.accession_test_case], + hdp=None, # type: ignore + ) + for column in self.invalid_hgvs_columns_by_name: + with self.subTest(column=column), self.assertRaises(ValidationError): + validate_hgvs_genomic_column( + column, + is_index=True, + targets=[self.accession_test_case], + hdp=None, # type: ignore + ) + + +class TestParseGenomicVariant(unittest.TestCase): + def setUp(self): + super().setUp() + + self.parser = Mock() + self.validator = Mock() + self.parser.parse.return_value = "irrelevant" + self.validator.validate.return_value = True + + self.falsy_variant_strings = [None, ""] + self.valid_hgvs_column = pd.Series( + [f"{VALID_ACCESSION}:c.1G>A", f"{VALID_ACCESSION}:c.2A>T"], name=hgvs_nt_column + ) + self.invalid_hgvs_column = pd.Series( + [f"{VALID_ACCESSION}:c.1laksdfG>A", f"{VALID_ACCESSION}:c.2kadlfjA>T"], name=hgvs_nt_column + ) + + +@unittest.skipUnless(HGVS_INSTALLED, "HGVS module not installed") +class TestParseGenomicVariantHgvsInstalled(TestParseGenomicVariant): + def test_parse_genomic_variant_nonetype_variant_string(self): + for variant_string in self.falsy_variant_strings: + with self.subTest(variant_string=variant_string): + valid, error = parse_genomic_variant(0, None, self.parser, self.validator) + assert valid + assert error is None + + def test_parse_valid_hgvs_variant(self): + for variant_string in self.valid_hgvs_column: + with self.subTest(variant_string=variant_string): + valid, error = parse_genomic_variant(0, self.valid_hgvs_column[0], self.parser, self.validator) + assert valid + assert error is None + + def test_parse_invalid_hgvs_variant(self): + from 
hgvs.exceptions import HGVSError + + self.validator.validate.side_effect = HGVSError("Invalid variant") + + for variant_string in self.invalid_hgvs_column: + with self.subTest(variant_string=variant_string): + valid, error = parse_genomic_variant(0, self.valid_hgvs_column[0], self.parser, self.validator) + assert not valid + assert "Failed to parse row 0 with HGVS exception:" in error + + +@unittest.skipIf(HGVS_INSTALLED, "HGVS module installed") +class TestParseGenomicVariantHgvsNotInstalled(TestParseGenomicVariant): + def test_parse_genomic_variant_nonetype_variant_string(self): + for variant_string in self.falsy_variant_strings: + with self.subTest(variant_string=variant_string), self.assertRaises(ModuleNotFoundError): + parse_genomic_variant(0, None, self.parser, self.validator) + + def test_parse_valid_hgvs_variant(self): + for variant_string in self.valid_hgvs_column: + with self.subTest(variant_string=variant_string), self.assertRaises(ModuleNotFoundError): + parse_genomic_variant(0, self.valid_hgvs_column[0], self.parser, self.validator) + + def test_parse_invalid_hgvs_variant(self): + for variant_string in self.invalid_hgvs_column: + with self.subTest(variant_string=variant_string), self.assertRaises(ModuleNotFoundError): + parse_genomic_variant(0, self.valid_hgvs_column[0], self.parser, self.validator) + + +class TestParseTransgenicVariant(unittest.TestCase): + def setUp(self): + super().setUp() + + self.target_sequences = {f"{VALID_ACCESSION}": "ATGC"} + + self.falsy_variant_strings = [None, ""] + self.valid_fully_qualified_transgenic_column = pd.Series( + [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.2T>G {VALID_ACCESSION}:c.2T>G"], name=hgvs_nt_column + ) + self.valid_basic_transgenic_column = pd.Series(["c.1A>G", "c.2T>G c.2T>G"], name=hgvs_nt_column) + self.invalid_transgenic_column = pd.Series(["123A>X", "NM_001:123A>Y"], name=hgvs_nt_column) + self.mismatched_transgenic_column = pd.Series(["c.1T>G", "c.2A>G"], name=hgvs_nt_column) + + def test_parse_transgenic_variant_nonetype_variant_string(self): + for variant_string in self.falsy_variant_strings: + with self.subTest(variant_string=variant_string): + valid, error = parse_transgenic_variant(0, None, self.target_sequences, is_fully_qualified=False) + assert valid + assert error is None + + def test_parse_valid_fully_qualified_transgenic_variant(self): + for variant_string in self.valid_fully_qualified_transgenic_column: + with self.subTest(variant_string=variant_string): + valid, error = parse_transgenic_variant( + 0, variant_string, self.target_sequences, is_fully_qualified=True + ) + assert valid + assert error is None + + def test_parse_valid_basic_transgenic_variant(self): + for variant_string in self.valid_basic_transgenic_column: + with self.subTest(variant_string=variant_string): + valid, error = parse_transgenic_variant( + 0, variant_string, self.target_sequences, is_fully_qualified=False + ) + assert valid + assert error is None + + def test_parse_invalid_transgenic_variant(self): + for variant_string in self.invalid_transgenic_column: + with self.subTest(variant_string=variant_string): + valid, error = parse_transgenic_variant( + 0, variant_string, self.target_sequences, is_fully_qualified=False + ) + assert not valid + assert "invalid variant string" in error + + def test_parse_mismatched_transgenic_variant(self): + for variant_string in self.mismatched_transgenic_column: + with self.subTest(variant_string=variant_string): + valid, error = parse_transgenic_variant( + 0, variant_string, 
self.target_sequences, is_fully_qualified=False + ) + assert not valid + assert "target sequence mismatch" in error + + +class TestValidateObservedSequenceTypes(unittest.TestCase): + def setUp(self): + super().setUp() + + mock_valid_target1 = Mock() + mock_valid_target2 = Mock() + mock_valid_target1.sequence_type = "dna" + mock_valid_target1.sequence = "ATGC" + mock_valid_target2.sequence_type = "protein" + mock_valid_target2.sequence = "NM" + self.valid_targets = { + "NM_001": mock_valid_target1, + "NM_002": mock_valid_target2, + } + + mock_invalid_target1 = Mock() + mock_invalid_target2 = Mock() + mock_invalid_target1.sequence_type = "dna" + mock_invalid_target1.sequence = "ATGC" + mock_invalid_target2.sequence_type = "invalid" + mock_invalid_target2.sequence = "ABCD" + self.invalid_targets = { + "NM_001": mock_invalid_target1, + "NM_002": mock_invalid_target2, + } + + def test_validate_observed_sequence_types(self): + observed_sequence_types = validate_observed_sequence_types(self.valid_targets) + assert observed_sequence_types == ["dna", "protein"] + + def test_validate_invalid_observed_sequence_types(self): + with self.assertRaises(ValueError): + validate_observed_sequence_types(self.invalid_targets) + + def test_validate_observed_sequence_types_no_targets(self): + with self.assertRaises(ValueError): + validate_observed_sequence_types({}) + + +class TestValidateHgvsPrefixCombinations(unittest.TestCase): + def setUp(self): + super().setUp() + + self.valid_combinations = [ + ("c", None, None, False), + ("g", "n", None, False), + ("g", "c", "p", False), + ("n", None, None, True), + ] + + self.invalid_combinations = [ + ("n", "n", None, False), + ("c", "n", None, False), + ("g", "n", "p", False), + ("g", "c", None, False), + ("n", None, "p", False), + ("g", None, None, True), # invalid nucleotide prefix when transgenic + ] + + self.invalid_prefix_values = [ + ("x", None, None, False), # invalid nucleotide prefix + ("c", "x", None, False), # invalid splice prefix + ("c", None, "x", False), # invalid protein prefix + ] + + def test_valid_combinations(self): + for hgvs_nt, hgvs_splice, hgvs_pro, transgenic in self.valid_combinations: + with self.subTest(hgvs_nt=hgvs_nt, hgvs_splice=hgvs_splice, hgvs_pro=hgvs_pro, transgenic=transgenic): + validate_hgvs_prefix_combinations(hgvs_nt, hgvs_splice, hgvs_pro, transgenic) + + def test_invalid_combinations(self): + for hgvs_nt, hgvs_splice, hgvs_pro, transgenic in self.invalid_combinations: + with self.subTest(hgvs_nt=hgvs_nt, hgvs_splice=hgvs_splice, hgvs_pro=hgvs_pro, transgenic=transgenic): + with self.assertRaises(ValidationError): + validate_hgvs_prefix_combinations(hgvs_nt, hgvs_splice, hgvs_pro, transgenic) + + def test_invalid_prefix_values(self): + for hgvs_nt, hgvs_splice, hgvs_pro, transgenic in self.invalid_prefix_values: + with self.subTest(hgvs_nt=hgvs_nt, hgvs_splice=hgvs_splice, hgvs_pro=hgvs_pro, transgenic=transgenic): + with self.assertRaises(ValueError): + validate_hgvs_prefix_combinations(hgvs_nt, hgvs_splice, hgvs_pro, transgenic) diff --git a/tests/validation/test_dataframe.py b/tests/validation/test_dataframe.py deleted file mode 100644 index 378cdd7d..00000000 --- a/tests/validation/test_dataframe.py +++ /dev/null @@ -1,1121 +0,0 @@ -import itertools -from unittest import TestCase -from unittest.mock import patch - -import cdot.hgvs.dataproviders -import numpy as np -import pandas as pd -import pytest - -from mavedb.lib.validation.constants.general import ( - hgvs_nt_column, - hgvs_pro_column, - hgvs_splice_column, - 
required_score_column, -) -from mavedb.lib.validation.dataframe import ( - choose_dataframe_index_column, - generate_variant_prefixes, - infer_column_type, - sort_dataframe_columns, - standardize_dataframe, - validate_and_standardize_dataframe_pair, - validate_column_names, - validate_data_column, - validate_hgvs_genomic_column, - validate_hgvs_prefix_combinations, - validate_hgvs_transgenic_column, - validate_no_null_rows, - validate_variant_column, - validate_variant_columns_match, - validate_variant_formatting, -) -from mavedb.lib.validation.exceptions import ValidationError -from tests.helpers.constants import TEST_CDOT_TRANSCRIPT, VALID_ACCESSION - - -@pytest.fixture -def data_provider_class_attr(request, data_provider): - """ - Sets the `human_data_provider` attribute on the class from the requesting - test context to the `data_provider` fixture. This allows fixture use across - the `unittest.TestCase` class. - """ - request.cls.human_data_provider = data_provider - - -# Special DF Test Case that contains dummy data for tests below -@pytest.mark.usefixtures("data_provider_class_attr") -class DfTestCase(TestCase): - def setUp(self): - self.dataframe = pd.DataFrame( - { - hgvs_nt_column: ["g.1A>G", "g.1A>T"], - hgvs_splice_column: ["c.1A>G", "c.1A>T"], - hgvs_pro_column: ["p.Met1Val", "p.Met1Leu"], - required_score_column: [1.0, 2.0], - "extra": [12.0, 3.0], - "count1": [3.0, 5.0], - "count2": [9, 10], - "extra2": ["pathogenic", "benign"], - "mixed_types": ["test", 1.0], - "null_col": [None, None], - } - ) - - -class TestInferColumnType(TestCase): - def test_floats(self): - test_data = pd.Series([12.0, 1.0, -0.012, 5.75]) - self.assertEqual(infer_column_type(test_data), "numeric") - - def test_ints(self): - test_data = pd.Series([12, 1, 0, -5]) - self.assertEqual(infer_column_type(test_data), "numeric") - - def test_floats_with_na(self): - test_data = pd.Series([12.0, 1.0, None, -0.012, 5.75]) - self.assertEqual(infer_column_type(test_data), "numeric") - - def test_ints_with_na(self): - test_data = pd.Series([12, 1, None, 0, -5]) - self.assertEqual(infer_column_type(test_data), "numeric") - - def test_convertable_strings(self): - test_data = pd.Series(["12.5", 1.25, "0", "-5"]) - self.assertEqual(infer_column_type(test_data), "numeric") - - def test_strings(self): - test_data = pd.Series(["hello", "test", "suite", "123abc"]) - self.assertEqual(infer_column_type(test_data), "string") - - def test_strings_with_na(self): - test_data = pd.Series(["hello", "test", None, "suite", "123abc"]) - self.assertEqual(infer_column_type(test_data), "string") - - def test_mixed(self): - test_data = pd.Series(["hello", 12.123, -75, "123abc"]) - self.assertEqual(infer_column_type(test_data), "mixed") - - def test_mixed_with_na(self): - test_data = pd.Series(["hello", None, 12.123, -75, "123abc"]) - self.assertEqual(infer_column_type(test_data), "mixed") - - def test_all_na(self): - test_data = pd.Series([None] * 5) - self.assertEqual(infer_column_type(test_data), "empty") - - -class TestSortDataframeColumns(DfTestCase): - def test_preserve_sorted(self): - sorted_df = sort_dataframe_columns(self.dataframe) - pd.testing.assert_frame_equal(self.dataframe, sorted_df) - - def test_sort_dataframe(self): - sorted_df = sort_dataframe_columns( - self.dataframe[ - [ - hgvs_splice_column, - "extra", - "count1", - hgvs_pro_column, - required_score_column, - hgvs_nt_column, - "count2", - "extra2", - "mixed_types", - "null_col", - ] - ] - ) - pd.testing.assert_frame_equal(self.dataframe, sorted_df) - - def 
test_sort_dataframe_is_case_insensitive(self): - self.dataframe = self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()}) - sorted_df = sort_dataframe_columns(self.dataframe) - pd.testing.assert_frame_equal(self.dataframe, sorted_df) - - def test_sort_dataframe_preserves_extras_order(self): - sorted_df = sort_dataframe_columns( - self.dataframe[ - [ - hgvs_splice_column, - "count2", - hgvs_pro_column, - required_score_column, - hgvs_nt_column, - "count1", - "extra2", - "extra", - "mixed_types", - ] - ] - ) - pd.testing.assert_frame_equal( - self.dataframe[ - [ - hgvs_nt_column, - hgvs_splice_column, - hgvs_pro_column, - required_score_column, - "count2", - "count1", - "extra2", - "extra", - "mixed_types", - ] - ], - sorted_df, - ) - - -class TestStandardizeDataframe(DfTestCase): - def test_preserve_standardized(self): - standardized_df = standardize_dataframe(self.dataframe) - pd.testing.assert_frame_equal(self.dataframe, standardized_df) - - def test_standardize_changes_case_variants(self): - standardized_df = standardize_dataframe(self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()})) - pd.testing.assert_frame_equal(self.dataframe, standardized_df) - - def test_standardice_changes_case_scores(self): - standardized_df = standardize_dataframe( - self.dataframe.rename(columns={required_score_column: required_score_column.title()}) - ) - pd.testing.assert_frame_equal(self.dataframe, standardized_df) - - def test_standardize_preserves_extras_case(self): - standardized_df = standardize_dataframe(self.dataframe.rename(columns={"extra": "extra".upper()})) - pd.testing.assert_frame_equal(self.dataframe.rename(columns={"extra": "extra".upper()}), standardized_df) - - def test_standardize_sorts_columns(self): - standardized_df = standardize_dataframe( - self.dataframe[ - [ - hgvs_splice_column, - "count2", - hgvs_pro_column, - required_score_column, - hgvs_nt_column, - "count1", - "extra", - ] - ] - ) - pd.testing.assert_frame_equal( - self.dataframe[ - [ - hgvs_nt_column, - hgvs_splice_column, - hgvs_pro_column, - required_score_column, - "count2", - "count1", - "extra", - ] - ], - standardized_df, - ) - - -class TestValidateStandardizeDataFramePair(DfTestCase): - def test_no_targets(self): - with self.assertRaises(ValueError): - validate_and_standardize_dataframe_pair( - self.dataframe, counts_df=None, targets=[], hdp=self.human_data_provider - ) - - # TODO: Add additional DataFrames. 
Realistically, if other unit tests pass this function is ok - - -class TestValidateDataColumn(DfTestCase): - def test_valid(self): - validate_data_column(self.dataframe[required_score_column]) - - def test_null_column(self): - self.dataframe[required_score_column] = None - with self.assertRaises(ValidationError): - validate_data_column(self.dataframe[required_score_column]) - - def test_missing_data(self): - self.dataframe.loc[0, "extra"] = None - validate_data_column(self.dataframe["extra"]) - - def test_force_numeric(self): - with self.assertRaises(ValidationError): - validate_data_column(self.dataframe["extra2"], force_numeric=True) - - def test_mixed_types_invalid(self): - with self.assertRaises(ValidationError): - validate_data_column(self.dataframe["mixed_types"]) - - -class TestNullRows(DfTestCase): - def test_null_row(self): - self.dataframe.iloc[1, :] = None - with self.assertRaises(ValidationError): - validate_no_null_rows(self.dataframe) - - def test_valid(self): - validate_no_null_rows(self.dataframe) - - def test_only_hgvs_row(self): - self.dataframe.loc[1, [required_score_column, "extra", "count1", "count2"]] = None - validate_no_null_rows(self.dataframe) - - -class TestColumnNames(DfTestCase): - def test_only_two_kinds_of_dataframe(self): - with self.assertRaises(ValueError): - validate_column_names(self.dataframe, kind="score2") - - def test_score_df_has_score_column(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([required_score_column], axis=1), kind="scores") - - def test_count_df_lacks_score_column(self): - validate_column_names(self.dataframe.drop([required_score_column], axis=1), kind="counts") - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe, kind="counts") - - def test_count_df_has_score_column(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe, kind="counts") - - def test_df_with_only_scores(self): - validate_column_names(self.dataframe[[hgvs_pro_column, required_score_column]], kind="scores") - - def test_count_df_must_have_data(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe[[hgvs_nt_column, hgvs_pro_column]], kind="counts") - - def test_just_hgvs_nt(self): - validate_column_names(self.dataframe.drop([hgvs_pro_column, hgvs_splice_column], axis=1), kind="scores") - validate_column_names( - self.dataframe.drop([hgvs_pro_column, hgvs_splice_column, required_score_column], axis=1), kind="counts" - ) - - def test_just_hgvs_pro(self): - validate_column_names(self.dataframe.drop([hgvs_nt_column, hgvs_splice_column], axis=1), kind="scores") - validate_column_names( - self.dataframe.drop([hgvs_nt_column, hgvs_splice_column, required_score_column], axis=1), kind="counts" - ) - - def test_just_hgvs_pro_and_nt(self): - validate_column_names(self.dataframe.drop([hgvs_splice_column], axis=1), kind="scores") - validate_column_names(self.dataframe.drop([hgvs_splice_column, required_score_column], axis=1), kind="counts") - - def test_hgvs_splice_must_have_pro_and_nt_both_absent(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1), kind="scores") - - def test_hgvs_splice_must_have_pro_and_nt_nt_absent(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_nt_column], axis=1), kind="scores") - - def test_hgvs_splice_must_have_pro_and_nt_pro_absent(self): - with self.assertRaises(ValidationError): - 
validate_column_names(self.dataframe.drop([hgvs_pro_column], axis=1), kind="scores") - - def test_hgvs_splice_must_have_pro_and_nt_and_scores(self): - with self.assertRaises(ValidationError): - validate_column_names( - self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, required_score_column], axis=1), kind="counts" - ) - - def test_hgvs_splice_must_have_pro_and_nt_nt_scores_absent(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_nt_column, required_score_column], axis=1), kind="counts") - - def test_hgvs_splice_must_have_pro_and_nt_pro_scores_absent(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_pro_column, required_score_column], axis=1), kind="counts") - - def test_no_hgvs_column_scores(self): - with pytest.raises(ValidationError) as exc_info: - validate_column_names( - self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, hgvs_splice_column], axis=1), kind="scores" - ) - assert "dataframe does not define any variant columns" in str(exc_info.value) - - def test_no_hgvs_column_counts(self): - with pytest.raises(ValidationError) as exc_info: - validate_column_names( - self.dataframe.drop( - [hgvs_nt_column, hgvs_pro_column, hgvs_splice_column, required_score_column], axis=1 - ), - kind="counts", - ) - assert "dataframe does not define any variant columns" in str(exc_info.value) - - def test_validation_ignores_column_ordering_scores(self): - validate_column_names( - self.dataframe[[hgvs_nt_column, required_score_column, hgvs_pro_column, hgvs_splice_column]], kind="scores" - ) - validate_column_names(self.dataframe[[required_score_column, hgvs_nt_column, hgvs_pro_column]], kind="scores") - validate_column_names(self.dataframe[[hgvs_pro_column, required_score_column, hgvs_nt_column]], kind="scores") - - def test_validation_ignores_column_ordering_counts(self): - validate_column_names( - self.dataframe[[hgvs_nt_column, "count1", hgvs_pro_column, hgvs_splice_column, "count2"]], kind="counts" - ) - validate_column_names(self.dataframe[["count1", "count2", hgvs_nt_column, hgvs_pro_column]], kind="counts") - validate_column_names(self.dataframe[[hgvs_pro_column, "count1", "count2", hgvs_nt_column]], kind="counts") - - def test_validation_is_case_insensitive(self): - validate_column_names(self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()}), kind="scores") - validate_column_names( - self.dataframe.rename(columns={required_score_column: required_score_column.title()}), kind="scores" - ) - - def test_duplicate_hgvs_column_names_scores(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={hgvs_pro_column: hgvs_nt_column}), kind="scores") - - def test_duplicate_hgvs_column_names_counts(self): - with self.assertRaises(ValidationError): - validate_column_names( - self.dataframe.drop([required_score_column], axis=1).rename(columns={hgvs_pro_column: hgvs_nt_column}), - kind="counts", - ) - - def test_duplicate_score_column_names(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={"extra": required_score_column}), kind="scores") - - def test_duplicate_data_column_names_scores(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={"count2": "count1"}), kind="scores") - - def test_duplicate_data_column_names_counts(self): - with self.assertRaises(ValidationError): - validate_column_names( - self.dataframe.drop([required_score_column], 
axis=1).rename(columns={"count2": "count1"}), kind="counts" - ) - - # Written without @pytest.mark.parametrize. See: https://pytest.org/en/7.4.x/how-to/unittest.html#pytest-features-in-unittest-testcase-subclasses - def test_invalid_column_names_scores(self): - invalid_values = [None, np.nan, "", " "] - for value in invalid_values: - with self.subTest(value=value): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={hgvs_splice_column: value}), kind="scores") - - def test_invalid_column_names_counts(self): - invalid_values = [None, np.nan, "", " "] - for value in invalid_values: - with self.subTest(value=value): - with self.assertRaises(ValidationError): - validate_column_names( - self.dataframe.drop([required_score_column], axis=1).rename( - columns={hgvs_splice_column: value} - ), - kind="counts", - ) - - def test_ignore_column_ordering_scores(self): - validate_column_names( - self.dataframe[[hgvs_splice_column, "extra", "count1", hgvs_pro_column, "score", hgvs_nt_column, "count2"]], - kind="scores", - ) - - def test_ignore_column_ordering_counts(self): - validate_column_names( - self.dataframe[[hgvs_splice_column, "extra", "count1", hgvs_pro_column, hgvs_nt_column, "count2"]], - kind="counts", - ) - - -class TestChooseDataframeIndexColumn(DfTestCase): - def setUp(self): - super().setUp() - - def test_nt_index_column(self): - index = choose_dataframe_index_column(self.dataframe) - assert index == hgvs_nt_column - - def test_pro_index_column(self): - index = choose_dataframe_index_column(self.dataframe.drop(hgvs_nt_column, axis=1)) - assert index == hgvs_pro_column - - def test_no_valid_index_column(self): - with self.assertRaises(ValidationError): - choose_dataframe_index_column(self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1)) - - -class TestValidateHgvsPrefixCombinations(TestCase): - def setUp(self): - self.valid_combinations = [ - ("g", "c", "p"), - ("m", "c", "p"), - ("o", "c", "p"), - ("g", "n", None), - ("m", "n", None), - ("o", "n", None), - ("n", None, None), - ("c", None, "p"), - (None, None, "p"), - (None, None, None), # valid for this validator, but a dataframe with no variants should be caught upstream - ] - self.invalid_combinations = [ - t - for t in itertools.product(("c", "n", "g", "m", "o", None), ("c", "n", None), ("p", None)) - if t not in self.valid_combinations - ] - - def test_valid_combinations(self): - for t in self.valid_combinations: - with self.subTest(t=t): - validate_hgvs_prefix_combinations(*t, True) - - def test_invalid_combinations(self): - for t in self.invalid_combinations: - with self.subTest(t=t): - with self.assertRaises(ValidationError): - validate_hgvs_prefix_combinations(*t, True) - - # TODO: biocommons.HGVS validation clashes here w/ our custom validators: - # n. prefix is the problematic one, for now. 
- @pytest.mark.skip() - def test_invalid_combinations_biocommons(self): - for t in self.invalid_combinations: - with self.subTest(t=t): - with self.assertRaises(ValidationError): - validate_hgvs_prefix_combinations(*t, False) - - def test_invalid_combinations_value_error_nt(self): - with self.assertRaises(ValueError): - validate_hgvs_prefix_combinations("p", None, None, True) - - def test_invalid_combinations_value_error_nt_pro(self): - with self.assertRaises(ValueError): - validate_hgvs_prefix_combinations("c", None, "P", True) - - def test_invalid_combinations_value_error_splice(self): - with self.assertRaises(ValueError): - validate_hgvs_prefix_combinations("x", "c", "p", True) - - -class TestValidateVariantFormatting(TestCase): - def setUp(self) -> None: - super().setUp() - - self.valid = pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column) - self.inconsistent = pd.Series(["g.1A>G", "c.1A>T"], name=hgvs_nt_column) - self.valid_prefixes = ["g."] - self.invalid_prefixes = ["c."] - self.valid_target = ["single_target"] - - self.valid_multi = pd.Series(["test1:g.1A>G", "test2:g.1A>T"], name=hgvs_nt_column) - self.invalid_multi = pd.Series(["test3:g.1A>G", "test3:g.1A>T"], name=hgvs_nt_column) - self.inconsistent_multi = pd.Series(["test1:g.1A>G", "test2:c.1A>T"], name=hgvs_nt_column) - self.valid_targets = ["test1", "test2"] - - def test_single_target_valid_variants(self): - validate_variant_formatting(self.valid, self.valid_prefixes, self.valid_target, False) - - def test_single_target_inconsistent_variants(self): - with self.assertRaises(ValidationError): - validate_variant_formatting(self.inconsistent, self.valid_prefixes, self.valid_target, False) - - def test_single_target_invalid_prefixes(self): - with self.assertRaises(ValidationError): - validate_variant_formatting(self.valid, self.invalid_prefixes, self.valid_target, False) - - def test_multi_target_valid_variants(self): - validate_variant_formatting(self.valid_multi, self.valid_prefixes, self.valid_targets, True) - - def test_multi_target_inconsistent_variants(self): - with self.assertRaises(ValidationError): - validate_variant_formatting(self.inconsistent_multi, self.valid_prefixes, self.valid_targets, True) - - def test_multi_target_invalid_prefixes(self): - with self.assertRaises(ValidationError): - validate_variant_formatting(self.valid_multi, self.invalid_prefixes, self.valid_targets, True) - - def test_multi_target_lacking_full_coords(self): - with self.assertRaises(ValidationError): - validate_variant_formatting(self.valid, self.valid_prefixes, self.valid_targets, True) - - def test_multi_target_invalid_accessions(self): - with self.assertRaises(ValidationError): - validate_variant_formatting(self.invalid_multi, self.valid_prefixes, self.valid_targets, True) - - -class TestGenerateVariantPrefixes(DfTestCase): - def setUp(self): - super().setUp() - - self.nt_prefixes = ["c.", "n.", "g.", "m.", "o."] - self.splice_prefixes = ["c.", "n."] - self.pro_prefixes = ["p."] - - def test_nt_prefixes(self): - prefixes = generate_variant_prefixes(self.dataframe[hgvs_nt_column]) - assert prefixes == self.nt_prefixes - - def test_pro_prefixes(self): - prefixes = generate_variant_prefixes(self.dataframe[hgvs_pro_column]) - assert prefixes == self.pro_prefixes - - def test_splice_prefixes(self): - prefixes = generate_variant_prefixes(self.dataframe[hgvs_splice_column]) - assert prefixes == self.splice_prefixes - - def test_unrecognized_column_prefixes(self): - with self.assertRaises(ValueError): - 
generate_variant_prefixes(self.dataframe["extra"]) - - -class TestValidateVariantColumn(DfTestCase): - def setUp(self): - super().setUp() - - def test_invalid_column_type_index(self): - with self.assertRaises(ValidationError): - validate_variant_column(self.dataframe[required_score_column], True) - - def test_invalid_column_type(self): - with self.assertRaises(ValidationError): - validate_variant_column(self.dataframe[required_score_column], False) - - def test_null_values_type_index(self): - self.dataframe[hgvs_nt_column].iloc[1] = pd.NA - with self.assertRaises(ValidationError): - validate_variant_column(self.dataframe.iloc[0, :], True) - - def test_null_values_type(self): - self.dataframe[hgvs_nt_column].iloc[1] = pd.NA - validate_variant_column(self.dataframe[hgvs_nt_column], False) - - def test_nonunique_values_index(self): - self.dataframe["dup_col"] = ["p.Met1Leu", "p.Met1Leu"] - with self.assertRaises(ValidationError): - validate_variant_column(self.dataframe["dup_col"], True) - - def test_nonunique_values(self): - self.dataframe["dup_col"] = ["p.Met1Leu", "p.Met1Leu"] - validate_variant_column(self.dataframe["dup_col"], False) - - def test_variant_column_is_valid(self): - validate_variant_column(self.dataframe[hgvs_nt_column], True) - - -class TestValidateVariantColumnsMatch(DfTestCase): - def test_same_df(self): - validate_variant_columns_match(self.dataframe, self.dataframe) - - def test_ignore_order(self): - validate_variant_columns_match(self.dataframe, self.dataframe.iloc[::-1]) - - def test_missing_column(self): - with self.assertRaises(ValidationError): - validate_variant_columns_match(self.dataframe, self.dataframe.drop(hgvs_nt_column, axis=1)) - with self.assertRaises(ValidationError): - validate_variant_columns_match(self.dataframe.drop(hgvs_nt_column, axis=1), self.dataframe) - - def test_missing_variant(self): - df2 = self.dataframe.copy() - df2.loc[0, hgvs_pro_column] = None - with self.assertRaises(ValidationError): - validate_variant_columns_match(self.dataframe, df2) - - -# Spoof the target sequence type -class NucleotideSequenceTestCase: - def __init__(self): - self.sequence = "ATG" - self.sequence_type = "dna" - - -class ProteinSequenceTestCase: - def __init__(self): - self.sequence = "MTG" - self.sequence_type = "protein" - - -class TestValidateTransgenicColumn(DfTestCase): - def setUp(self): - super().setUp() - - self.valid_hgvs_columns = [ - pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column), - pd.Series(["m.1A>G", "m.1A>T"], name=hgvs_nt_column), - pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_nt_column), - pd.Series(["n.1A>G", "n.1A>T"], name=hgvs_nt_column), - pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_splice_column), - pd.Series(["p.Met1Val", "p.Met1Leu"], name=hgvs_pro_column), - ] - - self.valid_hgvs_columns_nt_only = [ - pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column), - pd.Series(["m.1A>G", "m.1A>T"], name=hgvs_nt_column), - pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_nt_column), - pd.Series(["n.1A>G", "n.1A>T"], name=hgvs_nt_column), - ] - - self.valid_hgvs_columns_multi_target = [ - pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name=hgvs_nt_column), - pd.Series(["test_nt:m.1A>G", "test_nt:m.1A>T"], name=hgvs_nt_column), - pd.Series(["test_nt:c.1A>G", "test_nt:c.1A>T"], name=hgvs_nt_column), - pd.Series(["test_nt:n.1A>G", "test_nt:n.1A>T"], name=hgvs_nt_column), - pd.Series(["test_nt:c.1A>G", "test_pt:c.1A>T"], name=hgvs_splice_column), - pd.Series(["test_pt:p.Met1Val", "test_pt:p.Met1Leu"], name=hgvs_pro_column), - pd.Series(["test_nt:p.Met1Val", 
"test_pt:p.Met1Leu"], name=hgvs_pro_column), - pd.Series(["test_nt:p.Met1Val", "test_nt:p.Met1Leu"], name=hgvs_pro_column), - ] - - self.valid_hgvs_columns_nt_only_multi_target = [ - pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name=hgvs_nt_column), - pd.Series(["test_nt:m.1A>G", "test_nt:m.1A>T"], name=hgvs_nt_column), - pd.Series(["test_nt:c.1A>G", "test_nt:c.1A>T"], name=hgvs_nt_column), - pd.Series(["test_nt:n.1A>G", "test_nt:n.1A>T"], name=hgvs_nt_column), - ] - - self.valid_hgvs_columns_invalid_names = [ - pd.Series(["g.1A>G", "g.1A>T"], name="invalid_column_name"), - pd.Series(["p.Met1Val", "p.Met1Leu"], name="invalid_column_name"), - ] - - self.valid_hgvs_columns_invalid_names_multi_target = [ - pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name="invalid_column_name"), - pd.Series(["test_pt:p.Met1Val", "test_pt:p.Met1Leu"], name="invalid_column_name"), - ] - - self.valid_hgvs_columns_invalid_for_index = [ - # missing data - pd.Series(["c.1A>G", None], name=hgvs_nt_column), - pd.Series([None, "p.Met1Val"], name=hgvs_pro_column), - pd.Series([None, None], name=hgvs_nt_column), - pd.Series([None, None], name=hgvs_pro_column), - # duplicate rows - pd.Series(["c.1A>G", "c.1A>G"], name=hgvs_nt_column), - pd.Series(["p.Met1Val", "p.Met1Val"], name=hgvs_pro_column), - ] - - self.valid_hgvs_columns_invalid_for_index_multi_target = [ - # missing data - pd.Series(["test_nt:c.1A>G", None], name=hgvs_nt_column), - pd.Series([None, "test_pt:p.Met1Val"], name=hgvs_pro_column), - pd.Series([None, None], name=hgvs_nt_column), - pd.Series([None, None], name=hgvs_pro_column), - # duplicate rows - pd.Series(["test_nt:c.1A>G", "test_nt:c.1A>G"], name=hgvs_nt_column), - pd.Series(["test_nt:p.Met1Val", "test_nt:p.Met1Val"], name=hgvs_pro_column), - ] - - self.invalid_hgvs_columns_by_name = [ - pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_splice_column), - pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_pro_column), - pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_pro_column), - pd.Series(["n.1A>G", "n.1A>T"], name=hgvs_pro_column), - pd.Series(["p.Met1Val", "p.Met1Leu"], name=hgvs_nt_column), - ] - - self.invalid_hgvs_columns_by_name_multi_target = [ - pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name=hgvs_splice_column), - pd.Series(["test_pt:g.1A>G", "test_pt:g.1A>T"], name=hgvs_pro_column), - pd.Series(["test_nt:c.1A>G", "test_pt:c.1A>T"], name=hgvs_pro_column), - pd.Series(["test_nt:n.1A>G", "test_nt:n.1A>T"], name=hgvs_pro_column), - pd.Series(["test_nt:p.Met1Val", "test_nt:p.Met1Leu"], name=hgvs_nt_column), - pd.Series(["test_nt:p.Met1Val", "test_pt:p.Met1Leu"], name=hgvs_nt_column), - ] - - self.invalid_hgvs_columns_by_contents = [ - pd.Series(["r.1a>g", "r.1a>u"], name=hgvs_splice_column), # rna not allowed - pd.Series(["r.1a>g", "r.1a>u"], name=hgvs_nt_column), # rna not allowed - pd.Series(["c.1A>G", "c.5A>T"], name=hgvs_nt_column), # out of bounds for target - pd.Series(["c.1A>G", "_wt"], name=hgvs_nt_column), # old special variant - pd.Series(["p.Met1Leu", "_sy"], name=hgvs_pro_column), # old special variant - pd.Series(["n.1A>G", "c.1A>T"], name=hgvs_nt_column), # mixed prefix - pd.Series(["c.1A>G", "p.Met1Leu"], name=hgvs_pro_column), # mixed types/prefix - pd.Series(["c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_nt_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_splice_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_pro_column), # contains numeric - ] - - self.invalid_hgvs_columns_by_contents_multi_target = [ - 
pd.Series(["test_nt:r.1a>g", "test_nt:r.1a>u"], name=hgvs_splice_column), # rna not allowed - pd.Series(["test_nt:r.1a>g", "test_nt:r.1a>u"], name=hgvs_nt_column), # rna not allowed - pd.Series(["bad_label:r.1a>g", "test_nt:r.1a>u"], name=hgvs_nt_column), # invalid label - pd.Series(["test_nt:c.1A>G", "test_nt:c.5A>T"], name=hgvs_nt_column), # out of bounds for target - pd.Series(["test_nt:c.1A>G", "test_nt:_wt"], name=hgvs_nt_column), # old special variant - pd.Series(["test_pt:p.Met1Leu", "test_nt:_sy"], name=hgvs_pro_column), # old special variant - pd.Series(["test_nt:n.1A>G", "test_nt:c.1A>T"], name=hgvs_nt_column), # mixed prefix - pd.Series(["test_nt:c.1A>G", "test_pt:p.Met1Leu"], name=hgvs_pro_column), # mixed types/prefix - pd.Series(["test_pt:c.1A>G", "bad_label:p.Met1Leu"], name=hgvs_pro_column), # invalid label - pd.Series(["test_nt:c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_nt_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_splice_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_pro_column), # contains numeric - ] - - self.nt_sequence_test_case = NucleotideSequenceTestCase() - self.pt_sequence_test_case = ProteinSequenceTestCase() - - def test_valid_columns_single_target(self): - for column in self.valid_hgvs_columns: - with self.subTest(column=column): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - for column in self.valid_hgvs_columns_invalid_for_index: - with self.subTest(column=column): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - - def test_valid_columns_multi_target(self): - for column in self.valid_hgvs_columns_multi_target: - with self.subTest(column=column): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - for column in self.valid_hgvs_columns_invalid_for_index_multi_target: - with self.subTest(column=column): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - - # Test when supplied targets do not contain a DNA sequence (only valid for hgvs_nt col) - def test_valid_columns_invalid_supplied_targets(self): - for column in self.valid_hgvs_columns_nt_only: - with self.subTest(column=column): - with self.assertRaises(ValueError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_pt": self.pt_sequence_test_case}, # type: ignore - ) - - # Test when multiple supplied targets do not contain a DNA sequence (only valid for hgvs_nt col) - def test_valid_columns_invalid_supplied_targets_multi_target(self): - for column in self.valid_hgvs_columns_nt_only_multi_target: - with self.subTest(column=column): - with self.assertRaises(ValueError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_pt": self.pt_sequence_test_case, "test_pt_2": self.pt_sequence_test_case}, # type: ignore - ) - - def test_valid_columns_invalid_column_name(self): - for column in self.valid_hgvs_columns_invalid_names: - with self.subTest(column=column): - with self.assertRaises(ValueError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - - def 
test_valid_columns_invalid_column_name_multi_target(self): - for column in self.valid_hgvs_columns_invalid_names_multi_target: - with self.subTest(column=column): - with self.assertRaises(ValueError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - - def test_index_columns(self): - for column in self.valid_hgvs_columns: - with self.subTest(column=column): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - for column in self.valid_hgvs_columns_invalid_for_index: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - - def test_index_columns_multi_target(self): - for column in self.valid_hgvs_columns_multi_target: - with self.subTest(column=column): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - for column in self.valid_hgvs_columns_invalid_for_index_multi_target: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - - def test_invalid_column_values(self): - for column in self.invalid_hgvs_columns_by_contents: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - for column in self.invalid_hgvs_columns_by_contents: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - - def test_invalid_column_values_multi_target(self): - for column in self.invalid_hgvs_columns_by_contents_multi_target: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - for column in self.invalid_hgvs_columns_by_contents_multi_target: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - - def test_valid_column_values_wrong_column_name(self): - for column in self.invalid_hgvs_columns_by_name: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - for column in self.invalid_hgvs_columns_by_name: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - - def test_valid_column_values_wrong_column_name_multi_target(self): - for column in self.invalid_hgvs_columns_by_name: - with self.subTest(column=column): - with 
self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - for column in self.invalid_hgvs_columns_by_name: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - - -# Spoof the accession type -class AccessionTestCase: - def __init__(self): - self.accession = VALID_ACCESSION - - -class TestValidateHgvsGenomicColumn(DfTestCase): - def setUp(self): - super().setUp() - - self.accession_test_case = AccessionTestCase() - - self.valid_hgvs_column = pd.Series( - [f"{VALID_ACCESSION}:c.1G>A", f"{VALID_ACCESSION}:c.2A>T"], name=hgvs_nt_column - ) - self.missing_data = pd.Series([f"{VALID_ACCESSION}:c.3T>G", None], name=hgvs_nt_column) - self.duplicate_data = pd.Series([f"{VALID_ACCESSION}:c.4A>G", f"{VALID_ACCESSION}:c.4A>G"], name=hgvs_nt_column) - - self.invalid_hgvs_columns_by_name = [ - pd.Series([f"{VALID_ACCESSION}:g.1A>G", f"{VALID_ACCESSION}:g.1A>T"], name=hgvs_splice_column), - pd.Series([f"{VALID_ACCESSION}:g.1A>G", f"{VALID_ACCESSION}:g.1A>T"], name=hgvs_pro_column), - pd.Series([f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.1A>T"], name=hgvs_pro_column), - pd.Series([f"{VALID_ACCESSION}:n.1A>G", f"{VALID_ACCESSION}:n.1A>T"], name=hgvs_pro_column), - pd.Series([f"{VALID_ACCESSION}:p.Met1Val", f"{VALID_ACCESSION}:p.Met1Leu"], name=hgvs_nt_column), - ] - - self.invalid_hgvs_columns_by_contents = [ - pd.Series( - [f"{VALID_ACCESSION}:r.1a>g", f"{VALID_ACCESSION}:r.1a>u"], name=hgvs_splice_column - ), # rna not allowed - pd.Series( - [f"{VALID_ACCESSION}:r.1a>g", f"{VALID_ACCESSION}:r.1a>u"], name=hgvs_nt_column - ), # rna not allowed - pd.Series( - [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.5A>T"], name=hgvs_nt_column - ), # out of bounds for target - pd.Series([f"{VALID_ACCESSION}:c.1A>G", "_wt"], name=hgvs_nt_column), # old special variant - pd.Series([f"{VALID_ACCESSION}:p.Met1Leu", "_sy"], name=hgvs_pro_column), # old special variant - pd.Series([f"{VALID_ACCESSION}:n.1A>G", f"{VALID_ACCESSION}:c.1A>T"], name=hgvs_nt_column), # mixed prefix - pd.Series( - [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:p.Met1Leu"], name=hgvs_pro_column - ), # mixed types/prefix - pd.Series(["c.1A>G", "p.Met1Leu"], name=hgvs_pro_column), # variants should be fully qualified - pd.Series([f"{VALID_ACCESSION}:c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_nt_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_splice_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_pro_column), # contains numeric - ] - - def test_valid_variant(self): - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ): - validate_hgvs_genomic_column( - self.valid_hgvs_column, is_index=False, targets=[self.accession_test_case], hdp=self.human_data_provider - ) # type: ignore - - def test_valid_variant_valid_missing(self): - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ): - validate_hgvs_genomic_column( - self.missing_data, is_index=False, targets=[self.accession_test_case], hdp=self.human_data_provider - ) # type: ignore - - def test_valid_variant_valid_duplicate(self): 
- with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ): - validate_hgvs_genomic_column( - self.missing_data, is_index=False, targets=[self.accession_test_case], hdp=self.human_data_provider - ) # type: ignore - - def test_valid_variant_index(self): - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ): - validate_hgvs_genomic_column( - self.valid_hgvs_column, is_index=True, targets=[self.accession_test_case], hdp=self.human_data_provider - ) # type: ignore - - def test_valid_variant_invalid_missing_index(self): - with ( - self.assertRaises(ValidationError), - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ), - ): - validate_hgvs_genomic_column( - self.missing_data, is_index=True, targets=[self.accession_test_case], hdp=self.human_data_provider - ) # type: ignore - - def test_valid_variant_invalid_duplicate_index(self): - with ( - self.assertRaises(ValidationError), - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ), - ): - validate_hgvs_genomic_column( - self.duplicate_data, is_index=True, targets=[self.accession_test_case], hdp=self.human_data_provider - ) # type: ignore - - def test_invalid_column_values(self): - for column in self.invalid_hgvs_columns_by_contents: - with ( - self.subTest(column=column), - self.assertRaises(ValidationError), - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ), - ): - validate_hgvs_genomic_column( - column, - is_index=False, - targets=[self.accession_test_case], - hdp=self.human_data_provider, # type: ignore - ) - for column in self.invalid_hgvs_columns_by_contents: - with ( - self.subTest(column=column), - self.assertRaises(ValidationError), - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ), - ): - validate_hgvs_genomic_column( - column, - is_index=True, - targets=[self.accession_test_case], - hdp=self.human_data_provider, # type: ignore - ) - - def test_valid_column_values_wrong_column_name(self): - for column in self.invalid_hgvs_columns_by_name: - with ( - self.subTest(column=column), - self.assertRaises(ValidationError), - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ), - ): - validate_hgvs_genomic_column( - column, - is_index=False, - targets=[self.accession_test_case], - hdp=self.human_data_provider, # type: ignore - ) - for column in self.invalid_hgvs_columns_by_name: - with ( - self.subTest(column=column), - self.assertRaises(ValidationError), - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ), - ): - validate_hgvs_genomic_column( - column, - is_index=True, - targets=[self.accession_test_case], - hdp=self.human_data_provider, # type: ignore - ) - - # TODO: Test multiple targets diff --git a/tests/view_models/test_experiment.py b/tests/view_models/test_experiment.py index 77e9e472..381ea7a1 100644 --- a/tests/view_models/test_experiment.py +++ b/tests/view_models/test_experiment.py @@ -1,5 +1,4 @@ import pytest -from fastapi.encoders import jsonable_encoder from mavedb.view_models.experiment import ExperimentCreate from tests.helpers.constants import TEST_MINIMAL_EXPERIMENT @@ -7,7 
+6,7 @@ # Test valid experiment def test_create_experiment(): - experiment = ExperimentCreate(**jsonable_encoder(TEST_MINIMAL_EXPERIMENT)) + experiment = ExperimentCreate(**TEST_MINIMAL_EXPERIMENT) assert experiment.title == "Test Experiment Title" assert experiment.short_description == "Test experiment" assert experiment.abstract_text == "Abstract" @@ -16,9 +15,9 @@ def test_create_experiment(): def test_cannot_create_experiment_without_a_title(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"title"}) + experiment.pop("title") with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "field required" in str(exc_info.value) assert "title" in str(exc_info.value) @@ -26,11 +25,10 @@ def test_cannot_create_experiment_without_a_title(): def test_cannot_create_experiment_with_a_space_title(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"title"}) - invalid_experiment["title"] = " " + experiment["title"] = " " with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "This field is required and cannot be empty." in str(exc_info.value) assert "title" in str(exc_info.value) @@ -38,11 +36,10 @@ def test_cannot_create_experiment_with_a_space_title(): def test_cannot_create_experiment_with_an_empty_title(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"title"}) - invalid_experiment["title"] = "" + experiment["title"] = "" with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "none is not an allowed value" in str(exc_info.value) assert "title" in str(exc_info.value) @@ -50,10 +47,10 @@ def test_cannot_create_experiment_with_an_empty_title(): def test_cannot_create_experiment_without_a_short_description(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"shortDescription"}) + experiment.pop("shortDescription") with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "field required" in str(exc_info.value) assert "shortDescription" in str(exc_info.value) @@ -61,11 +58,10 @@ def test_cannot_create_experiment_without_a_short_description(): def test_cannot_create_experiment_with_a_space_short_description(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"shortDescription"}) - invalid_experiment["shortDescription"] = " " + experiment["shortDescription"] = " " with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "This field is required and cannot be empty." 
in str(exc_info.value) assert "shortDescription" in str(exc_info.value) @@ -73,11 +69,10 @@ def test_cannot_create_experiment_with_a_space_short_description(): def test_cannot_create_experiment_with_an_empty_short_description(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"shortDescription"}) - invalid_experiment["shortDescription"] = "" + experiment["shortDescription"] = "" with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "none is not an allowed value" in str(exc_info.value) assert "shortDescription" in str(exc_info.value) @@ -85,10 +80,10 @@ def test_cannot_create_experiment_with_an_empty_short_description(): def test_cannot_create_experiment_without_an_abstract(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"abstractText"}) + experiment.pop("abstractText") with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "field required" in str(exc_info.value) assert "abstractText" in str(exc_info.value) @@ -96,11 +91,10 @@ def test_cannot_create_experiment_without_an_abstract(): def test_cannot_create_experiment_with_a_space_abstract(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"abstractText"}) - invalid_experiment["abstractText"] = " " + experiment["abstractText"] = " " with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "This field is required and cannot be empty." in str(exc_info.value) assert "abstractText" in str(exc_info.value) @@ -108,11 +102,10 @@ def test_cannot_create_experiment_with_a_space_abstract(): def test_cannot_create_experiment_with_an_empty_abstract(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"abstractText"}) - invalid_experiment["abstractText"] = "" + experiment["abstractText"] = "" with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "none is not an allowed value" in str(exc_info.value) assert "abstractText" in str(exc_info.value) @@ -120,10 +113,10 @@ def test_cannot_create_experiment_with_an_empty_abstract(): def test_cannot_create_experiment_without_a_method(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"methodText"}) + experiment.pop("methodText") with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "field required" in str(exc_info.value) assert "methodText" in str(exc_info.value) @@ -131,11 +124,10 @@ def test_cannot_create_experiment_without_a_method(): def test_cannot_create_experiment_with_a_space_method(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"methodText"}) - invalid_experiment["methodText"] = " " + experiment["methodText"] = " " with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "This field is required and cannot be empty." 
in str(exc_info.value) assert "methodText" in str(exc_info.value) @@ -143,11 +135,10 @@ def test_cannot_create_experiment_with_a_space_method(): def test_cannot_create_experiment_with_an_empty_method(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"methodText"}) - invalid_experiment["methodText"] = "" + experiment["methodText"] = "" with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "none is not an allowed value" in str(exc_info.value) assert "methodText" in str(exc_info.value) diff --git a/tests/view_models/test_external_gene_identifiers.py b/tests/view_models/test_external_gene_identifiers.py index 5632975a..a2249c70 100644 --- a/tests/view_models/test_external_gene_identifiers.py +++ b/tests/view_models/test_external_gene_identifiers.py @@ -4,7 +4,7 @@ from mavedb.view_models.external_gene_identifier_offset import ExternalGeneIdentifierOffsetCreate -def test_create_ensemble_identifier(client): +def test_create_ensemble_identifier(): # Test valid identifier db_name = "Ensembl" identifier = "ENSG00000103275" @@ -13,7 +13,7 @@ def test_create_ensemble_identifier(client): assert externalIdentifier.identifier == "ENSG00000103275" -def test_create_invalid_ensemble_identifier(client): +def test_create_invalid_ensemble_identifier(): # Test valid identifier db_name = "Ensembl" invalid_identifier = "not_an_identifier" @@ -22,7 +22,7 @@ def test_create_invalid_ensemble_identifier(client): assert "'not_an_identifier' is not a valid Ensembl accession." in str(exc_info.value) -def test_create_uniprot_identifier(client): +def test_create_uniprot_identifier(): db_name = "UniProt" identifier = "P63279" externalIdentifier = ExternalGeneIdentifierCreate(db_name=db_name, identifier=identifier) @@ -30,7 +30,7 @@ def test_create_uniprot_identifier(client): assert externalIdentifier.identifier == "P63279" -def test_create_invalid_uniprot_identifier(client): +def test_create_invalid_uniprot_identifier(): db_name = "UniProt" invalid_identifier = "not_an_identifier" with pytest.raises(ValueError) as exc_info: @@ -38,7 +38,7 @@ def test_create_invalid_uniprot_identifier(client): assert "'not_an_identifier' is not a valid UniProt accession." in str(exc_info.value) -def test_create_refseq_identifier(client): +def test_create_refseq_identifier(): db_name = "RefSeq" identifier = "NM_003345" externalIdentifier = ExternalGeneIdentifierCreate(db_name=db_name, identifier=identifier) @@ -46,7 +46,7 @@ def test_create_refseq_identifier(client): assert externalIdentifier.identifier == "NM_003345" -def test_create_invalid_refseq_identifier(client): +def test_create_invalid_refseq_identifier(): db_name = "RefSeq" invalid_identifier = "not_an_identifier" with pytest.raises(ValueError) as exc_info: @@ -54,7 +54,7 @@ def test_create_invalid_refseq_identifier(client): assert "'not_an_identifier' is not a valid RefSeq accession." 
in str(exc_info.value) -def test_empty_db_name(client): +def test_empty_db_name(): db_name = "" identifier = "ENSG00000103275" with pytest.raises(ValueError) as exc_info: @@ -62,7 +62,7 @@ def test_empty_db_name(client): assert "none is not an allowed value" in str(exc_info.value) -def test_space_db_name(client): +def test_space_db_name(): db_name = " " identifier = "ENSG00000103275" with pytest.raises(ValueError) as exc_info: @@ -70,7 +70,7 @@ def test_space_db_name(client): assert "db_name should not be empty" in str(exc_info.value) -def test_none_db_name(client): +def test_none_db_name(): db_name = None identifier = "ENSG00000103275" with pytest.raises(ValueError) as exc_info: @@ -78,7 +78,7 @@ def test_none_db_name(client): assert "none is not an allowed value" in str(exc_info.value) -def test_invalid_db_name(client): +def test_invalid_db_name(): db_name = "Invalid" identifier = "ENSG00000103275" with pytest.raises(ValueError) as exc_info: @@ -89,13 +89,13 @@ def test_invalid_db_name(client): ) -def test_create_identifier_with_offset(client): +def test_create_identifier_with_offset(): identifier = {"db_name": "RefSeq", "identifier": "NM_003345"} externalIdentifier = ExternalGeneIdentifierOffsetCreate(identifier=identifier, offset=1) assert externalIdentifier.offset == 1 -def test_create_identifier_with_string_offset(client): +def test_create_identifier_with_string_offset(): identifier = {"db_name": "RefSeq", "identifier": "NM_003345"} offset = "invalid" with pytest.raises(ValueError) as exc_info: @@ -103,7 +103,7 @@ def test_create_identifier_with_string_offset(client): assert "value is not a valid integer" in str(exc_info.value) -def test_create_identifier_with_negative_offset(client): +def test_create_identifier_with_negative_offset(): identifier = {"db_name": "RefSeq", "identifier": "NM_003345"} with pytest.raises(ValueError) as exc_info: ExternalGeneIdentifierOffsetCreate(identifier=identifier, offset=-10) diff --git a/tests/view_models/test_publication_identifier.py b/tests/view_models/test_publication_identifier.py index b65f9110..f516f87c 100644 --- a/tests/view_models/test_publication_identifier.py +++ b/tests/view_models/test_publication_identifier.py @@ -3,42 +3,42 @@ from mavedb.view_models.publication_identifier import PublicationIdentifierCreate -def test_publication_identifier_create_pubmed_validator(client): +def test_publication_identifier_create_pubmed_validator(): # Test valid pubmed identifier valid_identifier = "20711111" pubmed_one = PublicationIdentifierCreate(identifier=valid_identifier) assert pubmed_one.identifier == "20711111" -def test_publication_identifier_create_new_biorxiv_validator(client): +def test_publication_identifier_create_new_biorxiv_validator(): # Test valid new form of biorxiv identifier valid_identifier = "2019.12.12.207222" pubmed_one = PublicationIdentifierCreate(identifier=valid_identifier) assert pubmed_one.identifier == "2019.12.12.207222" -def test_publication_identifier_create_old_biorxiv_validator(client): +def test_publication_identifier_create_old_biorxiv_validator(): # Test valid old form of biorxiv identifier valid_identifier = "207222" pubmed_one = PublicationIdentifierCreate(identifier=valid_identifier) assert pubmed_one.identifier == "207222" -def test_publication_identifier_create_new_medrxiv_validator(client): +def test_publication_identifier_create_new_medrxiv_validator(): # Test valid new form of medrxiv identifier valid_identifier = "2019.12.12.20733333" pubmed_one = PublicationIdentifierCreate(identifier=valid_identifier) 
assert pubmed_one.identifier == "2019.12.12.20733333" -def test_publication_identifier_create_old_medrxiv_validator(client): +def test_publication_identifier_create_old_medrxiv_validator(): # Test valid old form of medrxiv identifier (this is the same format as pubmed identifiers) valid_identifier = "20733333" pubmed_one = PublicationIdentifierCreate(identifier=valid_identifier) assert pubmed_one.identifier == "20733333" -def test_invalid_publication_identifier_create_validator(client): +def test_invalid_publication_identifier_create_validator(): # Test invalid identifier invalid_identifier = "not_an_identifier" with pytest.raises(ValueError) as exc_info: @@ -48,7 +48,7 @@ def test_invalid_publication_identifier_create_validator(client): ) -def test_invalid_publication_identifier_date_part_create_validator(client): +def test_invalid_publication_identifier_date_part_create_validator(): # Test invalid identifier (date too early on bioRxiv identifier) invalid_identifier = "2018.12.12.207222" with pytest.raises(ValueError) as exc_info: diff --git a/tests/view_models/test_score_set.py b/tests/view_models/test_score_set.py index a47c3242..1247020c 100644 --- a/tests/view_models/test_score_set.py +++ b/tests/view_models/test_score_set.py @@ -1,5 +1,4 @@ import pytest -from fastapi.encoders import jsonable_encoder from mavedb.view_models.publication_identifier import PublicationIdentifierCreate from mavedb.view_models.score_set import ScoreSetCreate, ScoreSetModify @@ -9,24 +8,25 @@ def test_cannot_create_score_set_without_a_target(): score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() + score_set_test.pop("targetGenes") with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test, exclude={"targetGenes"}), target_genes=[]) + ScoreSetModify(**score_set_test, target_genes=[]) assert "Score sets should define at least one target." 
in str(exc_info.value) def test_cannot_create_score_set_with_multiple_primary_publications(): score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() + target_genes = score_set_test.pop("targetGenes") identifier_one = PublicationIdentifierCreate(identifier="2019.12.12.207222") identifier_two = PublicationIdentifierCreate(identifier="2019.12.12.20733333") with pytest.raises(ValueError) as exc_info: ScoreSetModify( - **jsonable_encoder(score_set_test), - exclude={"targetGenes"}, - target_genes=[TargetGeneCreate(**jsonable_encoder(target)) for target in score_set_test["targetGenes"]], + **score_set_test, + target_genes=[TargetGeneCreate(**target) for target in target_genes], primary_publication_identifiers=[identifier_one, identifier_two], ) @@ -36,12 +36,13 @@ def test_cannot_create_score_set_with_multiple_primary_publications(): def test_cannot_create_score_set_without_target_gene_labels_when_multiple_targets_exist(): score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() - target_gene_one = TargetGeneCreate(**jsonable_encoder(score_set_test["targetGenes"][0])) - target_gene_two = TargetGeneCreate(**jsonable_encoder(score_set_test["targetGenes"][0])) + target_gene_one = TargetGeneCreate(**score_set_test["targetGenes"][0]) + target_gene_two = TargetGeneCreate(**score_set_test["targetGenes"][0]) + score_set_test.pop("targetGenes") with pytest.raises(ValueError) as exc_info: ScoreSetModify( - **jsonable_encoder(score_set_test, exclude={"targetGenes"}), + **score_set_test, target_genes=[target_gene_one, target_gene_two], ) @@ -51,16 +52,17 @@ def test_cannot_create_score_set_without_target_gene_labels_when_multiple_target def test_cannot_create_score_set_with_non_unique_target_labels(): score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() - target_gene_one = TargetGeneCreate(**jsonable_encoder(score_set_test["targetGenes"][0])) - target_gene_two = TargetGeneCreate(**jsonable_encoder(score_set_test["targetGenes"][0])) + target_gene_one = TargetGeneCreate(**score_set_test["targetGenes"][0]) + target_gene_two = TargetGeneCreate(**score_set_test["targetGenes"][0]) non_unique = "BRCA1" target_gene_one.target_sequence.label = non_unique target_gene_two.target_sequence.label = non_unique + score_set_test.pop("targetGenes") with pytest.raises(ValueError) as exc_info: ScoreSetModify( - **jsonable_encoder(score_set_test, exclude={"targetGenes"}), + **score_set_test, target_genes=[target_gene_one, target_gene_two], ) @@ -69,9 +71,10 @@ def test_cannot_create_score_set_with_non_unique_target_labels(): def test_cannot_create_score_set_without_a_title(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"title"}) + score_set.pop("title") + with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "field required" in str(exc_info.value) assert "title" in str(exc_info.value) @@ -79,11 +82,10 @@ def test_cannot_create_score_set_without_a_title(): def test_cannot_create_score_set_with_a_space_title(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"title"}) - invalid_score_set["title"] = " " + score_set["title"] = " " with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "This field is required and cannot be empty." 
in str(exc_info.value) assert "title" in str(exc_info.value) @@ -91,11 +93,10 @@ def test_cannot_create_score_set_with_a_space_title(): def test_cannot_create_score_set_with_an_empty_title(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"title"}) - invalid_score_set["title"] = "" + score_set["title"] = "" with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "none is not an allowed value" in str(exc_info.value) assert "title" in str(exc_info.value) @@ -103,10 +104,10 @@ def test_cannot_create_score_set_with_an_empty_title(): def test_cannot_create_score_set_without_a_short_description(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"shortDescription"}) + score_set.pop("shortDescription") with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "field required" in str(exc_info.value) assert "shortDescription" in str(exc_info.value) @@ -114,11 +115,10 @@ def test_cannot_create_score_set_without_a_short_description(): def test_cannot_create_score_set_with_a_space_short_description(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"shortDescription"}) - invalid_score_set["shortDescription"] = " " + score_set["shortDescription"] = " " with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "This field is required and cannot be empty." in str(exc_info.value) assert "shortDescription" in str(exc_info.value) @@ -126,11 +126,10 @@ def test_cannot_create_score_set_with_a_space_short_description(): def test_cannot_create_score_set_with_an_empty_short_description(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"shortDescription"}) - invalid_score_set["shortDescription"] = "" + score_set["shortDescription"] = "" with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "none is not an allowed value" in str(exc_info.value) assert "shortDescription" in str(exc_info.value) @@ -138,10 +137,10 @@ def test_cannot_create_score_set_with_an_empty_short_description(): def test_cannot_create_score_set_without_an_abstract(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"abstractText"}) + score_set.pop("abstractText") with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "field required" in str(exc_info.value) assert "abstractText" in str(exc_info.value) @@ -149,11 +148,10 @@ def test_cannot_create_score_set_without_an_abstract(): def test_cannot_create_score_set_with_a_space_abstract(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"abstractText"}) - invalid_score_set["abstractText"] = " " + score_set["abstractText"] = " " with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "This field is required and cannot be empty." 
in str(exc_info.value) assert "abstractText" in str(exc_info.value) @@ -161,11 +159,10 @@ def test_cannot_create_score_set_with_a_space_abstract(): def test_cannot_create_score_set_with_an_empty_abstract(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"abstractText"}) - invalid_score_set["abstractText"] = "" + score_set["abstractText"] = "" with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "none is not an allowed value" in str(exc_info.value) assert "abstractText" in str(exc_info.value) @@ -173,10 +170,10 @@ def test_cannot_create_score_set_with_an_empty_abstract(): def test_cannot_create_score_set_without_a_method(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"methodText"}) + score_set.pop("methodText") with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "field required" in str(exc_info.value) assert "methodText" in str(exc_info.value) @@ -184,11 +181,10 @@ def test_cannot_create_score_set_without_a_method(): def test_cannot_create_score_set_with_a_space_method(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"methodText"}) - invalid_score_set["methodText"] = " " + score_set["methodText"] = " " with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "This field is required and cannot be empty." in str(exc_info.value) assert "methodText" in str(exc_info.value) @@ -196,11 +192,10 @@ def test_cannot_create_score_set_with_a_space_method(): def test_cannot_create_score_set_with_an_empty_method(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"methodText"}) - invalid_score_set["methodText"] = "" + score_set["methodText"] = "" with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "none is not an allowed value" in str(exc_info.value) assert "methodText" in str(exc_info.value) @@ -217,7 +212,7 @@ def test_cannot_create_score_set_with_too_many_boundaries(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "Only a lower and upper bound are allowed." 
in str(exc_info.value) @@ -233,7 +228,7 @@ def test_cannot_create_score_set_with_overlapping_ranges(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "Score ranges may not overlap; `range_1` overlaps with `range_2`" in str(exc_info.value) @@ -249,7 +244,7 @@ def test_can_create_score_set_with_mixed_range_types(): ], } - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) def test_can_create_score_set_with_adjacent_ranges(): @@ -262,7 +257,7 @@ def test_can_create_score_set_with_adjacent_ranges(): ], } - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) def test_can_create_score_set_with_flipped_adjacent_ranges(): @@ -275,7 +270,7 @@ def test_can_create_score_set_with_flipped_adjacent_ranges(): ], } - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) def test_can_create_score_set_with_adjacent_negative_ranges(): @@ -288,7 +283,7 @@ def test_can_create_score_set_with_adjacent_negative_ranges(): ], } - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) def test_can_create_score_set_with_flipped_adjacent_negative_ranges(): @@ -301,7 +296,7 @@ def test_can_create_score_set_with_flipped_adjacent_negative_ranges(): ], } - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) def test_cannot_create_score_set_with_overlapping_upper_unbounded_ranges(): @@ -315,7 +310,7 @@ def test_cannot_create_score_set_with_overlapping_upper_unbounded_ranges(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "Score ranges may not overlap; `range_1` overlaps with `range_2`" in str(exc_info.value) @@ -331,7 +326,7 @@ def test_cannot_create_score_set_with_overlapping_lower_unbounded_ranges(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "Score ranges may not overlap; `range_1` overlaps with `range_2`" in str(exc_info.value) @@ -347,7 +342,7 @@ def test_cannot_create_score_set_with_backwards_bounds(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "The lower bound of the score range may not be larger than the upper bound." in str(exc_info.value) @@ -362,7 +357,7 @@ def test_cannot_create_score_set_with_equal_bounds(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "The lower and upper bound of the score range may not be the same." in str(exc_info.value) @@ -378,7 +373,7 @@ def test_cannot_create_score_set_with_duplicate_range_labels(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "Detected repeated label: `range_1`. Range labels must be unique." in str(exc_info.value) @@ -394,7 +389,7 @@ def test_cannot_create_score_set_with_duplicate_range_labels_whitespace(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "Detected repeated label: `range_1`. Range labels must be unique." 
in str(exc_info.value) @@ -411,7 +406,7 @@ def test_cannot_create_score_set_with_wild_type_outside_ranges(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert ( f"The provided wild type score of {wt_score} is not within any of the provided normal ranges. This score should be within a normal range." @@ -431,7 +426,7 @@ def test_cannot_create_score_set_with_wild_type_outside_normal_range(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert ( f"The provided wild type score of {wt_score} is not within any of the provided normal ranges. This score should be within a normal range." @@ -450,7 +445,7 @@ def test_cannot_create_score_set_with_wild_type_score_and_no_normal_range(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "A wild type score has been provided, but no normal classification range exists." in str(exc_info.value) @@ -480,7 +475,7 @@ def test_cannot_create_score_set_without_default_ranges(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "unexpected value; permitted: 'normal', 'abnormal', 'not_specified'" in str(exc_info.value) diff --git a/tests/view_models/test_user.py b/tests/view_models/test_user.py index b72d0d5d..8650f343 100644 --- a/tests/view_models/test_user.py +++ b/tests/view_models/test_user.py @@ -1,5 +1,4 @@ import pytest -from fastapi.encoders import jsonable_encoder from mavedb.view_models.user import CurrentUserUpdate from tests.helpers.constants import TEST_USER @@ -7,6 +6,8 @@ # There are lots of potentially invalid emails, but this test is intented to ensure # the validator is active, so just use a simple one. 
-def test_cannot_update_user_with_invalid_email(client):
+def test_cannot_update_user_with_invalid_email():
+    user = TEST_USER.copy()
+    user["email"] = "invalidemail@"
     with pytest.raises(ValueError):
-        CurrentUserUpdate(**jsonable_encoder(TEST_USER, exclude={"email"}), email="invalidemail@")
+        CurrentUserUpdate(**user)
diff --git a/tests/view_models/test_wild_type_sequence.py b/tests/view_models/test_wild_type_sequence.py
index 25415fc1..47401871 100644
--- a/tests/view_models/test_wild_type_sequence.py
+++ b/tests/view_models/test_wild_type_sequence.py
@@ -28,21 +28,21 @@
         ("Protein", "startrek"),
     ],
 )
-def test_create_wild_type_sequence(client, sequence_type, sequence):
+def test_create_wild_type_sequence(sequence_type, sequence):
     TargetSeq = TargetSequenceCreate(sequence_type=sequence_type, sequence=sequence, taxonomy=taxonomy)
     assert TargetSeq.sequence_type == sequence_type.lower()
     assert TargetSeq.sequence == sequence.upper()
 
 
 @pytest.mark.parametrize("sequence_type, sequence", [("dnaaa", "ATGAGTATTCAACATTTCCGTGTC"), ("null", "STARTREK")])
-def test_create_invalid_sequence_type(client, sequence_type, sequence):
+def test_create_invalid_sequence_type(sequence_type, sequence):
     with pytest.raises(ValueError) as exc_info:
         TargetSequenceCreate(sequence_type=sequence_type, sequence=sequence, taxonomy=taxonomy)
     assert f"'{sequence_type}' is not a valid sequence type" in str(exc_info.value)
 
 
 @pytest.mark.parametrize("sequence_type, sequence", [("dna", "ARCG"), ("protein", "AzCG")])
-def test_create_invalid_sequence(client, sequence_type, sequence):
+def test_create_invalid_sequence(sequence_type, sequence):
     with pytest.raises(ValueError) as exc_info:
         TargetSequenceCreate(sequence_type=sequence_type, sequence=sequence, taxonomy=taxonomy)
     assert f"invalid {sequence_type} sequence provided" in str(exc_info.value)
diff --git a/tests/worker/conftest.py b/tests/worker/conftest.py
index 7d989005..fedf2f1f 100644
--- a/tests/worker/conftest.py
+++ b/tests/worker/conftest.py
@@ -6,8 +6,8 @@
 from mavedb.models.license import License
 from mavedb.models.taxonomy import Taxonomy
 from mavedb.models.user import User
+
 from tests.helpers.constants import EXTRA_USER, TEST_LICENSE, TEST_INACTIVE_LICENSE, TEST_TAXONOMY, TEST_USER
-from tests.helpers.util import create_experiment, create_seq_score_set
 
 
 @pytest.fixture
@@ -21,15 +21,6 @@ def setup_worker_db(session):
     db.commit()
 
 
-@pytest.fixture
-def populate_worker_db(data_files, client):
-    # create score set via API.
In production, the API would invoke this worker job - experiment = create_experiment(client) - score_set = create_seq_score_set(client, experiment["urn"]) - - return score_set["urn"] - - @pytest.fixture def data_files(tmp_path): copytree(Path(__file__).absolute().parent / "data", tmp_path / "data") diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index 18e0846a..106b77b2 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -1,17 +1,20 @@ +# ruff: noqa: E402 + from asyncio.unix_events import _UnixSelectorEventLoop from copy import deepcopy from datetime import date from unittest.mock import patch from uuid import uuid4 -import arq.jobs -import cdot.hgvs.dataproviders import jsonschema import pandas as pd import pytest -from arq import ArqRedis from sqlalchemy import not_, select +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.data_providers.services import VRSMap from mavedb.lib.mave.constants import HGVS_NT_COLUMN from mavedb.lib.score_sets import csv_data_to_df @@ -31,6 +34,8 @@ map_variants_for_score_set, variant_mapper_manager, ) + + from tests.helpers.constants import ( TEST_CDOT_TRANSCRIPT, TEST_MINIMAL_ACC_SCORESET, @@ -39,7 +44,18 @@ TEST_VARIANT_MAPPING_SCAFFOLD, VALID_ACCESSION, ) -from tests.helpers.util import awaitable_exception +from tests.helpers.util.exceptions import awaitable_exception +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.score_set import create_seq_score_set + + +@pytest.fixture +def populate_worker_db(data_files, client): + # create score set via API. In production, the API would invoke this worker job + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + + return score_set["urn"] async def setup_records_and_files(async_client, data_files, input_score_set): @@ -815,7 +831,7 @@ async def test_create_mapped_variants_for_scoreset_mapping_exception_retry_faile "run_in_executor", return_value=awaitable_exception(), ), - patch.object(ArqRedis, "lpush", awaitable_exception()), + patch.object(arq.ArqRedis, "lpush", awaitable_exception()), ): result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) @@ -909,7 +925,7 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), - patch.object(ArqRedis, "lpush", awaitable_exception()), + patch.object(arq.ArqRedis, "lpush", awaitable_exception()), ): result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) @@ -1033,7 +1049,7 @@ async def test_mapping_manager_empty_queue(setup_worker_db, standalone_worker_co @pytest.mark.asyncio async def test_mapping_manager_empty_queue_error_during_setup(setup_worker_db, standalone_worker_context): await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(ArqRedis, "rpop", Exception()): + with patch.object(arq.ArqRedis, "rpop", Exception()): result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. 
@@ -1115,7 +1131,7 @@ async def test_mapping_manager_occupied_queue_mapping_in_progress_error_during_e await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") with ( patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress), - patch.object(ArqRedis, "enqueue_job", return_value=awaitable_exception()), + patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), ): result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) @@ -1143,7 +1159,7 @@ async def test_mapping_manager_occupied_queue_mapping_not_in_progress_error_duri await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") with ( patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found), - patch.object(ArqRedis, "enqueue_job", return_value=awaitable_exception()), + patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), ): result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) From b257447e53097b0b6885f14c5051c0b631beb8e5 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 28 Feb 2025 15:27:01 -0800 Subject: [PATCH 004/166] Bump Dependencies --- poetry.lock | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/poetry.lock b/poetry.lock index 8f9ea2a8..0084bab7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -688,13 +688,13 @@ crt = ["awscrt (==0.21.2)"] [[package]] name = "botocore-stubs" -version = "1.37.15" +version = "1.37.16" description = "Type annotations and code completion for botocore" optional = false python-versions = ">=3.8" files = [ - {file = "botocore_stubs-1.37.15-py3-none-any.whl", hash = "sha256:70ef39669f3b9421c20295535aaeb81aa62d6a90969fb631caabe480fe11af0c"}, - {file = "botocore_stubs-1.37.15.tar.gz", hash = "sha256:055525b345cac085b4607335b13744756a3d43a4b7025b2e977d1c139b15c31b"}, + {file = "botocore_stubs-1.37.16-py3-none-any.whl", hash = "sha256:33973ee0e54ad5bf9f8560b2c36fc532b98540af6b9d4a57ffce5ae62a743a2a"}, + {file = "botocore_stubs-1.37.16.tar.gz", hash = "sha256:532376611ae0c49488b7bdac3674da9ac0de9a6c65198432790b11da41502caf"}, ] [package.dependencies] @@ -1783,13 +1783,13 @@ type = ["pytest-mypy"] [[package]] name = "iniconfig" -version = "2.0.0" +version = "2.1.0" description = "brain-dead simple config-ini parsing" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, - {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, + {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, + {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, ] [[package]] @@ -2474,19 +2474,19 @@ ptyprocess = ">=0.5" [[package]] name = "platformdirs" -version = "4.3.6" +version = "4.3.7" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, - {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, + {file = "platformdirs-4.3.7-py3-none-any.whl", hash = "sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94"}, + {file = "platformdirs-4.3.7.tar.gz", hash = "sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351"}, ] [package.extras] -docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"] -type = ["mypy (>=1.11.2)"] +docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"] +type = ["mypy (>=1.14.1)"] [[package]] name = "pluggy" @@ -3286,13 +3286,13 @@ crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] [[package]] name = "setuptools" -version = "76.1.0" +version = "77.0.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.9" files = [ - {file = "setuptools-76.1.0-py3-none-any.whl", hash = "sha256:34750dcb17d046929f545dec9b8349fe42bf4ba13ddffee78428aec422dbfb73"}, - {file = "setuptools-76.1.0.tar.gz", hash = "sha256:4959b9ad482ada2ba2320c8f1a8d8481d4d8d668908a7a1b84d987375cd7f5bd"}, + {file = "setuptools-77.0.1-py3-none-any.whl", hash = "sha256:81a234dff81a82bb52e522c8aef145d0dd4de1fd6de4d3b196d0f77dc2fded26"}, + {file = "setuptools-77.0.1.tar.gz", hash = "sha256:a1246a1b4178c66d7cf50c9fc6d530fac3f89bc284cf803c7fa878c41b1a03b2"}, ] [package.extras] From a2c28e16201e0d26571273ac05cbe94c7aad8dbf Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 28 Feb 2025 15:32:32 -0800 Subject: [PATCH 005/166] Check for Nonetype Target Sequences to Silence MyPy Error --- src/mavedb/lib/validation/dataframe/variant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/lib/validation/dataframe/variant.py b/src/mavedb/lib/validation/dataframe/variant.py index eb81873d..4b784994 100644 --- a/src/mavedb/lib/validation/dataframe/variant.py +++ b/src/mavedb/lib/validation/dataframe/variant.py @@ -258,7 +258,7 @@ def validate_observed_sequence_types(targets: dict[str, TargetSequence]) -> list if not targets: raise ValueError("No targets were provided; cannot validate observed sequence types with none observed.") - observed_sequence_types = [target.sequence_type for target in targets.values()] + observed_sequence_types = [target.sequence_type for target in targets.values() if target.sequence_type is not None] invalid_sequence_types = set(observed_sequence_types) - set(valid_sequence_types) if invalid_sequence_types: raise ValueError( From 4a65b2deac20e55b262c38a8a5f2204664837ee5 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 28 Feb 2025 21:53:14 -0800 Subject: [PATCH 006/166] Replace DataSet Columns Setter in Worker Variant Mocker --- tests/helpers/util/variant.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/helpers/util/variant.py b/tests/helpers/util/variant.py index 57bcd436..95720cac 100644 --- a/tests/helpers/util/variant.py +++ b/tests/helpers/util/variant.py @@ -7,7 +7,7 @@ 
from sqlalchemy import select from unittest.mock import patch -from mavedb.lib.score_sets import create_variants, create_variants_data, csv_data_to_df +from mavedb.lib.score_sets import create_variants, columns_for_dataset, create_variants_data, csv_data_to_df from mavedb.lib.validation.dataframe.dataframe import validate_and_standardize_dataframe_pair from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.enums.mapping_state import MappingState @@ -73,6 +73,10 @@ def mock_worker_variant_insertion( assert num_variants == 3 item.processing_state = ProcessingState.success + item.dataset_columns = { + "score_columns": columns_for_dataset(scores), + "count_columns": columns_for_dataset(counts), + } db.add(item) db.commit() From 7c93e199452298c6d97797da1531a24bc5a93071 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Sun, 2 Mar 2025 14:20:06 -0800 Subject: [PATCH 007/166] Add Base Editor Column to Target Accessions Table --- ...3b_add_is_base_editor_column_to_target_.py | 31 +++++++++++++++++ .../lib/validation/constants/general.py | 1 + src/mavedb/models/target_accession.py | 3 +- src/mavedb/view_models/target_accession.py | 1 + tests/helpers/constants.py | 34 +++++++++++++++++-- 5 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 alembic/versions/f69b4049bc3b_add_is_base_editor_column_to_target_.py diff --git a/alembic/versions/f69b4049bc3b_add_is_base_editor_column_to_target_.py b/alembic/versions/f69b4049bc3b_add_is_base_editor_column_to_target_.py new file mode 100644 index 00000000..041edda4 --- /dev/null +++ b/alembic/versions/f69b4049bc3b_add_is_base_editor_column_to_target_.py @@ -0,0 +1,31 @@ +"""Add is_base_editor column to target_accessions + +Revision ID: f69b4049bc3b +Revises: c404b6719110 +Create Date: 2025-03-02 14:06:52.217554 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "f69b4049bc3b" +down_revision = "c404b6719110" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + "target_accessions", sa.Column("is_base_editor", sa.Boolean(), nullable=False, server_default="false") + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column("target_accessions", "is_base_editor") + # ### end Alembic commands ### diff --git a/src/mavedb/lib/validation/constants/general.py b/src/mavedb/lib/validation/constants/general.py index 94e5a844..92b4fd5b 100644 --- a/src/mavedb/lib/validation/constants/general.py +++ b/src/mavedb/lib/validation/constants/general.py @@ -35,6 +35,7 @@ hgvs_nt_column = "hgvs_nt" hgvs_splice_column = "hgvs_splice" hgvs_pro_column = "hgvs_pro" +guide_sequence_column = "guide_sequence" hgvs_columns = sorted([hgvs_nt_column, hgvs_pro_column, hgvs_splice_column]) meta_data = "meta_data" score_columns = "score_columns" diff --git a/src/mavedb/models/target_accession.py b/src/mavedb/models/target_accession.py index e054a50f..9e176888 100644 --- a/src/mavedb/models/target_accession.py +++ b/src/mavedb/models/target_accession.py @@ -1,6 +1,6 @@ from datetime import date -from sqlalchemy import Column, Date, Integer, String +from sqlalchemy import Boolean, Column, Date, Integer, String from mavedb.db.base import Base @@ -14,3 +14,4 @@ class TargetAccession(Base): gene = Column(String, nullable=True) creation_date = Column(Date, nullable=False, default=date.today) modification_date = Column(Date, nullable=False, default=date.today, onupdate=date.today) + is_base_editor = Column(Boolean, nullable=False, default=False) diff --git a/src/mavedb/view_models/target_accession.py b/src/mavedb/view_models/target_accession.py index bf78ae25..05406719 100644 --- a/src/mavedb/view_models/target_accession.py +++ b/src/mavedb/view_models/target_accession.py @@ -7,6 +7,7 @@ class TargetAccessionBase(BaseModel): accession: str + is_base_editor: bool assembly: Optional[str] gene: Optional[str] diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index ede0ef6b..d561cc05 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -540,7 +540,12 @@ "name": "TEST2", "category": "protein_coding", "externalIdentifiers": [], - "targetAccession": {"accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE}, + "targetAccession": { + "accession": VALID_ACCESSION, + "assembly": "GRCh37", + "gene": VALID_GENE, + "isBaseEditor": False, + }, } ], } @@ -554,7 +559,31 @@ { "name": "TEST2", "category": "protein_coding", - "target_accession": {"accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE}, + "target_accession": { + "accession": VALID_ACCESSION, + "assembly": "GRCh37", + "gene": VALID_GENE, + "isBaseEditor": False, + }, + } + ], +} + +TEST_BASE_EDITOR_SCORESET = { + "title": "Test Score Set Acc Title", + "short_description": "Test accession score set", + "abstract_text": "Abstract", + "method_text": "Methods", + "target_genes": [ + { + "name": "TEST2", + "category": "protein_coding", + "target_accession": { + "accession": VALID_ACCESSION, + "assembly": "GRCh37", + "gene": VALID_GENE, + "isBaseEditor": False, + }, } ], } @@ -596,6 +625,7 @@ "accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE, + "isBaseEditor": False, }, } ], From 833f6d95d4ddff076a37acf7d55cfe4ac3b15e2f Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Sun, 2 Mar 2025 22:18:29 -0800 Subject: [PATCH 008/166] Validation logic and test cases for base editor data --- .../lib/validation/dataframe/dataframe.py | 51 ++++-- .../lib/validation/dataframe/variant.py | 7 + src/mavedb/view_models/score_set.py | 20 +++ tests/validation/dataframe/conftest.py | 2 + tests/validation/dataframe/test_dataframe.py | 165 ++++++++++++++---- tests/validation/dataframe/test_variant.py | 58 ++++++ 
tests/view_models/test_score_set.py | 41 +++-- 7 files changed, 275 insertions(+), 69 deletions(-) diff --git a/src/mavedb/lib/validation/dataframe/dataframe.py b/src/mavedb/lib/validation/dataframe/dataframe.py index a8ab6557..61b96bb3 100644 --- a/src/mavedb/lib/validation/dataframe/dataframe.py +++ b/src/mavedb/lib/validation/dataframe/dataframe.py @@ -8,6 +8,7 @@ hgvs_nt_column, hgvs_pro_column, hgvs_splice_column, + guide_sequence_column, required_score_column, ) from mavedb.lib.validation.exceptions import ValidationError @@ -16,6 +17,7 @@ from mavedb.lib.validation.dataframe.variant import ( validate_hgvs_transgenic_column, validate_hgvs_genomic_column, + validate_guide_sequence_column, validate_hgvs_prefix_combinations, ) @@ -23,7 +25,7 @@ from cdot.hgvs.dataproviders import RESTDataProvider -STANDARD_COLUMNS = (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, required_score_column) +STANDARD_COLUMNS = (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, guide_sequence_column, required_score_column) def validate_and_standardize_dataframe_pair( @@ -95,26 +97,31 @@ def validate_dataframe( ValidationError If one of the validators called raises an exception """ + # basic target meta data + score_set_is_accession_based = all(target.target_accession for target in targets) + score_set_is_sequence_based = all(target.target_sequence for target in targets) + score_set_is_base_editor = score_set_is_accession_based and all( + target.target_accession.is_base_editor for target in targets + ) + # basic checks - validate_column_names(df, kind) + validate_column_names(df, kind, score_set_is_base_editor) validate_no_null_rows(df) column_mapping = {c.lower(): c for c in df.columns} - index_column = choose_dataframe_index_column(df) + index_column = choose_dataframe_index_column(df, score_set_is_base_editor) prefixes: dict[str, Optional[str]] = dict() for c in column_mapping: + is_index = column_mapping[c] == index_column + if c in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): - is_index = column_mapping[c] == index_column prefixes[c] = None # Ignore validation for null non-index hgvs columns if df[column_mapping[c]].isna().all() and not is_index: continue - score_set_is_accession_based = all(target.target_accession for target in targets) - score_set_is_sequence_based = all(target.target_sequence for target in targets) - # This is typesafe, despite Pylance's claims otherwise if score_set_is_accession_based and not score_set_is_sequence_based: validate_hgvs_genomic_column( @@ -140,6 +147,9 @@ def validate_dataframe( else: prefixes[c] = df[column_mapping[c]].dropna()[0][0] + elif c is guide_sequence_column: + validate_guide_sequence_column(df[column_mapping[c]], is_index=is_index) + else: force_numeric = (c == required_score_column) or (kind == "counts") validate_data_column(df[column_mapping[c]], force_numeric) @@ -213,7 +223,7 @@ def column_sort_function(value, columns): return df[new_columns] -def validate_column_names(df: pd.DataFrame, kind: str) -> None: +def validate_column_names(df: pd.DataFrame, kind: str, is_base_editor: bool) -> None: """Validate the column names in a dataframe. This function validates the column names in the given dataframe. 
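The new `is_base_editor` parameter added to `validate_column_names` above makes a `guide_sequence` column mandatory for base editor score files. A rough usage sketch, assuming the import path introduced by this patch series; the DataFrame contents are made up:

```python
import pandas as pd

from mavedb.lib.validation.dataframe.dataframe import validate_column_names

# Hypothetical base editor scores file: guide_sequence is required alongside
# the usual HGVS and score columns.
scores = pd.DataFrame(
    {
        "hgvs_nt": ["c.1A>G", "c.2C>T"],
        "guide_sequence": ["ACGTACGT", "ACGTACGT"],
        "score": [0.5, -1.2],
    }
)

# Passes with the columns above; per this patch, dropping guide_sequence while
# is_base_editor=True would raise ValidationError.
validate_column_names(scores, kind="scores", is_base_editor=True)
```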
@@ -256,18 +266,27 @@ def validate_column_names(df: pd.DataFrame, kind: str) -> None: raise ValueError("kind only accepts scores and counts") if hgvs_splice_column in columns: - if hgvs_nt_column not in columns or hgvs_pro_column not in columns: - raise ValidationError( - f"dataframes with '{hgvs_splice_column}' must also define '{hgvs_nt_column}' and '{hgvs_pro_column}'" - ) + msg = "dataframes with '{0}' must also define a '{1}' column" + if hgvs_nt_column not in columns: + raise ValidationError(msg.format(hgvs_splice_column, hgvs_nt_column)) + elif hgvs_pro_column not in columns: + raise ValidationError(msg.format(hgvs_splice_column, hgvs_pro_column)) if len(columns) != len(set(columns)): raise ValidationError("duplicate column names are not allowed (this check is case insensitive)") + if is_base_editor: + msg = "dataframes for base editor data must also define the '{0}' column" + if guide_sequence_column not in columns: + raise ValidationError(msg.format(guide_sequence_column)) + + elif hgvs_nt_column not in columns: + raise ValidationError(msg.format(hgvs_nt_column)) + if set(columns).isdisjoint({hgvs_nt_column, hgvs_splice_column, hgvs_pro_column}): raise ValidationError("dataframe does not define any variant columns") - if set(columns).issubset({hgvs_nt_column, hgvs_splice_column, hgvs_pro_column}): + if set(columns).issubset({hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, guide_sequence_column}): raise ValidationError("dataframe does not define any data columns") @@ -288,7 +307,7 @@ def validate_no_null_rows(df: pd.DataFrame) -> None: raise ValidationError(f"found {len(df[df.isnull().all(axis=1)])} null rows in the data frame") -def choose_dataframe_index_column(df: pd.DataFrame) -> str: +def choose_dataframe_index_column(df: pd.DataFrame, is_base_editor: bool) -> str: """ Identify the HGVS variant column that should be used as the index column in this dataframe. @@ -309,7 +328,9 @@ def choose_dataframe_index_column(df: pd.DataFrame) -> str: """ column_mapping = {c.lower(): c for c in df.columns if not df[c].isna().all()} - if hgvs_nt_column in column_mapping: + if is_base_editor: + return column_mapping[guide_sequence_column] + elif hgvs_nt_column in column_mapping: return column_mapping[hgvs_nt_column] elif hgvs_pro_column in column_mapping: return column_mapping[hgvs_pro_column] diff --git a/src/mavedb/lib/validation/dataframe/variant.py b/src/mavedb/lib/validation/dataframe/variant.py index 4b784994..b4b5761f 100644 --- a/src/mavedb/lib/validation/dataframe/variant.py +++ b/src/mavedb/lib/validation/dataframe/variant.py @@ -3,6 +3,7 @@ from typing import Hashable, Optional, TYPE_CHECKING import pandas as pd +from fqfa.validator import dna_bases_validator from mavehgvs.exceptions import MaveHgvsParseError from mavehgvs.variant import Variant @@ -235,6 +236,12 @@ def parse_transgenic_variant( return True, None +def validate_guide_sequence_column(column: pd.Series, is_index: bool) -> None: + validate_variant_column(column, is_index) + if column.apply(lambda x: dna_bases_validator(x) is None if x is not None else False).any(): + raise ValidationError("Invalid guide sequence provided: all guide sequences must be valid DNA sequences.") + + def validate_observed_sequence_types(targets: dict[str, TargetSequence]) -> list[str]: """ Ensures that the sequence types of the given target sequences are an accepted type. 
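`validate_guide_sequence_column` above relies on fqfa's DNA validator returning None for invalid input, which is exactly what its `.apply(...)` check tests for. A small sketch of that behavior (import path as used in the patch; example strings are illustrative):

```python
from fqfa.validator import dna_bases_validator

# Returns a match object for a string of valid DNA bases, None otherwise.
assert dna_bases_validator("ACGT") is not None
assert dna_bases_validator("ACGU") is None  # 'U' is not a DNA base
```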
diff --git a/src/mavedb/view_models/score_set.py b/src/mavedb/view_models/score_set.py index 8bc19c2d..b4def3e5 100644 --- a/src/mavedb/view_models/score_set.py +++ b/src/mavedb/view_models/score_set.py @@ -178,6 +178,26 @@ def at_least_one_target_gene_exists(cls, field_value, values): return field_value + # Validate nested label fields are not identical + @validator("target_genes") + def target_accession_base_editor_targets_are_consistent(cls, field_value, values): + # Only target accessions can have base editor data. + if len(field_value) > 1 and all([target.target_accession is not None for target in field_value]): + if len(set(target.target_accession.is_base_editor for target in field_value)) > 1: + # Throw the error for the first target, since it necessarily has an inconsistent base editor value. + raise ValidationError( + "All target accessions must be of the same base editor type.", + custom_loc=[ + "body", + "targetGene", + 0, + "targetAccession", + "isBaseEditor", + ], + ) + + return field_value + @validator("score_ranges") def score_range_labels_must_be_unique(cls, field_value: Optional[ScoreRanges]): if field_value is None: diff --git a/tests/validation/dataframe/conftest.py b/tests/validation/dataframe/conftest.py index 8e4596db..a0cd4cb0 100644 --- a/tests/validation/dataframe/conftest.py +++ b/tests/validation/dataframe/conftest.py @@ -6,6 +6,7 @@ hgvs_nt_column, hgvs_pro_column, hgvs_splice_column, + guide_sequence_column, required_score_column, ) from tests.helpers.constants import TEST_CDOT_TRANSCRIPT @@ -32,6 +33,7 @@ def setUp(self): hgvs_nt_column: ["g.1A>G", "g.1A>T"], hgvs_splice_column: ["c.1A>G", "c.1A>T"], hgvs_pro_column: ["p.Met1Val", "p.Met1Leu"], + guide_sequence_column: ["AG", "AG"], required_score_column: [1.0, 2.0], "extra": [12.0, 3.0], "count1": [3.0, 5.0], diff --git a/tests/validation/dataframe/test_dataframe.py b/tests/validation/dataframe/test_dataframe.py index 4bca6f2f..2eac2e83 100644 --- a/tests/validation/dataframe/test_dataframe.py +++ b/tests/validation/dataframe/test_dataframe.py @@ -9,6 +9,7 @@ hgvs_nt_column, hgvs_pro_column, hgvs_splice_column, + guide_sequence_column, required_score_column, ) from mavedb.lib.validation.dataframe.dataframe import ( @@ -43,6 +44,7 @@ def test_sort_dataframe(self): "count2", "extra2", "mixed_types", + guide_sequence_column, "null_col", ] ] @@ -165,74 +167,114 @@ def test_only_hgvs_row(self): class TestColumnNames(DfTestCase): def test_only_two_kinds_of_dataframe(self): with self.assertRaises(ValueError): - validate_column_names(self.dataframe, kind="score2") + validate_column_names(self.dataframe, kind="score2", is_base_editor=False) def test_score_df_has_score_column(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([required_score_column], axis=1), kind="scores") + validate_column_names( + self.dataframe.drop([required_score_column], axis=1), kind="scores", is_base_editor=False + ) def test_count_df_lacks_score_column(self): - validate_column_names(self.dataframe.drop([required_score_column], axis=1), kind="counts") + validate_column_names(self.dataframe.drop([required_score_column], axis=1), kind="counts", is_base_editor=False) with self.assertRaises(ValidationError): - validate_column_names(self.dataframe, kind="counts") + validate_column_names(self.dataframe, kind="counts", is_base_editor=False) def test_count_df_has_score_column(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe, kind="counts") + 
validate_column_names(self.dataframe, kind="counts", is_base_editor=False) def test_df_with_only_scores(self): - validate_column_names(self.dataframe[[hgvs_pro_column, required_score_column]], kind="scores") + validate_column_names( + self.dataframe[[hgvs_pro_column, required_score_column]], kind="scores", is_base_editor=False + ) def test_count_df_must_have_data(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe[[hgvs_nt_column, hgvs_pro_column]], kind="counts") + validate_column_names( + self.dataframe[[hgvs_nt_column, hgvs_pro_column]], kind="counts", is_base_editor=False + ) def test_just_hgvs_nt(self): - validate_column_names(self.dataframe.drop([hgvs_pro_column, hgvs_splice_column], axis=1), kind="scores") validate_column_names( - self.dataframe.drop([hgvs_pro_column, hgvs_splice_column, required_score_column], axis=1), kind="counts" + self.dataframe.drop([hgvs_pro_column, hgvs_splice_column], axis=1), kind="scores", is_base_editor=False + ) + validate_column_names( + self.dataframe.drop([hgvs_pro_column, hgvs_splice_column, required_score_column], axis=1), + kind="counts", + is_base_editor=False, ) def test_just_hgvs_pro(self): - validate_column_names(self.dataframe.drop([hgvs_nt_column, hgvs_splice_column], axis=1), kind="scores") validate_column_names( - self.dataframe.drop([hgvs_nt_column, hgvs_splice_column, required_score_column], axis=1), kind="counts" + self.dataframe.drop([hgvs_nt_column, hgvs_splice_column], axis=1), kind="scores", is_base_editor=False + ) + validate_column_names( + self.dataframe.drop([hgvs_nt_column, hgvs_splice_column, required_score_column], axis=1), + kind="counts", + is_base_editor=False, ) def test_just_hgvs_pro_and_nt(self): - validate_column_names(self.dataframe.drop([hgvs_splice_column], axis=1), kind="scores") - validate_column_names(self.dataframe.drop([hgvs_splice_column, required_score_column], axis=1), kind="counts") + validate_column_names(self.dataframe.drop([hgvs_splice_column], axis=1), kind="scores", is_base_editor=False) + validate_column_names( + self.dataframe.drop([hgvs_splice_column, required_score_column], axis=1), + kind="counts", + is_base_editor=False, + ) def test_hgvs_splice_must_have_pro_and_nt_both_absent(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1), kind="scores") + validate_column_names( + self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1), kind="scores", is_base_editor=False + ) def test_hgvs_splice_must_have_pro_and_nt_nt_absent(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_nt_column], axis=1), kind="scores") + validate_column_names(self.dataframe.drop([hgvs_nt_column], axis=1), kind="scores", is_base_editor=False) def test_hgvs_splice_must_have_pro_and_nt_pro_absent(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_pro_column], axis=1), kind="scores") + validate_column_names(self.dataframe.drop([hgvs_pro_column], axis=1), kind="scores", is_base_editor=False) + + def test_base_editor_must_have_nt_nt_absent(self): + with self.assertRaises(ValidationError): + validate_column_names( + self.dataframe.drop([hgvs_nt_column], axis=1), + kind="scores", + is_base_editor=False, + ) def test_hgvs_splice_must_have_pro_and_nt_and_scores(self): with self.assertRaises(ValidationError): validate_column_names( - self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, required_score_column], axis=1), 
kind="counts" + self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, required_score_column], axis=1), + kind="counts", + is_base_editor=False, ) def test_hgvs_splice_must_have_pro_and_nt_nt_scores_absent(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_nt_column, required_score_column], axis=1), kind="counts") + validate_column_names( + self.dataframe.drop([hgvs_nt_column, required_score_column], axis=1), + kind="counts", + is_base_editor=False, + ) def test_hgvs_splice_must_have_pro_and_nt_pro_scores_absent(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_pro_column, required_score_column], axis=1), kind="counts") + validate_column_names( + self.dataframe.drop([hgvs_pro_column, required_score_column], axis=1), + kind="counts", + is_base_editor=False, + ) def test_no_hgvs_column_scores(self): with pytest.raises(ValidationError) as exc_info: validate_column_names( - self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, hgvs_splice_column], axis=1), kind="scores" + self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, hgvs_splice_column], axis=1), + kind="scores", + is_base_editor=False, ) assert "dataframe does not define any variant columns" in str(exc_info.value) @@ -243,52 +285,82 @@ def test_no_hgvs_column_counts(self): [hgvs_nt_column, hgvs_pro_column, hgvs_splice_column, required_score_column], axis=1 ), kind="counts", + is_base_editor=False, ) assert "dataframe does not define any variant columns" in str(exc_info.value) def test_validation_ignores_column_ordering_scores(self): validate_column_names( - self.dataframe[[hgvs_nt_column, required_score_column, hgvs_pro_column, hgvs_splice_column]], kind="scores" + self.dataframe[[hgvs_nt_column, required_score_column, hgvs_pro_column, hgvs_splice_column]], + kind="scores", + is_base_editor=False, + ) + validate_column_names( + self.dataframe[[required_score_column, hgvs_nt_column, hgvs_pro_column]], + kind="scores", + is_base_editor=False, + ) + validate_column_names( + self.dataframe[[hgvs_pro_column, required_score_column, hgvs_nt_column]], + kind="scores", + is_base_editor=False, ) - validate_column_names(self.dataframe[[required_score_column, hgvs_nt_column, hgvs_pro_column]], kind="scores") - validate_column_names(self.dataframe[[hgvs_pro_column, required_score_column, hgvs_nt_column]], kind="scores") def test_validation_ignores_column_ordering_counts(self): validate_column_names( - self.dataframe[[hgvs_nt_column, "count1", hgvs_pro_column, hgvs_splice_column, "count2"]], kind="counts" + self.dataframe[[hgvs_nt_column, "count1", hgvs_pro_column, hgvs_splice_column, "count2"]], + kind="counts", + is_base_editor=False, + ) + validate_column_names( + self.dataframe[["count1", "count2", hgvs_nt_column, hgvs_pro_column]], kind="counts", is_base_editor=False + ) + validate_column_names( + self.dataframe[[hgvs_pro_column, "count1", "count2", hgvs_nt_column]], kind="counts", is_base_editor=False ) - validate_column_names(self.dataframe[["count1", "count2", hgvs_nt_column, hgvs_pro_column]], kind="counts") - validate_column_names(self.dataframe[[hgvs_pro_column, "count1", "count2", hgvs_nt_column]], kind="counts") def test_validation_is_case_insensitive(self): - validate_column_names(self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()}), kind="scores") validate_column_names( - self.dataframe.rename(columns={required_score_column: required_score_column.title()}), kind="scores" + self.dataframe.rename(columns={hgvs_nt_column: 
hgvs_nt_column.upper()}), kind="scores", is_base_editor=False + ) + validate_column_names( + self.dataframe.rename(columns={required_score_column: required_score_column.title()}), + kind="scores", + is_base_editor=False, ) def test_duplicate_hgvs_column_names_scores(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={hgvs_pro_column: hgvs_nt_column}), kind="scores") + validate_column_names( + self.dataframe.rename(columns={hgvs_pro_column: hgvs_nt_column}), kind="scores", is_base_editor=False + ) def test_duplicate_hgvs_column_names_counts(self): with self.assertRaises(ValidationError): validate_column_names( self.dataframe.drop([required_score_column], axis=1).rename(columns={hgvs_pro_column: hgvs_nt_column}), kind="counts", + is_base_editor=False, ) def test_duplicate_score_column_names(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={"extra": required_score_column}), kind="scores") + validate_column_names( + self.dataframe.rename(columns={"extra": required_score_column}), kind="scores", is_base_editor=False + ) def test_duplicate_data_column_names_scores(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={"count2": "count1"}), kind="scores") + validate_column_names( + self.dataframe.rename(columns={"count2": "count1"}), kind="scores", is_base_editor=False + ) def test_duplicate_data_column_names_counts(self): with self.assertRaises(ValidationError): validate_column_names( - self.dataframe.drop([required_score_column], axis=1).rename(columns={"count2": "count1"}), kind="counts" + self.dataframe.drop([required_score_column], axis=1).rename(columns={"count2": "count1"}), + kind="counts", + is_base_editor=False, ) # Written without @pytest.mark.parametrize. 
See: https://pytest.org/en/7.4.x/how-to/unittest.html#pytest-features-in-unittest-testcase-subclasses @@ -297,7 +369,9 @@ def test_invalid_column_names_scores(self): for value in invalid_values: with self.subTest(value=value): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={hgvs_splice_column: value}), kind="scores") + validate_column_names( + self.dataframe.rename(columns={hgvs_splice_column: value}), kind="scores", is_base_editor=False + ) def test_invalid_column_names_counts(self): invalid_values = [None, np.nan, "", " "] @@ -309,36 +383,55 @@ def test_invalid_column_names_counts(self): columns={hgvs_splice_column: value} ), kind="counts", + is_base_editor=False, ) def test_ignore_column_ordering_scores(self): validate_column_names( self.dataframe[[hgvs_splice_column, "extra", "count1", hgvs_pro_column, "score", hgvs_nt_column, "count2"]], kind="scores", + is_base_editor=False, ) def test_ignore_column_ordering_counts(self): validate_column_names( self.dataframe[[hgvs_splice_column, "extra", "count1", hgvs_pro_column, hgvs_nt_column, "count2"]], kind="counts", + is_base_editor=False, ) + def test_is_base_editor_and_contains_guide_sequence_column(self): + validate_column_names(self.dataframe, kind="scores", is_base_editor=True) + + def test_is_base_editor_and_does_not_contain_guide_sequence_column(self): + with self.assertRaises(ValidationError): + validate_column_names( + self.dataframe.drop(guide_sequence_column, axis=1), kind="scores", is_base_editor=True + ) + class TestChooseDataframeIndexColumn(DfTestCase): def setUp(self): super().setUp() + def test_guide_sequence_index_column(self): + index = choose_dataframe_index_column(self.dataframe, is_base_editor=True) + assert index == guide_sequence_column + def test_nt_index_column(self): - index = choose_dataframe_index_column(self.dataframe) + index = choose_dataframe_index_column(self.dataframe, is_base_editor=False) assert index == hgvs_nt_column def test_pro_index_column(self): - index = choose_dataframe_index_column(self.dataframe.drop(hgvs_nt_column, axis=1)) + index = choose_dataframe_index_column(self.dataframe.drop(hgvs_nt_column, axis=1), is_base_editor=False) assert index == hgvs_pro_column def test_no_valid_index_column(self): with self.assertRaises(ValidationError): - choose_dataframe_index_column(self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1)) + choose_dataframe_index_column( + self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1), + is_base_editor=False, + ) class TestValidateHgvsPrefixCombinations(TestCase): diff --git a/tests/validation/dataframe/test_variant.py b/tests/validation/dataframe/test_variant.py index 810780f4..c8a0f258 100644 --- a/tests/validation/dataframe/test_variant.py +++ b/tests/validation/dataframe/test_variant.py @@ -9,6 +9,7 @@ hgvs_splice_column, ) from mavedb.lib.validation.dataframe.variant import ( + validate_guide_sequence_column, validate_hgvs_transgenic_column, validate_hgvs_genomic_column, parse_genomic_variant, @@ -810,6 +811,63 @@ def test_parse_mismatched_transgenic_variant(self): assert "target sequence mismatch" in error +class TestValidateGuideSequenceColumn(DfTestCase): + def setUp(self): + super().setUp() + + self.valid_guide_sequences = [ + pd.Series(["ATG", "TGA"], name="guide_sequence"), + pd.Series(["ATGC", "TGAC"], name="guide_sequence"), + pd.Series(["ATGCG", "TGACG"], name="guide_sequence"), + ] + + self.invalid_guide_sequences = [ + pd.Series(["ATG", "XYZ"], name="guide_sequence"), # invalid DNA 
sequence + pd.Series(["123", "123"], name="guide_sequence"), # contains numeric + ] + + self.invalid_index_guide_sequences = [ + pd.Series(["ATG", None], name="guide_sequence"), # contains None value + pd.Series(["ATG", "ATG"], name="guide_sequence"), # identical sequences + ] + + self.accession_test_case = AccessionTestCase() + + def test_valid_guide_sequences(self): + for column in self.valid_guide_sequences + self.invalid_index_guide_sequences: + with self.subTest(column=column): + validate_guide_sequence_column( + column, + is_index=False, + ) + + def test_invalid_guide_sequences(self): + for column in self.invalid_guide_sequences: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_guide_sequence_column( + column, + is_index=False, + ) + + def test_valid_guide_sequences_index(self): + for column in self.valid_guide_sequences: + with self.subTest(column=column): + validate_guide_sequence_column( + column, + is_index=True, + ) + + def test_invalid_guide_sequences_index(self): + for column in self.invalid_guide_sequences + self.invalid_index_guide_sequences: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_guide_sequence_column( + column, + is_index=True, + ) + + class TestValidateObservedSequenceTypes(unittest.TestCase): def setUp(self): super().setUp() diff --git a/tests/view_models/test_score_set.py b/tests/view_models/test_score_set.py index 1247020c..c155f9b7 100644 --- a/tests/view_models/test_score_set.py +++ b/tests/view_models/test_score_set.py @@ -3,7 +3,7 @@ from mavedb.view_models.publication_identifier import PublicationIdentifierCreate from mavedb.view_models.score_set import ScoreSetCreate, ScoreSetModify from mavedb.view_models.target_gene import TargetGeneCreate -from tests.helpers.constants import TEST_MINIMAL_SEQ_SCORESET +from tests.helpers.constants import TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_SEQ_SCORESET def test_cannot_create_score_set_without_a_target(): @@ -459,25 +459,10 @@ def test_cannot_create_score_set_with_normal_range_and_no_wild_type_score(): ], } - with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) - - assert "A normal range has been provided, but no wild type score has been provided." in str(exc_info.value) - - -def test_cannot_create_score_set_without_default_ranges(): - score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() - score_set_test["score_ranges"] = { - "wt_score": -0.5, - "ranges": [ - {"label": "range_1", "classification": "other", "range": (-1, 0)}, - ], - } - with pytest.raises(ValueError) as exc_info: ScoreSetModify(**score_set_test) - assert "unexpected value; permitted: 'normal', 'abnormal', 'not_specified'" in str(exc_info.value) + assert "A normal range has been provided, but no wild type score has been provided." in str(exc_info.value) @pytest.mark.parametrize("classification", ["normal", "abnormal", "not_specified"]) @@ -491,4 +476,24 @@ def test_can_create_score_set_with_any_range_classification(classification): ], } - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) + assert "Unexpected classification value(s): other. 
Permitted values: ['normal', 'abnormal']" in str(exc_info.value) + + +def test_cannot_create_score_set_with_inconsistent_base_editor_flags(): + score_set_test = TEST_MINIMAL_ACC_SCORESET.copy() + + target_gene_one = TargetGeneCreate(**score_set_test["targetGenes"][0]) + target_gene_two = TargetGeneCreate(**score_set_test["targetGenes"][0]) + + target_gene_one.target_accession.is_base_editor = True + target_gene_two.target_accession.is_base_editor = False + + score_set_test.pop("targetGenes") + with pytest.raises(ValueError) as exc_info: + ScoreSetModify( + **score_set_test, + target_genes=[target_gene_one, target_gene_two], + ) + + assert "All target accessions must be of the same base editor type." in str(exc_info.value) From 0c74f1ddbfeb89239cefe229778ae428e524c5a8 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 09:12:49 -0800 Subject: [PATCH 009/166] Add isBaseEditor Flag to Remaining Accession Tests --- tests/helpers/constants.py | 2 +- tests/view_models/test_target_gene.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index d561cc05..f87c4341 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -563,7 +563,7 @@ "accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE, - "isBaseEditor": False, + "is_base_editor": False, }, } ], diff --git a/tests/view_models/test_target_gene.py b/tests/view_models/test_target_gene.py index 13f8b78a..e72eafd7 100644 --- a/tests/view_models/test_target_gene.py +++ b/tests/view_models/test_target_gene.py @@ -44,7 +44,7 @@ def test_create_target_gene_with_accession(): name = "BRCA1" category = "regulatory" external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 1}] - target_accession = {"accession": "NM_001637.3", "assembly": "GRCh37", "gene": "BRCA1"} + target_accession = {"accession": "NM_001637.3", "assembly": "GRCh37", "gene": "BRCA1", "isBaseEditor": False} externalIdentifier = TargetGeneCreate( name=name, category=category, @@ -206,7 +206,7 @@ def test_cant_create_target_gene_with_both_sequence_and_accession(): name = "UBE2I" category = "regulatory" external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 1}] - target_accession = {"accession": "NM_001637.3", "assembly": "GRCh37", "gene": "BRCA1"} + target_accession = {"accession": "NM_001637.3", "assembly": "GRCh37", "gene": "BRCA1", "isBaseEditor": False} target_sequence = { "sequenceType": "dna", "sequence": "ATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGA" From e56d2990a17742cbeb398251dfa38473dea18fa0 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 09:45:55 -0800 Subject: [PATCH 010/166] Add GUIDE_SEQUENCE_COLUMN constant to mave lib --- src/mavedb/lib/mave/constants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mavedb/lib/mave/constants.py b/src/mavedb/lib/mave/constants.py index f313436a..a94da0c1 100644 --- a/src/mavedb/lib/mave/constants.py +++ b/src/mavedb/lib/mave/constants.py @@ -6,6 +6,7 @@ HGVS_NT_COLUMN = "hgvs_nt" HGVS_SPLICE_COLUMN = "hgvs_splice" HGVS_PRO_COLUMN = "hgvs_pro" +GUIDE_SEQUENCE_COLUMN = "guide_sequence" HGVS_COLUMNS = sorted([HGVS_NT_COLUMN, HGVS_PRO_COLUMN, HGVS_SPLICE_COLUMN]) # META_DATA = 'meta_data' From 5bc49dee21c817d9e0ced4d8ab7a62df7d71f766 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 09:46:13 -0800 Subject: [PATCH 011/166] Use 
existing boolean flag for transgenic marker in prefix validation --- src/mavedb/lib/validation/dataframe/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/lib/validation/dataframe/dataframe.py b/src/mavedb/lib/validation/dataframe/dataframe.py index 61b96bb3..a43f6b55 100644 --- a/src/mavedb/lib/validation/dataframe/dataframe.py +++ b/src/mavedb/lib/validation/dataframe/dataframe.py @@ -158,7 +158,7 @@ def validate_dataframe( hgvs_nt=prefixes[hgvs_nt_column], hgvs_splice=prefixes[hgvs_splice_column], hgvs_pro=prefixes[hgvs_pro_column], - transgenic=all(target.target_sequence for target in targets), + transgenic=score_set_is_sequence_based, ) From 6fc41fe355eb4c94d931e0691e9b6d61566c2efc Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 11:27:00 -0800 Subject: [PATCH 012/166] Clarify error message for accession based variants with accessions missing from targets list --- src/mavedb/lib/validation/dataframe/column.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mavedb/lib/validation/dataframe/column.py b/src/mavedb/lib/validation/dataframe/column.py index 8505a8cc..ef6ee23c 100644 --- a/src/mavedb/lib/validation/dataframe/column.py +++ b/src/mavedb/lib/validation/dataframe/column.py @@ -82,7 +82,9 @@ def validate_variant_formatting(column: pd.Series, prefixes: list[str], targets: if not all(str(v).split(":")[1][:2] in prefixes for v in variants): raise ValidationError(f"variant column '{column.name}' has invalid variant prefixes") if not all(str(v).split(":")[0] in targets for v in variants): - raise ValidationError(f"variant column '{column.name}' has invalid accession identifiers") + raise ValidationError( + f"variant column '{column.name}' has invalid accession identifiers; some accession identifiers present in the score file were not added as targets" + ) else: if len(set(v[:2] for v in variants)) > 1: From 9f7a88eec70f5453db68ab7e87a59c76d7e35c70 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 11:29:11 -0800 Subject: [PATCH 013/166] Move guide sequence column to the end of the standard columns sorted list --- src/mavedb/lib/validation/dataframe/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/lib/validation/dataframe/dataframe.py b/src/mavedb/lib/validation/dataframe/dataframe.py index a43f6b55..edb253be 100644 --- a/src/mavedb/lib/validation/dataframe/dataframe.py +++ b/src/mavedb/lib/validation/dataframe/dataframe.py @@ -25,7 +25,7 @@ from cdot.hgvs.dataproviders import RESTDataProvider -STANDARD_COLUMNS = (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, guide_sequence_column, required_score_column) +STANDARD_COLUMNS = (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, required_score_column, guide_sequence_column) def validate_and_standardize_dataframe_pair( From ddd651725048349d1e69be61337cac049692d2eb Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 12:15:44 -0800 Subject: [PATCH 014/166] Add additional column validation tests --- tests/validation/dataframe/test_dataframe.py | 40 +++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/tests/validation/dataframe/test_dataframe.py b/tests/validation/dataframe/test_dataframe.py index 2eac2e83..0673523d 100644 --- a/tests/validation/dataframe/test_dataframe.py +++ b/tests/validation/dataframe/test_dataframe.py @@ -494,14 +494,50 @@ def test_same_df(self): def test_ignore_order(self): validate_variant_columns_match(self.dataframe, 
self.dataframe.iloc[::-1]) - def test_missing_column(self): + def test_missing_column_nt(self): with self.assertRaises(ValidationError): validate_variant_columns_match(self.dataframe, self.dataframe.drop(hgvs_nt_column, axis=1)) with self.assertRaises(ValidationError): validate_variant_columns_match(self.dataframe.drop(hgvs_nt_column, axis=1), self.dataframe) - def test_missing_variant(self): + def test_missing_column_pro(self): + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, self.dataframe.drop(hgvs_pro_column, axis=1)) + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe.drop(hgvs_pro_column, axis=1), self.dataframe) + + def test_missing_column_splice(self): + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, self.dataframe.drop(hgvs_splice_column, axis=1)) + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe.drop(hgvs_splice_column, axis=1), self.dataframe) + + def test_missing_column_guide(self): + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, self.dataframe.drop(guide_sequence_column, axis=1)) + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe.drop(guide_sequence_column, axis=1), self.dataframe) + + def test_missing_variant_nt(self): + df2 = self.dataframe.copy() + df2.loc[0, hgvs_nt_column] = None + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, df2) + + def test_missing_variant_pro(self): df2 = self.dataframe.copy() df2.loc[0, hgvs_pro_column] = None with self.assertRaises(ValidationError): validate_variant_columns_match(self.dataframe, df2) + + def test_missing_variant_splice(self): + df2 = self.dataframe.copy() + df2.loc[0, hgvs_splice_column] = None + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, df2) + + def test_missing_guide(self): + df2 = self.dataframe.copy() + df2.loc[0, guide_sequence_column] = None + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, df2) From 1ffb8910e52c8195f6ec5e6628961809bb1c1c26 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 12:16:03 -0800 Subject: [PATCH 015/166] Fix sort order of dataframe test case columns --- tests/validation/dataframe/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/validation/dataframe/conftest.py b/tests/validation/dataframe/conftest.py index a0cd4cb0..38612df4 100644 --- a/tests/validation/dataframe/conftest.py +++ b/tests/validation/dataframe/conftest.py @@ -33,8 +33,8 @@ def setUp(self): hgvs_nt_column: ["g.1A>G", "g.1A>T"], hgvs_splice_column: ["c.1A>G", "c.1A>T"], hgvs_pro_column: ["p.Met1Val", "p.Met1Leu"], - guide_sequence_column: ["AG", "AG"], required_score_column: [1.0, 2.0], + guide_sequence_column: ["AG", "AG"], "extra": [12.0, 3.0], "count1": [3.0, 5.0], "count2": [9, 10], From 9cb16e4a5fe83d7cea6bfa8f130b584ca23965cc Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 12:16:25 -0800 Subject: [PATCH 016/166] Use equality comparison over is operator for column name comparison --- src/mavedb/lib/validation/dataframe/dataframe.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mavedb/lib/validation/dataframe/dataframe.py b/src/mavedb/lib/validation/dataframe/dataframe.py index edb253be..be95b5b4 100644 --- a/src/mavedb/lib/validation/dataframe/dataframe.py 
+++ b/src/mavedb/lib/validation/dataframe/dataframe.py @@ -147,7 +147,7 @@ def validate_dataframe( else: prefixes[c] = df[column_mapping[c]].dropna()[0][0] - elif c is guide_sequence_column: + elif c == guide_sequence_column: validate_guide_sequence_column(df[column_mapping[c]], is_index=is_index) else: @@ -377,7 +377,7 @@ def validate_variant_columns_match(df1: pd.DataFrame, df2: pd.DataFrame): If both dataframes do not define the same variants within each column """ for c in df1.columns: - if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): + if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, guide_sequence_column): if c not in df2: raise ValidationError("both score and count dataframes must define matching HGVS columns") elif df1[c].isnull().all() and df2[c].isnull().all(): @@ -387,6 +387,6 @@ def validate_variant_columns_match(df1: pd.DataFrame, df2: pd.DataFrame): f"both score and count dataframes must define matching variants, discrepancy found in '{c}'" ) for c in df2.columns: - if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): + if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, guide_sequence_column): if c not in df1: raise ValidationError("both score and count dataframes must define matching HGVS columns") From d1f4f9eaafaf36945f6be6c8759273925765ea74 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 12:17:23 -0800 Subject: [PATCH 017/166] Allow the Unix Domain Socket during test runs This allows the use of the vs-code pytest extension but still prevents the use of external connections. Enabling this socket makes it easier to test within the code editor. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 79467b15..e9681321 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,7 +97,7 @@ plugins = [ mypy_path = "mypy_stubs" [tool.pytest.ini_options] -addopts = "-v -rP --import-mode=importlib --disable-socket --allow-hosts localhost,::1,127.0.0.1" +addopts = "-v -rP --import-mode=importlib --disable-socket --allow-unix-socket --allow-hosts localhost,::1,127.0.0.1" asyncio_mode = 'strict' testpaths = "tests/" pythonpath = "." From 2b4761b75cc4ffc62b159e2caf91a4271e607a48 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 7 Mar 2025 13:18:59 -0800 Subject: [PATCH 018/166] Add Multi-Variant Support for Accession Based Targets The hgvs package is not able to parse allelic variation (multi-variants denoted by brackets), which are often a key variant string in base editor data. 
We work around this by: - Parsing the multi-variant into MaveHGVS without any target info to ascertain whether it is syntactically valid - Parsing each subvariant against the provided transcript to ascertain whether it is informationally valid --- .../lib/validation/dataframe/variant.py | 56 ++++++++++++++++--- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/src/mavedb/lib/validation/dataframe/variant.py b/src/mavedb/lib/validation/dataframe/variant.py index b4b5761f..07c400d0 100644 --- a/src/mavedb/lib/validation/dataframe/variant.py +++ b/src/mavedb/lib/validation/dataframe/variant.py @@ -76,7 +76,7 @@ def validate_hgvs_transgenic_column(column: pd.Series, is_index: bool, targets: target_seqs = construct_target_sequence_mappings(column, targets) parsed_variants = [ - parse_transgenic_variant(idx, variant, target_seqs, len(targets) > 1) for idx, variant in column.items() + validate_transgenic_variant(idx, variant, target_seqs, len(targets) > 1) for idx, variant in column.items() ] # format and raise an error message that contains all invalid variants @@ -168,10 +168,10 @@ def validate_hgvs_genomic_column( hp, vr = None, None if hp is not None and vr is not None: - parsed_variants = [parse_genomic_variant(idx, variant, hp, vr) for idx, variant in column.items()] + parsed_variants = [validate_genomic_variant(idx, variant, hp, vr) for idx, variant in column.items()] else: parsed_variants = [ - parse_transgenic_variant( + validate_transgenic_variant( idx, variant, {target: None for target in target_accession_identifiers}, @@ -190,9 +190,46 @@ def validate_hgvs_genomic_column( return -def parse_genomic_variant( +def validate_genomic_variant( idx: Hashable, variant_string: str, parser: "Parser", validator: "Validator" ) -> tuple[bool, Optional[str]]: + def _validate_allelic_variation(variant: str) -> bool: + """ + The HGVS package is currently unable to parse allelic variation, and this doesn't seem like a planned + feature (see: https://github.com/biocommons/hgvs/issues/538). As a workaround and because MaveHGVS, + does support this sort of multivariant we can: + - Validate that the multi-variant allele is valid HGVS. + - Validate each sub-variant in an allele is valid with respect to the transcript. + + Parameters + ---------- + variant : str + The multi-variant allele to validate. + + Returns + ------- + bool + True if the allele is valid. + + Raises + ------ + MaveHgvsParseError + If the variant is not a valid HGVS string (for reasons of syntax). + hgvs.exceptions.HGVSError + If the variant is not a valid HGVS string (for reasons of transcript/variant inconsistency). + """ + transcript, multi_variant = variant.split(":") + + # Validate that the multi-variant allele is valid HGVS. + Variant(multi_variant) + prefix, variants = multi_variant[0:2], multi_variant[2:] + + # Validate each sub-variant in an allele is valid with respect to the transcript. + for subvariant in variants.strip("[]").split(";"): + validator.validate(parser.parse(f"{transcript}:{prefix}{subvariant}"), strict=False) + + return True + # Not pretty, but if we make it here we're guaranteed to have hgvs installed as a package, and we # should make use of the built in exception they provide for variant validation. 
import hgvs.exceptions @@ -202,14 +239,19 @@ def parse_genomic_variant( for variant in variant_string.split(" "): try: - validator.validate(parser.parse(variant), strict=False) + if "[" in variant: + _validate_allelic_variation(variant) + else: + validator.validate(parser.parse(variant), strict=False) + except MaveHgvsParseError: + return False, f"Failed to parse variant string '{variant}' at row {idx}." except hgvs.exceptions.HGVSError as e: - return False, f"Failed to parse row {idx} with HGVS exception: {e}" + return False, f"Failed to parse row {idx} with HGVS exception: {e}." return True, None -def parse_transgenic_variant( +def validate_transgenic_variant( idx: Hashable, variant_string: str, target_sequences: dict[str, Optional[str]], is_fully_qualified: bool ) -> tuple[bool, Optional[str]]: if not variant_string: From 53e906528af5572eefd5473b91c9c9185dbb7836 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 7 Mar 2025 13:21:46 -0800 Subject: [PATCH 019/166] Multi-Variant Genomic Validation Tests Adds tests for multi-variant validation for accession based variants. As part of this change, an additional transcript was added to tests genomic based protein variants in addition to just testing nucleotide based variants. --- tests/conftest_optional.py | 12 +- tests/helpers/constants.py | 37 ++- tests/helpers/data/refseq.NP_001637.4.fasta | 2 + .../helpers/data/refseq.NP_001637.4.fasta.fai | 1 + tests/helpers/util/score_set.py | 6 +- tests/routers/test_hgvs.py | 26 +- tests/routers/test_statistics.py | 8 +- tests/validation/dataframe/conftest.py | 17 +- tests/validation/dataframe/test_dataframe.py | 2 +- tests/validation/dataframe/test_variant.py | 311 +++++++++++------- tests/worker/test_jobs.py | 22 +- 11 files changed, 285 insertions(+), 159 deletions(-) create mode 100644 tests/helpers/data/refseq.NP_001637.4.fasta create mode 100644 tests/helpers/data/refseq.NP_001637.4.fasta.fai diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index e0f35fc4..722e8dc6 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -138,19 +138,23 @@ def data_provider(): To provide the transcript for the FASTA file without a network request, use: ``` - from helpers.utils.constants import TEST_CDOT_TRANSCRIPT + from helpers.utils.constants import TEST_NT_CDOT_TRANSCRIPT, TEST_PRO_CDOT_TRANSCRIPT from unittest.mock import patch import cdot.hgvs.dataproviders - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT): + ... + with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_PRO_CDOT_TRANSCRIPT): ... 
``` """ this_file_dir = os.path.dirname(abspath(getsourcefile(lambda: 0))) - test_fasta_file = os.path.join(this_file_dir, "helpers/data/refseq.NM_001637.3.fasta") + test_nt_fasta_file = os.path.join(this_file_dir, "helpers/data/refseq.NM_001637.3.fasta") + test_pro_fasta_file = os.path.join(this_file_dir, "helpers/data/refseq.NP_001637.4.fasta") data_provider = cdot.hgvs.dataproviders.RESTDataProvider( seqfetcher=cdot.hgvs.dataproviders.ChainedSeqFetcher( - cdot.hgvs.dataproviders.FastaSeqFetcher(test_fasta_file), + cdot.hgvs.dataproviders.FastaSeqFetcher(test_nt_fasta_file), + cdot.hgvs.dataproviders.FastaSeqFetcher(test_pro_fasta_file), # Include normal seqfetcher to fall back on mocked requests (or expose test shortcomings via socket connection attempts). cdot.hgvs.dataproviders.SeqFetcher(), ) diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index f87c4341..a1b528ae 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -11,7 +11,8 @@ TEST_CROSSREF_IDENTIFIER = "10.1371/2021.06.22.21259265" TEST_ORCID_ID = "1111-1111-1111-1111" -VALID_ACCESSION = "NM_001637.3" +VALID_NT_ACCESSION = "NM_001637.3" +VALID_PRO_ACCESSION = "NP_001637.4" VALID_GENE = "BRCA1" SAVED_PUBMED_PUBLICATION = { @@ -541,7 +542,7 @@ "category": "protein_coding", "externalIdentifiers": [], "targetAccession": { - "accession": VALID_ACCESSION, + "accession": VALID_NT_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE, "isBaseEditor": False, @@ -560,7 +561,7 @@ "name": "TEST2", "category": "protein_coding", "target_accession": { - "accession": VALID_ACCESSION, + "accession": VALID_NT_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE, "is_base_editor": False, @@ -579,7 +580,7 @@ "name": "TEST2", "category": "protein_coding", "target_accession": { - "accession": VALID_ACCESSION, + "accession": VALID_NT_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE, "isBaseEditor": False, @@ -622,7 +623,7 @@ "externalIdentifiers": [], "targetAccession": { "recordType": "TargetAccession", - "accession": VALID_ACCESSION, + "accession": VALID_NT_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE, "isBaseEditor": False, @@ -643,10 +644,32 @@ "officialCollections": [], } -TEST_CDOT_TRANSCRIPT = { +TEST_NT_CDOT_TRANSCRIPT = { "start_codon": 0, "stop_codon": 18, - "id": VALID_ACCESSION, + "id": VALID_NT_ACCESSION, + "gene_version": "313", + "gene_name": VALID_GENE, + "biotype": ["protein_coding"], + "protein": "NP_001628.1", + "genome_builds": { + "GRCh37": { + "cds_end": 1, + "cds_start": 18, + "contig": "NC_000007.13", + # The exons are non-sense but it doesn't really matter for the tests. 
+ "exons": [[1, 12, 20, 2001, 2440, "M196 I1 M61 I1 M181"], [12, 18, 19, 1924, 2000, None]], + "start": 1, + "stop": 18, + "strand": "+", + } + }, +} + +TEST_PRO_CDOT_TRANSCRIPT = { + "start_codon": 0, + "stop_codon": 18, + "id": VALID_PRO_ACCESSION, "gene_version": "313", "gene_name": VALID_GENE, "biotype": ["protein_coding"], diff --git a/tests/helpers/data/refseq.NP_001637.4.fasta b/tests/helpers/data/refseq.NP_001637.4.fasta new file mode 100644 index 00000000..6904295b --- /dev/null +++ b/tests/helpers/data/refseq.NP_001637.4.fasta @@ -0,0 +1,2 @@ +>NP_001637.4 range=chr7:36512941-36724494 5'pad=0 3'pad=0 strand=- repeatMasking=none +DYGYYDYGYYDYGYYDYGYYDYGYYDYGYYDYGYY diff --git a/tests/helpers/data/refseq.NP_001637.4.fasta.fai b/tests/helpers/data/refseq.NP_001637.4.fasta.fai new file mode 100644 index 00000000..eb93b5fa --- /dev/null +++ b/tests/helpers/data/refseq.NP_001637.4.fasta.fai @@ -0,0 +1 @@ +NP_001637.4 35 86 35 36 diff --git a/tests/helpers/util/score_set.py b/tests/helpers/util/score_set.py index 1cc61e54..8086dbed 100644 --- a/tests/helpers/util/score_set.py +++ b/tests/helpers/util/score_set.py @@ -6,7 +6,7 @@ from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate -from tests.helpers.constants import TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_SEQ_SCORESET, TEST_CDOT_TRANSCRIPT +from tests.helpers.constants import TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_SEQ_SCORESET, TEST_NT_CDOT_TRANSCRIPT from fastapi.testclient import TestClient @@ -39,7 +39,9 @@ def create_acc_score_set( jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.schema()) - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT + ): response = client.post("/api/v1/score-sets/", json=score_set_payload) assert response.status_code == 200, "Could not create accession based score set" diff --git a/tests/routers/test_hgvs.py b/tests/routers/test_hgvs.py index 9a19f709..b931d859 100644 --- a/tests/routers/test_hgvs.py +++ b/tests/routers/test_hgvs.py @@ -10,24 +10,24 @@ fastapi = pytest.importorskip("fastapi") hgvs = pytest.importorskip("hgvs") -from tests.helpers.constants import TEST_CDOT_TRANSCRIPT, VALID_ACCESSION, VALID_GENE +from tests.helpers.constants import TEST_NT_CDOT_TRANSCRIPT, VALID_NT_ACCESSION, VALID_GENE VALID_MAJOR_ASSEMBLY = "GRCh38" VALID_MINOR_ASSEMBLY = "GRCh38.p3" INVALID_ASSEMBLY = "undefined" -INVALID_ACCESSION = "NC_999999.99" +INVALID_NT_ACCESSION = "NC_999999.99" SMALL_ACCESSION = "NM_002977.4" INVALID_GENE = "fnord" VALID_TRANSCRIPT = "NM_001408458.1" INVALID_TRANSCRIPT = "NX_99999.1" -VALID_VARIANT = VALID_ACCESSION + ":c.1G>A" -INVALID_VARIANT = VALID_ACCESSION + ":c.1delA" +VALID_VARIANT = VALID_NT_ACCESSION + ":c.1G>A" +INVALID_VARIANT = VALID_NT_ACCESSION + ":c.1delA" HAS_PROTEIN_ACCESSION = "NM_000014.4" PROTEIN_ACCESSION = "NP_000005.2" def test_hgvs_fetch_valid(client, setup_router_db): - response = client.get(f"/api/v1/hgvs/fetch/{VALID_ACCESSION}") + response = client.get(f"/api/v1/hgvs/fetch/{VALID_NT_ACCESSION}") assert response.status_code == 200 assert response.text == '"GATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACA"' @@ -43,14 +43,18 @@ def test_hgvs_fetch_invalid(client, setup_router_db): def test_hgvs_validate_valid(client, setup_router_db): - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + 
with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT + ): payload = {"variant": VALID_VARIANT} response = client.post("/api/v1/hgvs/validate", json=payload) assert response.status_code == 200 def test_hgvs_validate_invalid(client, setup_router_db): - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT + ): payload = {"variant": INVALID_VARIANT} response = client.post("/api/v1/hgvs/validate", json=payload) @@ -144,7 +148,9 @@ def test_hgvs_gene_transcript_invalid(client, setup_router_db): def test_hgvs_transcript_valid(client, setup_router_db): - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT + ): response = client.get(f"/api/v1/hgvs/{VALID_TRANSCRIPT}") assert response.status_code == 200 @@ -189,9 +195,9 @@ def test_hgvs_transcript_protein_no_protein(client, setup_router_db): def test_hgvs_transcript_protein_invalid(client, setup_router_db): with requests_mock.mock() as m: - m.get(f"https://cdot.cc/transcript/{INVALID_ACCESSION}", status_code=404) + m.get(f"https://cdot.cc/transcript/{INVALID_NT_ACCESSION}", status_code=404) - response = client.get(f"/api/v1/hgvs/protein/{INVALID_ACCESSION}") + response = client.get(f"/api/v1/hgvs/protein/{INVALID_NT_ACCESSION}") assert m.called assert response.status_code == 404 diff --git a/tests/routers/test_statistics.py b/tests/routers/test_statistics.py index a26f349e..f6ef6f6c 100644 --- a/tests/routers/test_statistics.py +++ b/tests/routers/test_statistics.py @@ -12,7 +12,7 @@ from tests.helpers.constants import ( TEST_BIORXIV_IDENTIFIER, - TEST_CDOT_TRANSCRIPT, + TEST_NT_CDOT_TRANSCRIPT, TEST_KEYWORDS, TEST_MEDRXIV_IDENTIFIER, TEST_MINIMAL_ACC_SCORESET, @@ -44,7 +44,9 @@ @pytest.fixture def setup_acc_scoreset(setup_router_db, session, data_provider, client, data_files): experiment = create_experiment(client) - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT + ): score_set = create_acc_score_set(client, experiment["urn"]) score_set = mock_worker_variant_insertion( client, session, data_provider, score_set, data_files / "scores_acc.csv" @@ -242,7 +244,7 @@ def test_target_gene_identifier_statistiscs( experiment = create_experiment(client) if "targetAccession" in target: with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ): unpublished_score_set = create_acc_score_set(client, experiment["urn"]) unpublished_score_set = mock_worker_variant_insertion( diff --git a/tests/validation/dataframe/conftest.py b/tests/validation/dataframe/conftest.py index 38612df4..0cbba30d 100644 --- a/tests/validation/dataframe/conftest.py +++ b/tests/validation/dataframe/conftest.py @@ -9,19 +9,22 @@ guide_sequence_column, required_score_column, ) -from tests.helpers.constants import TEST_CDOT_TRANSCRIPT +from tests.helpers.constants import TEST_NT_CDOT_TRANSCRIPT, TEST_PRO_CDOT_TRANSCRIPT 
@pytest.fixture def mocked_data_provider_class_attr(request): """ - Sets the `human_data_provider` attribute on the class from the requesting - test context to the `data_provider` fixture. This allows fixture use across - the `unittest.TestCase` class. + Sets the `mocked_nt_human_data_provider` and `mocked_pro_human_data_provider` + attributes on the class from the requesting test context to the `data_provider` + fixture. This allows fixture use across the `unittest.TestCase` class. """ - data_provider = mock.Mock() - data_provider._get_transcript.return_value = TEST_CDOT_TRANSCRIPT - request.cls.mocked_human_data_provider = data_provider + nt_data_provider = mock.Mock() + nt_data_provider._get_transcript.return_value = TEST_NT_CDOT_TRANSCRIPT + pro_data_provider = mock.Mock() + pro_data_provider._get_transcript.return_value = TEST_PRO_CDOT_TRANSCRIPT + request.cls.mocked_nt_human_data_provider = nt_data_provider + request.cls.mocked_pro_human_data_provider = pro_data_provider # Special DF Test Case that contains dummy data for tests below diff --git a/tests/validation/dataframe/test_dataframe.py b/tests/validation/dataframe/test_dataframe.py index 0673523d..884d271d 100644 --- a/tests/validation/dataframe/test_dataframe.py +++ b/tests/validation/dataframe/test_dataframe.py @@ -144,7 +144,7 @@ class TestValidateStandardizeDataFramePair(DfTestCase): def test_no_targets(self): with self.assertRaises(ValueError): validate_and_standardize_dataframe_pair( - self.dataframe, counts_df=None, targets=[], hdp=self.mocked_human_data_provider + self.dataframe, counts_df=None, targets=[], hdp=self.mocked_nt_human_data_provider ) # TODO: Add additional DataFrames. Realistically, if other unit tests pass this function is ok diff --git a/tests/validation/dataframe/test_variant.py b/tests/validation/dataframe/test_variant.py index c8a0f258..931c044b 100644 --- a/tests/validation/dataframe/test_variant.py +++ b/tests/validation/dataframe/test_variant.py @@ -12,14 +12,19 @@ validate_guide_sequence_column, validate_hgvs_transgenic_column, validate_hgvs_genomic_column, - parse_genomic_variant, - parse_transgenic_variant, + validate_genomic_variant, + validate_transgenic_variant, validate_observed_sequence_types, validate_hgvs_prefix_combinations, ) from mavedb.lib.validation.exceptions import ValidationError -from tests.helpers.constants import VALID_ACCESSION, TEST_CDOT_TRANSCRIPT +from tests.helpers.constants import ( + VALID_NT_ACCESSION, + VALID_PRO_ACCESSION, + TEST_NT_CDOT_TRANSCRIPT, + TEST_PRO_CDOT_TRANSCRIPT, +) from tests.validation.dataframe.conftest import DfTestCase @@ -348,45 +353,60 @@ def test_valid_column_values_wrong_column_name_multi_target(self): # Spoof the accession type class AccessionTestCase: - def __init__(self): - self.accession = VALID_ACCESSION + def __init__(self, accession): + self.accession = accession class GenomicColumnValidationTestCase(DfTestCase): def setUp(self): super().setUp() - self.accession_test_case = AccessionTestCase() + self.accession_test_case = [AccessionTestCase(VALID_NT_ACCESSION), AccessionTestCase(VALID_PRO_ACCESSION)] + + self.valid_hgvs_nt_column = pd.Series( + [f"{VALID_NT_ACCESSION}:c.1G>A", f"{VALID_NT_ACCESSION}:c.2A>T", f"{VALID_NT_ACCESSION}:c.[1G>A;2A>T]"], + name=hgvs_nt_column, + ) + + self.valid_hgvs_pro_column = pd.Series( + [ + f"{VALID_PRO_ACCESSION}:p.Asp1Tyr", + f"{VALID_PRO_ACCESSION}:p.Tyr2Asp", + f"{VALID_PRO_ACCESSION}:p.[Asp1Tyr;Tyr2Asp]", + ], + name=hgvs_pro_column, + ) - self.valid_hgvs_column = pd.Series( - 
[f"{VALID_ACCESSION}:c.1G>A", f"{VALID_ACCESSION}:c.2A>T"], name=hgvs_nt_column + self.missing_data = pd.Series([f"{VALID_NT_ACCESSION}:c.3T>G", None], name=hgvs_nt_column) + self.duplicate_data = pd.Series( + [f"{VALID_NT_ACCESSION}:c.4A>G", f"{VALID_NT_ACCESSION}:c.4A>G"], name=hgvs_nt_column ) - self.missing_data = pd.Series([f"{VALID_ACCESSION}:c.3T>G", None], name=hgvs_nt_column) - self.duplicate_data = pd.Series([f"{VALID_ACCESSION}:c.4A>G", f"{VALID_ACCESSION}:c.4A>G"], name=hgvs_nt_column) self.invalid_hgvs_columns_by_name = [ - pd.Series([f"{VALID_ACCESSION}:g.1A>G", f"{VALID_ACCESSION}:g.1A>T"], name=hgvs_splice_column), - pd.Series([f"{VALID_ACCESSION}:g.1A>G", f"{VALID_ACCESSION}:g.1A>T"], name=hgvs_pro_column), - pd.Series([f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.1A>T"], name=hgvs_pro_column), - pd.Series([f"{VALID_ACCESSION}:n.1A>G", f"{VALID_ACCESSION}:n.1A>T"], name=hgvs_pro_column), - pd.Series([f"{VALID_ACCESSION}:p.Met1Val", f"{VALID_ACCESSION}:p.Met1Leu"], name=hgvs_nt_column), + pd.Series([f"{VALID_NT_ACCESSION}:g.1A>G", f"{VALID_NT_ACCESSION}:g.1A>T"], name=hgvs_splice_column), + pd.Series([f"{VALID_NT_ACCESSION}:g.1A>G", f"{VALID_NT_ACCESSION}:g.1A>T"], name=hgvs_pro_column), + pd.Series([f"{VALID_NT_ACCESSION}:c.1A>G", f"{VALID_NT_ACCESSION}:c.1A>T"], name=hgvs_pro_column), + pd.Series([f"{VALID_NT_ACCESSION}:n.1A>G", f"{VALID_NT_ACCESSION}:n.1A>T"], name=hgvs_pro_column), + pd.Series([f"{VALID_NT_ACCESSION}:p.Met1Val", f"{VALID_NT_ACCESSION}:p.Met1Leu"], name=hgvs_nt_column), ] self.invalid_hgvs_columns_by_contents = [ pd.Series( - [f"{VALID_ACCESSION}:r.1a>g", f"{VALID_ACCESSION}:r.1a>u"], name=hgvs_splice_column + [f"{VALID_NT_ACCESSION}:r.1a>g", f"{VALID_NT_ACCESSION}:r.1a>u"], name=hgvs_splice_column ), # rna not allowed pd.Series( - [f"{VALID_ACCESSION}:r.1a>g", f"{VALID_ACCESSION}:r.1a>u"], name=hgvs_nt_column + [f"{VALID_NT_ACCESSION}:r.1a>g", f"{VALID_NT_ACCESSION}:r.1a>u"], name=hgvs_nt_column ), # rna not allowed - pd.Series([f"{VALID_ACCESSION}:c.1A>G", "_wt"], name=hgvs_nt_column), # old special variant - pd.Series([f"{VALID_ACCESSION}:p.Met1Leu", "_sy"], name=hgvs_pro_column), # old special variant - pd.Series([f"{VALID_ACCESSION}:n.1A>G", f"{VALID_ACCESSION}:c.1A>T"], name=hgvs_nt_column), # mixed prefix + pd.Series([f"{VALID_NT_ACCESSION}:c.1A>G", "_wt"], name=hgvs_nt_column), # old special variant + pd.Series([f"{VALID_NT_ACCESSION}:p.Met1Leu", "_sy"], name=hgvs_pro_column), # old special variant + pd.Series( + [f"{VALID_NT_ACCESSION}:n.1A>G", f"{VALID_NT_ACCESSION}:c.1A>T"], name=hgvs_nt_column + ), # mixed prefix pd.Series( - [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:p.Met1Leu"], name=hgvs_pro_column + [f"{VALID_NT_ACCESSION}:c.1A>G", f"{VALID_NT_ACCESSION}:p.Met1Leu"], name=hgvs_pro_column ), # mixed types/prefix pd.Series(["c.1A>G", "p.Met1Leu"], name=hgvs_pro_column), # variants should be fully qualified - pd.Series([f"{VALID_ACCESSION}:c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric + pd.Series([f"{VALID_NT_ACCESSION}:c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric pd.Series([1.0, 2.5], name=hgvs_nt_column), # contains numeric pd.Series([1.0, 2.5], name=hgvs_splice_column), # contains numeric pd.Series([1.0, 2.5], name=hgvs_pro_column), # contains numeric @@ -394,7 +414,7 @@ def setUp(self): self.invalid_hgvs_columns_by_contents_under_strict_validation = [ pd.Series( - [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.5A>T"], name=hgvs_nt_column + [f"{VALID_NT_ACCESSION}:c.1A>G", 
f"{VALID_NT_ACCESSION}:c.5A>T"], name=hgvs_nt_column ), # out of bounds for target ] @@ -408,8 +428,8 @@ def test_valid_variant_invalid_missing_index(self): validate_hgvs_genomic_column( self.missing_data, is_index=True, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, ) # type: ignore # Identical behavior for installed/uninstalled HGVS @@ -420,8 +440,8 @@ def test_valid_variant_invalid_duplicate_index(self): validate_hgvs_genomic_column( self.duplicate_data, is_index=True, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, ) # type: ignore @@ -441,45 +461,65 @@ def patched_data_provider_class_attr(request, data_provider): class TestValidateHgvsGenomicColumnHgvsInstalled(GenomicColumnValidationTestCase): def test_valid_variant(self): with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT + ): + validate_hgvs_genomic_column( + self.valid_hgvs_nt_column, + is_index=False, + targets=self.accession_test_case, + hdp=self.patched_human_data_provider, + ) # type: ignore + + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_PRO_CDOT_TRANSCRIPT ): validate_hgvs_genomic_column( - self.valid_hgvs_column, + self.valid_hgvs_pro_column, is_index=False, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=self.patched_human_data_provider, ) # type: ignore def test_valid_variant_valid_missing(self): with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ): validate_hgvs_genomic_column( self.missing_data, is_index=False, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=self.patched_human_data_provider, ) # type: ignore def test_valid_variant_valid_duplicate(self): with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ): validate_hgvs_genomic_column( self.missing_data, is_index=False, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=self.patched_human_data_provider, ) # type: ignore def test_valid_variant_index(self): with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ): validate_hgvs_genomic_column( - self.valid_hgvs_column, + self.valid_hgvs_nt_column, is_index=True, - targets=[self.accession_test_case], + targets=self.accession_test_case, + hdp=self.patched_human_data_provider, + ) # type: ignore + + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_PRO_CDOT_TRANSCRIPT + ): + validate_hgvs_genomic_column( + self.valid_hgvs_pro_column, + is_index=True, + targets=self.accession_test_case, hdp=self.patched_human_data_provider, ) # type: ignore @@ -491,13 +531,13 @@ def test_invalid_column_values(self): self.subTest(column=column), self.assertRaises(ValidationError), patch.object( - 
cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ), ): validate_hgvs_genomic_column( column, is_index=False, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=self.patched_human_data_provider, # type: ignore ) for column in ( @@ -507,13 +547,13 @@ def test_invalid_column_values(self): self.subTest(column=column), self.assertRaises(ValidationError), patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ), ): validate_hgvs_genomic_column( column, is_index=True, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=self.patched_human_data_provider, # type: ignore ) @@ -523,13 +563,13 @@ def test_valid_column_values_wrong_column_name(self): self.subTest(column=column), self.assertRaises(ValidationError), patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ), ): validate_hgvs_genomic_column( column, is_index=False, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=self.patched_human_data_provider, # type: ignore ) for column in self.invalid_hgvs_columns_by_name: @@ -537,13 +577,13 @@ def test_valid_column_values_wrong_column_name(self): self.subTest(column=column), self.assertRaises(ValidationError), patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ), ): validate_hgvs_genomic_column( column, is_index=True, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=self.patched_human_data_provider, # type: ignore ) @@ -555,54 +595,70 @@ class TestValidateHgvsGenomicColumnHgvsNotInstalled(GenomicColumnValidationTestC def test_valid_variant_strict_validation(self): with self.assertRaises(ModuleNotFoundError): validate_hgvs_genomic_column( - self.valid_hgvs_column, + self.valid_hgvs_nt_column, is_index=False, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, + ) # type: ignore + + with self.assertRaises(ModuleNotFoundError): + validate_hgvs_genomic_column( + self.valid_hgvs_nt_column, + is_index=True, + targets=self.accession_test_case, + hdp=self.mocked_pro_human_data_provider, ) # type: ignore def test_valid_variant_limited_validation(self): - validate_hgvs_genomic_column( - self.valid_hgvs_column, is_index=False, targets=[self.accession_test_case], hdp=None - ) # type: ignore + for column in [self.valid_hgvs_nt_column, self.valid_hgvs_pro_column]: + with self.subTest(column=column): + validate_hgvs_genomic_column(column, is_index=False, targets=self.accession_test_case, hdp=None) def test_valid_variant_valid_missing_strict_validation(self): with self.assertRaises(ModuleNotFoundError): validate_hgvs_genomic_column( self.missing_data, is_index=False, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, ) # type: ignore def test_valid_variant_valid_missing_limited_validation(self): - 
validate_hgvs_genomic_column(self.missing_data, is_index=False, targets=[self.accession_test_case], hdp=None) # type: ignore + validate_hgvs_genomic_column(self.missing_data, is_index=False, targets=self.accession_test_case, hdp=None) # type: ignore def test_valid_variant_valid_duplicate_strict_validation(self): with self.assertRaises(ModuleNotFoundError): validate_hgvs_genomic_column( self.missing_data, is_index=False, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, ) # type: ignore def test_valid_variant_valid_duplicate_limited_validation(self): - validate_hgvs_genomic_column(self.missing_data, is_index=False, targets=[self.accession_test_case], hdp=None) # type: ignore + validate_hgvs_genomic_column(self.missing_data, is_index=False, targets=self.accession_test_case, hdp=None) # type: ignore def test_valid_variant_index_strict_validation(self): with self.assertRaises(ModuleNotFoundError): validate_hgvs_genomic_column( - self.valid_hgvs_column, + self.valid_hgvs_nt_column, is_index=True, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, + ) # type: ignore + + with self.assertRaises(ModuleNotFoundError): + validate_hgvs_genomic_column( + self.valid_hgvs_pro_column, + is_index=True, + targets=self.accession_test_case, + hdp=self.mocked_pro_human_data_provider, ) # type: ignore def test_valid_variant_index_limited_validation(self): - validate_hgvs_genomic_column( - self.valid_hgvs_column, is_index=True, targets=[self.accession_test_case], hdp=None - ) # type: ignore + for column in [self.valid_hgvs_nt_column, self.valid_hgvs_pro_column]: + with self.subTest(column=column): + validate_hgvs_genomic_column(column, is_index=True, targets=self.accession_test_case, hdp=None) def test_invalid_column_values_strict_validation(self): for column in ( @@ -612,8 +668,8 @@ def test_invalid_column_values_strict_validation(self): validate_hgvs_genomic_column( column, is_index=False, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, # type: ignore + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, # type: ignore ) for column in ( self.invalid_hgvs_columns_by_contents + self.invalid_hgvs_columns_by_contents_under_strict_validation @@ -622,8 +678,8 @@ def test_invalid_column_values_strict_validation(self): validate_hgvs_genomic_column( column, is_index=True, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, # type: ignore + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, # type: ignore ) def test_invalid_column_values_limited_validation(self): @@ -632,7 +688,7 @@ def test_invalid_column_values_limited_validation(self): validate_hgvs_genomic_column( column, is_index=False, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=None, # type: ignore ) for column in self.invalid_hgvs_columns_by_contents: @@ -640,7 +696,7 @@ def test_invalid_column_values_limited_validation(self): validate_hgvs_genomic_column( column, is_index=True, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=None, # type: ignore ) for column in self.invalid_hgvs_columns_by_contents_under_strict_validation: @@ -648,7 +704,7 @@ def test_invalid_column_values_limited_validation(self): validate_hgvs_genomic_column( column, is_index=True, - targets=[self.accession_test_case], + 
targets=self.accession_test_case, hdp=None, # type: ignore ) @@ -658,16 +714,16 @@ def test_valid_column_values_wrong_column_name_strict_validation(self): validate_hgvs_genomic_column( column, is_index=False, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, # type: ignore + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, # type: ignore ) for column in self.invalid_hgvs_columns_by_name: with self.subTest(column=column), self.assertRaises(ValidationError): validate_hgvs_genomic_column( column, is_index=True, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, # type: ignore + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, # type: ignore ) def test_valid_column_values_wrong_column_name_limited_validation(self): @@ -676,7 +732,7 @@ def test_valid_column_values_wrong_column_name_limited_validation(self): validate_hgvs_genomic_column( column, is_index=False, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=None, # type: ignore ) for column in self.invalid_hgvs_columns_by_name: @@ -684,12 +740,12 @@ def test_valid_column_values_wrong_column_name_limited_validation(self): validate_hgvs_genomic_column( column, is_index=True, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=None, # type: ignore ) -class TestParseGenomicVariant(unittest.TestCase): +class TestValidateGenomicVariant(unittest.TestCase): def setUp(self): super().setUp() @@ -699,94 +755,121 @@ def setUp(self): self.validator.validate.return_value = True self.falsy_variant_strings = [None, ""] - self.valid_hgvs_column = pd.Series( - [f"{VALID_ACCESSION}:c.1G>A", f"{VALID_ACCESSION}:c.2A>T"], name=hgvs_nt_column + self.valid_hgvs_nt_column = pd.Series( + [f"{VALID_NT_ACCESSION}:c.1G>A", f"{VALID_NT_ACCESSION}:c.2A>T", f"{VALID_NT_ACCESSION}:c.[2A>T;1G>A]"], + name=hgvs_nt_column, + ) + self.invalid_hgvs_nt_column = pd.Series( + [ + f"{VALID_NT_ACCESSION}:c.1laksdfG>A", + f"{VALID_NT_ACCESSION}:c.2kadlfjA>T", + f"{VALID_NT_ACCESSION}:[c.2A>T;c.1G>A]", + ], + name=hgvs_nt_column, + ) + self.valid_hgvs_pro_column = pd.Series( + [ + f"{VALID_PRO_ACCESSION}:p.Asp1Tyr", + f"{VALID_PRO_ACCESSION}:p.Tyr2Asp", + f"{VALID_PRO_ACCESSION}:p.[Asp1Tyr;Tyr2Asp]", + ], + name=hgvs_pro_column, ) - self.invalid_hgvs_column = pd.Series( - [f"{VALID_ACCESSION}:c.1laksdfG>A", f"{VALID_ACCESSION}:c.2kadlfjA>T"], name=hgvs_nt_column + self.invalid_hgvs_pro_column = pd.Series( + [ + f"{VALID_PRO_ACCESSION}:p.1laksdfG>A", + f"{VALID_PRO_ACCESSION}:p.2kadlfjA>T", + f"{VALID_PRO_ACCESSION}:[p.Asp1Tyr;p.Tyr2Asp]", + ], + name=hgvs_pro_column, ) @unittest.skipUnless(HGVS_INSTALLED, "HGVS module not installed") -class TestParseGenomicVariantHgvsInstalled(TestParseGenomicVariant): - def test_parse_genomic_variant_nonetype_variant_string(self): - for variant_string in self.falsy_variant_strings: +class TestValidateGenomicVariantHgvsInstalled(TestValidateGenomicVariant): + def test_validate_genomic_variant_nonetype_variant_string(self): + for idx, variant_string in enumerate(self.falsy_variant_strings): with self.subTest(variant_string=variant_string): - valid, error = parse_genomic_variant(0, None, self.parser, self.validator) + valid, error = validate_genomic_variant(idx, None, self.parser, self.validator) assert valid assert error is None - def test_parse_valid_hgvs_variant(self): - for variant_string in self.valid_hgvs_column: + def test_validate_valid_hgvs_variant(self): + for idx, variant_string in 
enumerate([self.valid_hgvs_nt_column, self.valid_hgvs_pro_column]): with self.subTest(variant_string=variant_string): - valid, error = parse_genomic_variant(0, self.valid_hgvs_column[0], self.parser, self.validator) + valid, error = validate_genomic_variant(idx, variant_string[0], self.parser, self.validator) assert valid assert error is None - def test_parse_invalid_hgvs_variant(self): + def test_validate_invalid_hgvs_variant(self): from hgvs.exceptions import HGVSError self.validator.validate.side_effect = HGVSError("Invalid variant") - for variant_string in self.invalid_hgvs_column: + for idx, variant_string in enumerate((self.invalid_hgvs_nt_column, self.invalid_hgvs_pro_column)): with self.subTest(variant_string=variant_string): - valid, error = parse_genomic_variant(0, self.valid_hgvs_column[0], self.parser, self.validator) + valid, error = validate_genomic_variant(idx, variant_string[0], self.parser, self.validator) assert not valid - assert "Failed to parse row 0 with HGVS exception:" in error + assert f"Failed to parse row {idx} with HGVS exception:" in error @unittest.skipIf(HGVS_INSTALLED, "HGVS module installed") -class TestParseGenomicVariantHgvsNotInstalled(TestParseGenomicVariant): - def test_parse_genomic_variant_nonetype_variant_string(self): - for variant_string in self.falsy_variant_strings: +class TestValidateGenomicVariantHgvsNotInstalled(TestValidateGenomicVariant): + def test_validate_genomic_variant_nonetype_variant_string(self): + for idx, variant_string in enumerate(self.falsy_variant_strings): with self.subTest(variant_string=variant_string), self.assertRaises(ModuleNotFoundError): - parse_genomic_variant(0, None, self.parser, self.validator) + validate_genomic_variant(idx, None, self.parser, self.validator) - def test_parse_valid_hgvs_variant(self): - for variant_string in self.valid_hgvs_column: + def test_validate_valid_hgvs_variant(self): + for idx, variant_string in enumerate( + [column for column in [self.valid_hgvs_nt_column + self.valid_hgvs_pro_column]] + ): with self.subTest(variant_string=variant_string), self.assertRaises(ModuleNotFoundError): - parse_genomic_variant(0, self.valid_hgvs_column[0], self.parser, self.validator) + validate_genomic_variant(idx, variant_string, self.parser, self.validator) - def test_parse_invalid_hgvs_variant(self): - for variant_string in self.invalid_hgvs_column: + def test_validate_invalid_hgvs_variant(self): + for idx, variant_string in enumerate( + [column for column in [self.invalid_hgvs_nt_column + self.invalid_hgvs_pro_column]] + ): with self.subTest(variant_string=variant_string), self.assertRaises(ModuleNotFoundError): - parse_genomic_variant(0, self.valid_hgvs_column[0], self.parser, self.validator) + validate_genomic_variant(idx, variant_string, self.parser, self.validator) -class TestParseTransgenicVariant(unittest.TestCase): +class TestValidateTransgenicVariant(unittest.TestCase): def setUp(self): super().setUp() - self.target_sequences = {f"{VALID_ACCESSION}": "ATGC"} + self.target_sequences = {f"{VALID_NT_ACCESSION}": "ATGC"} self.falsy_variant_strings = [None, ""] self.valid_fully_qualified_transgenic_column = pd.Series( - [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.2T>G {VALID_ACCESSION}:c.2T>G"], name=hgvs_nt_column + [f"{VALID_NT_ACCESSION}:c.1A>G", f"{VALID_NT_ACCESSION}:c.2T>G {VALID_NT_ACCESSION}:c.2T>G"], + name=hgvs_nt_column, ) self.valid_basic_transgenic_column = pd.Series(["c.1A>G", "c.2T>G c.2T>G"], name=hgvs_nt_column) self.invalid_transgenic_column = pd.Series(["123A>X", 
"NM_001:123A>Y"], name=hgvs_nt_column) self.mismatched_transgenic_column = pd.Series(["c.1T>G", "c.2A>G"], name=hgvs_nt_column) - def test_parse_transgenic_variant_nonetype_variant_string(self): + def test_validate_transgenic_variant_nonetype_variant_string(self): for variant_string in self.falsy_variant_strings: with self.subTest(variant_string=variant_string): - valid, error = parse_transgenic_variant(0, None, self.target_sequences, is_fully_qualified=False) + valid, error = validate_transgenic_variant(0, None, self.target_sequences, is_fully_qualified=False) assert valid assert error is None - def test_parse_valid_fully_qualified_transgenic_variant(self): + def test_validate_valid_fully_qualified_transgenic_variant(self): for variant_string in self.valid_fully_qualified_transgenic_column: with self.subTest(variant_string=variant_string): - valid, error = parse_transgenic_variant( + valid, error = validate_transgenic_variant( 0, variant_string, self.target_sequences, is_fully_qualified=True ) assert valid assert error is None - def test_parse_valid_basic_transgenic_variant(self): + def test_validate_valid_basic_transgenic_variant(self): for variant_string in self.valid_basic_transgenic_column: with self.subTest(variant_string=variant_string): - valid, error = parse_transgenic_variant( + valid, error = validate_transgenic_variant( 0, variant_string, self.target_sequences, is_fully_qualified=False ) assert valid @@ -795,7 +878,7 @@ def test_parse_valid_basic_transgenic_variant(self): def test_parse_invalid_transgenic_variant(self): for variant_string in self.invalid_transgenic_column: with self.subTest(variant_string=variant_string): - valid, error = parse_transgenic_variant( + valid, error = validate_transgenic_variant( 0, variant_string, self.target_sequences, is_fully_qualified=False ) assert not valid @@ -804,7 +887,7 @@ def test_parse_invalid_transgenic_variant(self): def test_parse_mismatched_transgenic_variant(self): for variant_string in self.mismatched_transgenic_column: with self.subTest(variant_string=variant_string): - valid, error = parse_transgenic_variant( + valid, error = validate_transgenic_variant( 0, variant_string, self.target_sequences, is_fully_qualified=False ) assert not valid @@ -831,7 +914,7 @@ def setUp(self): pd.Series(["ATG", "ATG"], name="guide_sequence"), # identical sequences ] - self.accession_test_case = AccessionTestCase() + self.accession_test_case = [AccessionTestCase(VALID_PRO_ACCESSION), AccessionTestCase(VALID_NT_ACCESSION)] def test_valid_guide_sequences(self): for column in self.valid_guide_sequences + self.invalid_index_guide_sequences: diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index 106b77b2..343aa047 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -37,12 +37,12 @@ from tests.helpers.constants import ( - TEST_CDOT_TRANSCRIPT, + TEST_NT_CDOT_TRANSCRIPT, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_EXPERIMENT, TEST_MINIMAL_SEQ_SCORESET, TEST_VARIANT_MAPPING_SCAFFOLD, - VALID_ACCESSION, + VALID_NT_ACCESSION, ) from tests.helpers.util.exceptions import awaitable_exception from tests.helpers.util.experiment import create_experiment @@ -94,7 +94,7 @@ async def setup_records_files_and_variants(session, async_client, data_files, in with patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, + return_value=TEST_NT_CDOT_TRANSCRIPT, ): result = await create_variants_for_score_set(worker_ctx, uuid4().hex, score_set.id, 1, scores, counts) @@ -152,7 +152,7 @@ 
async def setup_mapping_output(async_client, session, score_set, empty=False): { "exception": "encountered 1 invalid variant strings.", "detail": [ - "Failed to parse row 0 with HGVS exception: NM_001637.3:c.1T>A: Variant reference (T) does not agree with reference sequence (G)" + "Failed to parse row 0 with HGVS exception: NM_001637.3:c.1T>A: Variant reference (T) does not agree with reference sequence (G)." ], }, ), @@ -173,13 +173,13 @@ async def test_create_variants_for_score_set_with_validation_error( if input_score_set == TEST_MINIMAL_SEQ_SCORESET: scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "c.1T>A" else: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = f"{VALID_ACCESSION}:c.1T>A" + scores.loc[:, HGVS_NT_COLUMN].iloc[0] = f"{VALID_NT_ACCESSION}:c.1T>A" with ( patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, + return_value=TEST_NT_CDOT_TRANSCRIPT, ) as hdp, ): result = await create_variants_for_score_set( @@ -286,7 +286,7 @@ async def test_create_variants_for_score_set_with_existing_variants( with patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, + return_value=TEST_NT_CDOT_TRANSCRIPT, ) as hdp: result = await create_variants_for_score_set( standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts @@ -309,7 +309,7 @@ async def test_create_variants_for_score_set_with_existing_variants( with patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, + return_value=TEST_NT_CDOT_TRANSCRIPT, ) as hdp: result = await create_variants_for_score_set( standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts @@ -367,7 +367,7 @@ async def test_create_variants_for_score_set_with_existing_exceptions( with patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, + return_value=TEST_NT_CDOT_TRANSCRIPT, ) as hdp: result = await create_variants_for_score_set( standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts @@ -406,7 +406,7 @@ async def test_create_variants_for_score_set( with patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, + return_value=TEST_NT_CDOT_TRANSCRIPT, ) as hdp: result = await create_variants_for_score_set( standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts @@ -449,7 +449,7 @@ async def dummy_mapping_job(): patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, + return_value=TEST_NT_CDOT_TRANSCRIPT, ) as hdp, patch.object( _UnixSelectorEventLoop, From 4afa8a76cbe33ff6c229aa0863a6b8b3e426e5f3 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 7 Mar 2025 13:54:49 -0800 Subject: [PATCH 020/166] Logical names for git action checks --- .github/workflows/run-tests-on-push.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/run-tests-on-push.yml b/.github/workflows/run-tests-on-push.yml index 1044f5bd..803541af 100644 --- a/.github/workflows/run-tests-on-push.yml +++ b/.github/workflows/run-tests-on-push.yml @@ -8,7 +8,7 @@ env: jobs: run-tests-3_9-core-dependencies: runs-on: ubuntu-20.04 - name: Pytest on Python 3.9 / Ubuntu 20.04 + name: Pytest on Core Dependencies-- Python 3.9 / Ubuntu 20.04 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -22,7 +22,7 @@ jobs: run-tests-3_9: runs-on: ubuntu-20.04 - name: 
Pytest on Python 3.9 / Ubuntu 20.04 + name: Pytest on Optional Dependencies-- Python 3.9 / Ubuntu 20.04 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -36,7 +36,7 @@ jobs: run-tests-3_10-core-dependencies: runs-on: ubuntu-latest - name: Pytest on Python 3.10 + name: Pytest on Core Dependencies-- Python 3.10 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -50,7 +50,7 @@ jobs: run-tests-3_10: runs-on: ubuntu-latest - name: Pytest on Python 3.10 + name: Pytest on Optional Dependencies-- Python 3.10 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -64,7 +64,7 @@ jobs: run-tests-3_11-core-dependencies: runs-on: ubuntu-latest - name: Pytest on Python 3.11 + name: Pytest on Core Dependencies-- Python 3.11 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -78,7 +78,7 @@ jobs: run-tests-3_11: runs-on: ubuntu-latest - name: Pytest on Python 3.11 + name: Pytest on Optional Dependencies-- Python 3.11 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -92,7 +92,7 @@ jobs: run-mypy-3_10: runs-on: ubuntu-latest - name: MyPy checks on Python 3.10 + name: MyPy on Full Codebase-- Python 3.10 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -106,7 +106,7 @@ jobs: run-ruff-lint: runs-on: ubuntu-latest - name: Ruff linting on Python 3.10 + name: Ruff on Full Codebase-- Python 3.10 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 From 291fc7e887bea8fa264ddd0adccc7a8bea6d6427 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 7 Mar 2025 15:07:11 -0800 Subject: [PATCH 021/166] Bump SeqRepo Version, Add Volume to Dev Containers --- docker-compose-dev.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 294708d0..c44b7b8f 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -23,6 +23,7 @@ services: - "8002:8000" volumes: - .:/code + - mavedb-seqrepo-dev:/usr/local/share/seqrepo worker: image: mavedb-api/mavedb-worker:dev @@ -41,6 +42,7 @@ services: LOG_CONFIG: dev volumes: - .:/code + - mavedb-seqrepo-dev:/usr/local/share/seqrepo depends_on: - db - redis @@ -77,15 +79,10 @@ services: - mavedb-seqrepo-dev:/usr/local/share/seqrepo seqrepo: - image: biocommons/seqrepo:2021-01-29 + image: biocommons/seqrepo:2024-12-20 volumes: - mavedb-seqrepo-dev:/usr/local/share/seqrepo -# rabbitmq: -# image: rabbitmq:3.8.3 -# ports: -# - "5673:5672" - volumes: mavedb-data-dev: mavedb-redis-dev: From c56b374bcf7d911c658dda0b8211751348f36083 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 7 Mar 2025 15:08:33 -0800 Subject: [PATCH 022/166] Add SeqRepo based seqfetcher to data provider Prior to this, we weren't really using SeqRepo to do transcript resolution (unintentionally). Note that to use SeqRepo in this manner, a new environment variable `HGVS_SEQREPO_DIR` should be set. 
--- settings/.env.template | 3 ++- src/mavedb/data_providers/services.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/settings/.env.template b/settings/.env.template index 32d693af..5d4af067 100644 --- a/settings/.env.template +++ b/settings/.env.template @@ -50,7 +50,8 @@ UTA_DB_URL=postgresql://anonymous:anonymous@uta.biocommons.org:5432/uta/uta_2018 # Environment variables for seqrepo #################################################################################################### -SEQREPO_ROOT_DIR=/usr/local/share/seqrepo/2021-01-29 +SEQREPO_ROOT_DIR=/usr/local/share/seqrepo/2024-12-20 +HGVS_SEQREPO_DIR=/usr/local/share/seqrepo/2024-12-20 #################################################################################################### # Environment variables for mapping MaveDB connection diff --git a/src/mavedb/data_providers/services.py b/src/mavedb/data_providers/services.py index 3d16a8e5..3b241bef 100644 --- a/src/mavedb/data_providers/services.py +++ b/src/mavedb/data_providers/services.py @@ -3,7 +3,7 @@ from typing import Optional, TypedDict import requests -from cdot.hgvs.dataproviders import ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider +from cdot.hgvs.dataproviders import SeqFetcher, ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider GENOMIC_FASTA_FILES = [ "/data/GCF_000001405.39_GRCh38.p13_genomic.fna.gz", @@ -14,7 +14,7 @@ def seqfetcher() -> ChainedSeqFetcher: - return ChainedSeqFetcher(*[FastaSeqFetcher(file) for file in GENOMIC_FASTA_FILES]) + return ChainedSeqFetcher(SeqFetcher(), *[FastaSeqFetcher(file) for file in GENOMIC_FASTA_FILES]) def cdot_rest() -> RESTDataProvider: From 8165685362ca2452800df7906ca2e27bc109355e Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 7 Mar 2025 16:49:20 -0800 Subject: [PATCH 023/166] Add SeqFetcher MyPy type stub --- mypy_stubs/cdot/hgvs/dataproviders/fasta_seqfetcher.pyi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mypy_stubs/cdot/hgvs/dataproviders/fasta_seqfetcher.pyi b/mypy_stubs/cdot/hgvs/dataproviders/fasta_seqfetcher.pyi index 3df0a0ec..5c84d2b4 100644 --- a/mypy_stubs/cdot/hgvs/dataproviders/fasta_seqfetcher.pyi +++ b/mypy_stubs/cdot/hgvs/dataproviders/fasta_seqfetcher.pyi @@ -1,6 +1,7 @@ from typing import Union -from hgvs.dataproviders.seqfetcher import SeqFetcher +class SeqFetcher: + def __init__(self, *args) -> None: ... class FastaSeqFetcher: def __init__(self, *args, cache: bool = True) -> None: ... 
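The data-provider change in the two patches above is easier to picture with a small, standalone example. The sketch below is illustrative only: the FASTA path is hypothetical, and it assumes the cdot fetchers expose the usual hgvs-style `fetch_seq(accession, start, end)` convention. With `HGVS_SEQREPO_DIR` pointed at a local SeqRepo snapshot, the plain `SeqFetcher` resolves sequences locally before any FASTA fallback is consulted.

```python
# Illustrative sketch only: the FASTA path is hypothetical and we assume the
# standard hgvs-style fetch_seq(ac, start_i, end_i) interface on the fetchers.
import os

from cdot.hgvs.dataproviders import ChainedSeqFetcher, FastaSeqFetcher, SeqFetcher

# With HGVS_SEQREPO_DIR set, SeqFetcher() is expected to read from the local
# SeqRepo snapshot instead of issuing remote sequence requests.
os.environ.setdefault("HGVS_SEQREPO_DIR", "/usr/local/share/seqrepo/2024-12-20")

fetcher = ChainedSeqFetcher(
    SeqFetcher(),                                     # SeqRepo-backed lookup first
    FastaSeqFetcher("/data/example_genomic.fna.gz"),  # hypothetical FASTA fallback
)

# Each fetcher is tried in order; the first one that can resolve the accession wins.
sequence = fetcher.fetch_seq("NM_001637.3", 0, 10)
```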
From db7f2505ad39b33db44c980ed85246232ef1d49d Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 12 Mar 2025 20:09:44 -0700 Subject: [PATCH 024/166] Refactor fixes --- tests/helpers/constants.py | 2 +- tests/routers/test_statistics.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index a1b528ae..01411e2b 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -698,7 +698,7 @@ "genomic": { "sequence_id": "ga4gh:SQ.em9khDCUYXrVWBfWr9r8fjBUrTjj1aig", "sequence_type": "dna", - "sequence_accessions": [VALID_ACCESSION], + "sequence_accessions": [VALID_NT_ACCESSION], "sequence_genes": [VALID_GENE], } } diff --git a/tests/routers/test_statistics.py b/tests/routers/test_statistics.py index f6ef6f6c..b2632f54 100644 --- a/tests/routers/test_statistics.py +++ b/tests/routers/test_statistics.py @@ -73,7 +73,6 @@ def setup_seq_scoreset(setup_router_db, session, data_provider, client, data_fil # Note that we have not created indexes for this view when it is generated via metadata. This differs # from the database created via alembic, which does create indexes. PublishedVariantsMV.refresh(session, False) - session.commit() def assert_statistic(desired_field_value, response): From 47650a8d48cc0bb870934067087eed4b66a480e0 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 13 Mar 2025 10:13:50 -0700 Subject: [PATCH 025/166] Use MaveHGVS to determine if variant is a multi-variant --- mypy_stubs/mavehgvs/variant.pyi | 5 ++- .../lib/validation/dataframe/variant.py | 33 ++++++++++--------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/mypy_stubs/mavehgvs/variant.pyi b/mypy_stubs/mavehgvs/variant.pyi index 35086b3d..f1d79665 100644 --- a/mypy_stubs/mavehgvs/variant.pyi +++ b/mypy_stubs/mavehgvs/variant.pyi @@ -1,4 +1,5 @@ -from typing import Any, List, Mapping, Optional, Sequence, Tuple, Union +from typing import Any, Callable, List, Mapping, Optional, Sequence, Tuple, Union +from re import Match from .position import VariantPosition @@ -21,3 +22,5 @@ class Variant: prefix: str sequence: Union[str, Tuple[str, str], List[Optional[Union[str, Tuple[str, str]]]], None] + is_multi_variant: Callable[..., bool] + fullmatch: Callable[..., Optional[Match[str]]] diff --git a/src/mavedb/lib/validation/dataframe/variant.py b/src/mavedb/lib/validation/dataframe/variant.py index 07c400d0..7a72710a 100644 --- a/src/mavedb/lib/validation/dataframe/variant.py +++ b/src/mavedb/lib/validation/dataframe/variant.py @@ -16,8 +16,6 @@ construct_target_sequence_mappings, ) from mavedb.lib.validation.constants.target import strict_valid_sequence_types as valid_sequence_types - - from mavedb.models.target_sequence import TargetSequence from mavedb.models.target_accession import TargetAccession @@ -193,10 +191,10 @@ def validate_hgvs_genomic_column( def validate_genomic_variant( idx: Hashable, variant_string: str, parser: "Parser", validator: "Validator" ) -> tuple[bool, Optional[str]]: - def _validate_allelic_variation(variant: str) -> bool: + def _validate_allelic_variation(transcript: str, variant: str) -> bool: """ The HGVS package is currently unable to parse allelic variation, and this doesn't seem like a planned - feature (see: https://github.com/biocommons/hgvs/issues/538). As a workaround and because MaveHGVS, + feature (see: https://github.com/biocommons/hgvs/issues/538). 
As a workaround and because MaveHGVS does support this sort of multivariant we can: - Validate that the multi-variant allele is valid HGVS. - Validate each sub-variant in an allele is valid with respect to the transcript. @@ -218,15 +216,16 @@ def _validate_allelic_variation(variant: str) -> bool: hgvs.exceptions.HGVSError If the variant is not a valid HGVS string (for reasons of transcript/variant inconsistency). """ - transcript, multi_variant = variant.split(":") - - # Validate that the multi-variant allele is valid HGVS. - Variant(multi_variant) - prefix, variants = multi_variant[0:2], multi_variant[2:] - # Validate each sub-variant in an allele is valid with respect to the transcript. - for subvariant in variants.strip("[]").split(";"): - validator.validate(parser.parse(f"{transcript}:{prefix}{subvariant}"), strict=False) + # mavehgvs doesn't offer a convenient way to access the variant sub-string in a multi-variant, + # but this is the same logic it uses to parse them into component substrings. + variant_match = Variant.fullmatch(variant) + if not variant_match: + return False + + variant_string = variant_match.groupdict()["multi_variant"] + for variant_sub_string in variant_string[3:-1].split(";"): + validator.validate(parser.parse(f"{transcript}:{variant_string[0]}.{variant_sub_string}"), strict=False) return True @@ -239,11 +238,15 @@ def _validate_allelic_variation(variant: str) -> bool: for variant in variant_string.split(" "): try: - if "[" in variant: - _validate_allelic_variation(variant) + # Some issue with mavehgvs RegEx causes multivariants with a transcript identifier + # to fail RegEx validation, so we need to split the transcript and variant string up front. + transcript, variant_string = variant.split(":") + if Variant(variant_string).is_multi_variant(): + _validate_allelic_variation(transcript, variant_string) else: validator.validate(parser.parse(variant), strict=False) - except MaveHgvsParseError: + except MaveHgvsParseError as e: + logger.error("err", exc_info=e) return False, f"Failed to parse variant string '{variant}' at row {idx}." except hgvs.exceptions.HGVSError as e: return False, f"Failed to parse row {idx} with HGVS exception: {e}." 
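The two-step check introduced above can also be exercised outside the validation module. The standalone sketch below mirrors the same idea: mavehgvs validates the syntax of the whole allele, and each sub-variant is then re-validated against the transcript with the biocommons hgvs parser and validator. It assumes a `Parser` and `Validator` have already been constructed elsewhere (building a `Validator` normally requires a data provider such as cdot or UTA); the function name and example accession are illustrative, not part of MaveDB's API.

```python
# Minimal sketch of the multi-variant workaround, assuming `parser` and
# `validator` are pre-built hgvs Parser/Validator instances.
from mavehgvs.variant import Variant


def check_fully_qualified_variant(fully_qualified: str, parser, validator) -> None:
    """Raise MaveHgvsParseError or hgvs.exceptions.HGVSError if invalid."""
    transcript, variant_string = fully_qualified.split(":", 1)

    # mavehgvs checks the syntax of the whole string, including alleles such as
    # 'c.[1G>A;2A>T]' that the hgvs package cannot parse on its own.
    variant = Variant(variant_string)

    if not variant.is_multi_variant():
        validator.validate(parser.parse(fully_qualified), strict=False)
        return

    # Re-validate each sub-variant of the allele against the transcript,
    # e.g. 'NM_001637.3:c.1G>A' and 'NM_001637.3:c.2A>T'.
    prefix = variant_string[0]                   # 'c', 'p', 'g', ...
    for sub in variant_string[3:-1].split(";"):  # strip the leading 'c.[' and trailing ']'
        validator.validate(parser.parse(f"{transcript}:{prefix}.{sub}"), strict=False)
```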
From cf8a95b3bbde6aa52fd9abfcd167d26ad9075c50 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 13 Mar 2025 10:14:27 -0700 Subject: [PATCH 026/166] Fix tests for MaveHGVS parsing --- tests/validation/dataframe/test_variant.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tests/validation/dataframe/test_variant.py b/tests/validation/dataframe/test_variant.py index 931c044b..93b658cb 100644 --- a/tests/validation/dataframe/test_variant.py +++ b/tests/validation/dataframe/test_variant.py @@ -756,7 +756,7 @@ def setUp(self): self.falsy_variant_strings = [None, ""] self.valid_hgvs_nt_column = pd.Series( - [f"{VALID_NT_ACCESSION}:c.1G>A", f"{VALID_NT_ACCESSION}:c.2A>T", f"{VALID_NT_ACCESSION}:c.[2A>T;1G>A]"], + [f"{VALID_NT_ACCESSION}:c.1G>A", f"{VALID_NT_ACCESSION}:c.1G>A", f"{VALID_NT_ACCESSION}:c.[1G>A;2A>T]"], name=hgvs_nt_column, ) self.invalid_hgvs_nt_column = pd.Series( @@ -795,22 +795,18 @@ def test_validate_genomic_variant_nonetype_variant_string(self): assert error is None def test_validate_valid_hgvs_variant(self): - for idx, variant_string in enumerate([self.valid_hgvs_nt_column, self.valid_hgvs_pro_column]): + for idx, variant_string in enumerate(pd.concat([self.valid_hgvs_nt_column, self.valid_hgvs_pro_column])): with self.subTest(variant_string=variant_string): - valid, error = validate_genomic_variant(idx, variant_string[0], self.parser, self.validator) + valid, error = validate_genomic_variant(idx, variant_string, self.parser, self.validator) assert valid assert error is None def test_validate_invalid_hgvs_variant(self): - from hgvs.exceptions import HGVSError - - self.validator.validate.side_effect = HGVSError("Invalid variant") - - for idx, variant_string in enumerate((self.invalid_hgvs_nt_column, self.invalid_hgvs_pro_column)): + for idx, variant_string in enumerate(pd.concat([self.invalid_hgvs_nt_column, self.invalid_hgvs_pro_column])): with self.subTest(variant_string=variant_string): - valid, error = validate_genomic_variant(idx, variant_string[0], self.parser, self.validator) + valid, error = validate_genomic_variant(idx, variant_string, self.parser, self.validator) assert not valid - assert f"Failed to parse row {idx} with HGVS exception:" in error + assert f"Failed to parse variant string '{variant_string}' at row {idx}" in error @unittest.skipIf(HGVS_INSTALLED, "HGVS module installed") From 82813afae47265b27529f0827f8b68cfa7e2ef03 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Sat, 29 Mar 2025 11:01:52 -0700 Subject: [PATCH 027/166] Rebase fixes (could fixup) --- tests/view_models/test_score_set.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/view_models/test_score_set.py b/tests/view_models/test_score_set.py index c155f9b7..5f45bce0 100644 --- a/tests/view_models/test_score_set.py +++ b/tests/view_models/test_score_set.py @@ -465,6 +465,21 @@ def test_cannot_create_score_set_with_normal_range_and_no_wild_type_score(): assert "A normal range has been provided, but no wild type score has been provided." 
in str(exc_info.value) +def test_cannot_create_score_set_without_default_ranges(): + score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() + score_set_test["score_ranges"] = { + "wt_score": -0.5, + "ranges": [ + {"label": "range_1", "classification": "other", "range": (-1, 0)}, + ], + } + + with pytest.raises(ValueError) as exc_info: + ScoreSetModify(**score_set_test) + + assert "unexpected value; permitted: 'normal', 'abnormal', 'not_specified'" in str(exc_info.value) + + @pytest.mark.parametrize("classification", ["normal", "abnormal", "not_specified"]) def test_can_create_score_set_with_any_range_classification(classification): wt_score = -0.5 if classification == "normal" else None @@ -477,7 +492,6 @@ def test_can_create_score_set_with_any_range_classification(classification): } ScoreSetModify(**score_set_test) - assert "Unexpected classification value(s): other. Permitted values: ['normal', 'abnormal']" in str(exc_info.value) def test_cannot_create_score_set_with_inconsistent_base_editor_flags(): From 21fc0dcca63d35c34346b01e9b64a86db99dcea0 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Thu, 3 Apr 2025 18:45:05 +1100 Subject: [PATCH 028/166] Show the variants that have problem in error message. --- src/mavedb/lib/validation/dataframe/column.py | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/mavedb/lib/validation/dataframe/column.py b/src/mavedb/lib/validation/dataframe/column.py index ef6ee23c..a545b9f3 100644 --- a/src/mavedb/lib/validation/dataframe/column.py +++ b/src/mavedb/lib/validation/dataframe/column.py @@ -73,18 +73,29 @@ def validate_variant_formatting(column: pd.Series, prefixes: list[str], targets: # if there is more than one target, we expect variants to be fully qualified if fully_qualified: - if not all(len(str(v).split(":")) == 2 for v in variants): + invalid_fully_qualified = {v for v in variants if len(str(v).split(":")) != 2} + if invalid_fully_qualified: raise ValidationError( - f"variants in the provided column '{column.name}' were expected to be fully qualified, but are not described in relation to an accession" - ) - if len(set(str(v).split(":")[1][:2] for v in variants)) > 1: - raise ValidationError(f"variant column '{column.name}' has inconsistent variant prefixes") - if not all(str(v).split(":")[1][:2] in prefixes for v in variants): - raise ValidationError(f"variant column '{column.name}' has invalid variant prefixes") - if not all(str(v).split(":")[0] in targets for v in variants): + f"variants in the provided column '{column.name}' were expected to be fully qualified, " + "but are not described in relation to an accession. 
" + "Validation errors found:\n" + "\n".join(invalid_fully_qualified)) + + inconsistent_prefixes = {v for v in variants if len(set(str(v).split(":")[1][:2] for v in variants)) > 1} + if inconsistent_prefixes: raise ValidationError( - f"variant column '{column.name}' has invalid accession identifiers; some accession identifiers present in the score file were not added as targets" - ) + f"variant column '{column.name}' has inconsistent variant prefixes':\n" + "\n".join(inconsistent_prefixes)) + + invalid_prefixes = {v for v in variants if str(v).split(":")[1][:2] not in prefixes} + if invalid_prefixes: + raise ValidationError( + f"variant column '{column.name}' has invalid variant prefixes':\n" + "\n".join(invalid_prefixes)) + + invalid_accessions = {v for v in variants if str(v).split(":")[0] not in targets} + if invalid_accessions: + raise ValidationError( + f"variant column '{column.name}' has invalid accession identifiers; " + "some accession identifiers present in the score file were not added as targets." + "Validation errors found:\n" + "\n".join(invalid_accessions)) else: if len(set(v[:2] for v in variants)) > 1: From 633dcf5c4da2c9087a9755099d9d16289a417e83 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Tue, 8 Apr 2025 19:23:39 +1000 Subject: [PATCH 029/166] Modify codes to triggers --- src/mavedb/lib/validation/dataframe/column.py | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/mavedb/lib/validation/dataframe/column.py b/src/mavedb/lib/validation/dataframe/column.py index a545b9f3..5dbb6e31 100644 --- a/src/mavedb/lib/validation/dataframe/column.py +++ b/src/mavedb/lib/validation/dataframe/column.py @@ -73,29 +73,39 @@ def validate_variant_formatting(column: pd.Series, prefixes: list[str], targets: # if there is more than one target, we expect variants to be fully qualified if fully_qualified: - invalid_fully_qualified = {v for v in variants if len(str(v).split(":")) != 2} + invalid_fully_qualified = [f"{len(str(v).split(':'))} invalid fully qualified found from row {idx}" + for idx, v in enumerate(variants) if len(str(v).split(":")) != 2] if invalid_fully_qualified: raise ValidationError( - f"variants in the provided column '{column.name}' were expected to be fully qualified, " - "but are not described in relation to an accession. 
" - "Validation errors found:\n" + "\n".join(invalid_fully_qualified)) + f"variant column '{column.name}' has {len(invalid_fully_qualified)} unqualified variants.", + triggers=invalid_fully_qualified + ) - inconsistent_prefixes = {v for v in variants if len(set(str(v).split(":")[1][:2] for v in variants)) > 1} + inconsistent_prefixes = [f"row {idx}: '{v}' uses inconsistent prefix '{str(v).split(':')[1][:2]}'" + for idx, v in enumerate(variants) + if len(set(str(v).split(":")[1][:2] for v in variants)) > 1] if inconsistent_prefixes: raise ValidationError( - f"variant column '{column.name}' has inconsistent variant prefixes':\n" + "\n".join(inconsistent_prefixes)) + f"variant column '{column.name}' has {len(inconsistent_prefixes)} inconsistent variant prefixes.", + triggers=inconsistent_prefixes + ) - invalid_prefixes = {v for v in variants if str(v).split(":")[1][:2] not in prefixes} + invalid_prefixes = [f"row {idx}: '{v}' uses invalid prefix '{str(v).split(':')[1][:2]}'" + for idx, v in enumerate(variants) if str(v).split(":")[1][:2] not in prefixes] if invalid_prefixes: raise ValidationError( - f"variant column '{column.name}' has invalid variant prefixes':\n" + "\n".join(invalid_prefixes)) + f"variant column '{column.name}' has {len(invalid_prefixes)} invalid variant prefixes.", + triggers=invalid_prefixes + ) - invalid_accessions = {v for v in variants if str(v).split(":")[0] not in targets} + invalid_accessions = [f"accession identifier {str(v).split(':')[0]} from row {idx}, variant {v} not found" + for idx, v in enumerate(variants) if str(v).split(":")[0] not in targets] if invalid_accessions: raise ValidationError( f"variant column '{column.name}' has invalid accession identifiers; " - "some accession identifiers present in the score file were not added as targets." - "Validation errors found:\n" + "\n".join(invalid_accessions)) + f"{len(invalid_accessions)} accession identifiers present in the score file were not added as targets.", + triggers=invalid_accessions + ) else: if len(set(v[:2] for v in variants)) > 1: From b66b66a61068cc8bb42f628fb337559af6dafe9f Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Mon, 27 Jan 2025 12:57:26 -0800 Subject: [PATCH 030/166] Added ClinGen allele IDs to the variant data model. --- .../e8a3b5d8f885_add_clingen_allele_ids.py | 26 +++++++++++++++++++ src/mavedb/models/variant.py | 1 + 2 files changed, 27 insertions(+) create mode 100644 alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py diff --git a/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py b/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py new file mode 100644 index 00000000..53f75752 --- /dev/null +++ b/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py @@ -0,0 +1,26 @@ +"""Add ClinGen allele IDs + +Revision ID: e8a3b5d8f885 +Revises: aa73d39b3705 +Create Date: 2025-01-27 18:55:09.283855 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = 'e8a3b5d8f885' +down_revision = 'aa73d39b3705' +branch_labels = None +depends_on = None + + +def upgrade(): + op.add_column('variants', sa.Column('clingen_allele_id', sa.String(), nullable=True)) + op.create_index(op.f('ix_variants_clingen_allele_id'), 'variants', ['clingen_allele_id'], unique=False) + + +def downgrade(): + op.drop_index(op.f('ix_variants_clingen_allele_id'), table_name='variants') + op.drop_column('variants', 'clingen_allele_id') diff --git a/src/mavedb/models/variant.py b/src/mavedb/models/variant.py index f3a5821a..f6426585 100644 --- a/src/mavedb/models/variant.py +++ b/src/mavedb/models/variant.py @@ -15,6 +15,7 @@ class Variant(Base): id = Column(Integer, primary_key=True) urn = Column(String(64), index=True, nullable=True, unique=True) + clingen_allele_id = Column(String, index=True, nullable=True) data = Column(JSONB, nullable=False) score_set_id = Column("scoreset_id", Integer, ForeignKey("scoresets.id"), index=True, nullable=False) From f4d14b014c1da98999858d0e47ed125b25eee188 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Mon, 27 Jan 2025 12:58:00 -0800 Subject: [PATCH 031/166] Added a ClinGen allele ID variant lookup endpoint to the API. --- src/mavedb/routers/variants.py | 26 ++++++++++++++++++++++++++ src/mavedb/server_main.py | 2 ++ src/mavedb/view_models/score_set.py | 5 +++-- src/mavedb/view_models/variant.py | 29 ++++++++++++++++++++++------- 4 files changed, 53 insertions(+), 9 deletions(-) create mode 100644 src/mavedb/routers/variants.py diff --git a/src/mavedb/routers/variants.py b/src/mavedb/routers/variants.py new file mode 100644 index 00000000..9f66475d --- /dev/null +++ b/src/mavedb/routers/variants.py @@ -0,0 +1,26 @@ +import logging + +from fastapi import APIRouter, Depends +from sqlalchemy.orm import Session + +from mavedb import deps +from mavedb.lib.logging import LoggedRoute +from mavedb.models.variant import Variant +from mavedb.view_models.variant import ClingenAlleleIdVariantLookupsRequest, VariantWithShortScoreSet + +router = APIRouter( + prefix="/api/v1", tags=["access keys"], responses={404: {"description": "Not found"}}, route_class=LoggedRoute +) + +logger = logging.getLogger(__name__) + + +@router.post("/variants/clingen-allele-id-lookups", response_model=list[list[VariantWithShortScoreSet]]) +def get_variants(*, request: ClingenAlleleIdVariantLookupsRequest, db: Session = Depends(deps.get_db)): + variants = db.query(Variant).filter(Variant.clingen_allele_id.in_(request.clingen_allele_ids)).all() + + variants_by_allele_id: dict[str, list[Variant]] = {allele_id: [] for allele_id in request.clingen_allele_ids} + for variant in variants: + variants_by_allele_id[variant.clingen_allele_id].append(variant) + + return [variants_by_allele_id[allele_id] for allele_id in request.clingen_allele_ids] diff --git a/src/mavedb/server_main.py b/src/mavedb/server_main.py index b0e966cf..e1cf4c2c 100644 --- a/src/mavedb/server_main.py +++ b/src/mavedb/server_main.py @@ -55,6 +55,7 @@ target_genes, taxonomies, users, + variants, ) logger = logging.getLogger(__name__) @@ -100,6 +101,7 @@ app.include_router(target_genes.router) app.include_router(taxonomies.router) app.include_router(users.router) +app.include_router(variants.router) @app.exception_handler(PermissionException) diff --git a/src/mavedb/view_models/score_set.py b/src/mavedb/view_models/score_set.py index 8bc19c2d..30b93d28 100644 --- a/src/mavedb/view_models/score_set.py +++ b/src/mavedb/view_models/score_set.py @@ -34,7 +34,6 
@@ TargetGeneCreate, ) from mavedb.view_models.user import SavedUser, User -from mavedb.view_models.variant import VariantInDbBase class ExternalLink(BaseModel): @@ -441,7 +440,7 @@ class ScoreSetWithVariants(ScoreSet): are requested. """ - variants: list[VariantInDbBase] + variants: list[SavedVariant] class AdminScoreSet(ScoreSet): @@ -469,6 +468,8 @@ class ScoreSetPublicDump(SavedScoreSet): # ruff: noqa: E402 from mavedb.view_models.experiment import Experiment +from mavedb.view_models.variant import SavedVariant ShortScoreSet.update_forward_refs() ScoreSet.update_forward_refs() +ScoreSetWithVariants.update_forward_refs() diff --git a/src/mavedb/view_models/variant.py b/src/mavedb/view_models/variant.py index 830bdd5c..11d15d5e 100644 --- a/src/mavedb/view_models/variant.py +++ b/src/mavedb/view_models/variant.py @@ -8,6 +8,7 @@ class VariantBase(BaseModel): + """Properties shared by most variant view models""" urn: Optional[str] data: Any score_set_id: int @@ -19,17 +20,20 @@ class VariantBase(BaseModel): class VariantCreate(VariantBase): + """Input view model for creating variants""" pass class VariantUpdate(VariantBase): + """Input view model for updating variants""" pass -# Properties shared by models stored in DB -class VariantInDbBase(VariantBase): +class SavedVariant(VariantBase): + """Base class for variant view models handling saved variants""" id: int record_type: str = None # type: ignore + clingen_allele_id: Optional[str] _record_type_factory = record_type_validator()(set_record_type) @@ -37,11 +41,22 @@ class Config: orm_mode = True -# Properties to return to client -class Variant(VariantInDbBase): +class Variant(SavedVariant): + """Variant view model returned to most clients""" pass -# Properties stored in DB -class VariantInDb(VariantInDbBase): - pass +class VariantWithShortScoreSet(SavedVariant): + """Variant view model with a limited set of score set details""" + score_set: "ShortScoreSet" + + +class ClingenAlleleIdVariantLookupsRequest(BaseModel): + """A request to search for variants matching a list of ClinGen allele IDs""" + clingen_allele_ids: list[str] + + +# ruff: noqa: E402 +from mavedb.view_models.score_set import ShortScoreSet + +VariantWithShortScoreSet.update_forward_refs() From 04526f7949d8d6813ae01395daed391147c2e4cc Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Tue, 28 Jan 2025 11:38:15 -0800 Subject: [PATCH 032/166] Added a "get variant" endpoint; added permission checks to the variants router. 
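Reviewer note: a rough sketch of how the permission-checked lookup endpoint is expected to be
exercised. The allele IDs are placeholders, and the JSON key casing is an assumption here; the
real casing follows the project's existing view-model configuration.

    # Hypothetical usage sketch (placeholder allele IDs; assumes anonymous read access to
    # public score sets, as with the other read endpoints).
    from fastapi.testclient import TestClient

    from mavedb.server_main import app

    client = TestClient(app)
    payload = {"clingen_allele_ids": ["CA0000001", "CA0000002"]}  # placeholder IDs

    response = client.post("/api/v1/variants/clingen-allele-id-lookups", json=payload)
    # The response holds one inner list per requested allele ID, in request order, containing
    # only variants whose score sets the caller is permitted to read; unknown IDs map to [].
    for allele_id, matches in zip(payload["clingen_allele_ids"], response.json()):
        print(allele_id, len(matches))
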
--- src/mavedb/routers/variants.py | 63 +++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/src/mavedb/routers/variants.py b/src/mavedb/routers/variants.py index 9f66475d..eec0822f 100644 --- a/src/mavedb/routers/variants.py +++ b/src/mavedb/routers/variants.py @@ -1,10 +1,16 @@ import logging from fastapi import APIRouter, Depends -from sqlalchemy.orm import Session +from fastapi.exceptions import HTTPException +from mavedb.lib.authentication import UserData, get_current_user +from mavedb.lib.permissions import Action, assert_permission, has_permission +from sqlalchemy.exc import MultipleResultsFound, NoResultFound +from sqlalchemy.orm import Session, joinedload from mavedb import deps from mavedb.lib.logging import LoggedRoute +from mavedb.lib.logging.context import logging_context, save_to_logging_context +from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant from mavedb.view_models.variant import ClingenAlleleIdVariantLookupsRequest, VariantWithShortScoreSet @@ -16,11 +22,60 @@ @router.post("/variants/clingen-allele-id-lookups", response_model=list[list[VariantWithShortScoreSet]]) -def get_variants(*, request: ClingenAlleleIdVariantLookupsRequest, db: Session = Depends(deps.get_db)): - variants = db.query(Variant).filter(Variant.clingen_allele_id.in_(request.clingen_allele_ids)).all() - +def lookup_variants( + *, + request: ClingenAlleleIdVariantLookupsRequest, + db: Session = Depends(deps.get_db), + user_data: UserData = Depends(get_current_user) +): + variants = ( + db.query(Variant) + .options( + joinedload(Variant.score_set).joinedload(ScoreSet.experiment), + joinedload(Variant.mapped_variants) + ) + .filter(Variant.clingen_allele_id.in_(request.clingen_allele_ids)) + .all() + ) + variants[:] = [ + variant for variant in variants if has_permission(user_data, variant.score_set, Action.READ).permitted + ] variants_by_allele_id: dict[str, list[Variant]] = {allele_id: [] for allele_id in request.clingen_allele_ids} for variant in variants: variants_by_allele_id[variant.clingen_allele_id].append(variant) return [variants_by_allele_id[allele_id] for allele_id in request.clingen_allele_ids] + + +@router.post( + "/variants/{urn}", + status_code=200, + response_model=VariantWithShortScoreSet, + responses={404: {}, 500: {}}, + response_model_exclude_none=True, +) +def get_variant( + *, + urn: str, + db: Session = Depends(deps.get_db), + user_data: UserData = Depends(get_current_user) +): + """ + Fetch a single variant by URN. + """ + save_to_logging_context({"requested_resource": urn}) + try: + query = db.query(Variant).filter(Variant.urn == urn) + variant = query.one_or_none() + except MultipleResultsFound: + logger.info( + msg="Could not fetch the requested score set; Multiple such variants exist.", extra=logging_context() + ) + raise HTTPException(status_code=500, detail=f"multiple variants with URN '{urn}' were found") + + if not variant: + logger.info(msg="Could not fetch the requested variant; No such variant exists.", extra=logging_context()) + raise HTTPException(status_code=404, detail=f"variant with URN '{urn}' not found") + + assert_permission(user_data, variant.score_set, Action.READ) + return variant From b7fc08d4068a1859e531c09370783ca789cc1954 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Tue, 28 Jan 2025 11:38:45 -0800 Subject: [PATCH 033/166] Include mapped variants in get/lookup variant responses. 
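Reviewer note: a minimal ORM sketch of the new bidirectional relationship, assuming a locally
configured database; the connection URL below is a placeholder.

    from sqlalchemy import create_engine, select
    from sqlalchemy.orm import Session

    from mavedb.models.variant import Variant

    engine = create_engine("postgresql://localhost/mavedb")  # placeholder URL

    with Session(engine) as session:
        variant = session.scalars(select(Variant).limit(1)).first()
        if variant is not None:
            # back_populates replaces the old backref, so both sides of the relationship stay
            # in sync, and the cascade deletes mapped variants orphaned from their variant.
            for mapped in variant.mapped_variants:
                print(mapped.id, mapped.current)

The TYPE_CHECKING-only imports keep the annotations visible to type checkers while avoiding a
circular import between variant.py and mapped_variant.py at runtime.
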
--- src/mavedb/models/mapped_variant.py | 6 ++++-- src/mavedb/models/variant.py | 11 +++++++++-- src/mavedb/view_models/variant.py | 4 +++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/mavedb/models/mapped_variant.py b/src/mavedb/models/mapped_variant.py index 57cefd03..eca2231e 100644 --- a/src/mavedb/models/mapped_variant.py +++ b/src/mavedb/models/mapped_variant.py @@ -1,4 +1,5 @@ from datetime import date +from typing import TYPE_CHECKING from sqlalchemy import Boolean, Column, Date, ForeignKey, Integer, String from sqlalchemy.dialects.postgresql import JSONB @@ -6,7 +7,8 @@ from mavedb.db.base import Base -from .variant import Variant +if TYPE_CHECKING: + from .variant import Variant class MappedVariant(Base): @@ -24,4 +26,4 @@ class MappedVariant(Base): current = Column(Boolean, nullable=False) variant_id = Column(Integer, ForeignKey("variants.id"), index=True, nullable=False) - variant: Mapped[Variant] = relationship("Variant", backref=backref("mapped_variants", cascade="all,delete-orphan")) + variant: Mapped["Variant"] = relationship("Variant", back_populates="mapped_variants") diff --git a/src/mavedb/models/variant.py b/src/mavedb/models/variant.py index f6426585..f81878bb 100644 --- a/src/mavedb/models/variant.py +++ b/src/mavedb/models/variant.py @@ -1,4 +1,5 @@ from datetime import date +from typing import TYPE_CHECKING, List from sqlalchemy import Column, Date, ForeignKey, Integer, String from sqlalchemy.dialects.postgresql import JSONB @@ -6,7 +7,9 @@ from mavedb.db.base import Base -from .score_set import ScoreSet +if TYPE_CHECKING: + from .mapped_variant import MappedVariant + from .score_set import ScoreSet class Variant(Base): @@ -20,7 +23,7 @@ class Variant(Base): score_set_id = Column("scoreset_id", Integer, ForeignKey("scoresets.id"), index=True, nullable=False) # TODO examine if delete-orphan is necessary, explore cascade - score_set: Mapped[ScoreSet] = relationship(back_populates="variants") + score_set: Mapped["ScoreSet"] = relationship(back_populates="variants") hgvs_nt = Column(String, nullable=True) hgvs_pro = Column(String, nullable=True) @@ -28,3 +31,7 @@ class Variant(Base): creation_date = Column(Date, nullable=False, default=date.today) modification_date = Column(Date, nullable=False, default=date.today, onupdate=date.today) + + mapped_variants: Mapped[List["MappedVariant"]] = relationship( + back_populates="variant", cascade="all, delete-orphan" + ) diff --git a/src/mavedb/view_models/variant.py b/src/mavedb/view_models/variant.py index 11d15d5e..5eb5bb4c 100644 --- a/src/mavedb/view_models/variant.py +++ b/src/mavedb/view_models/variant.py @@ -1,6 +1,7 @@ from datetime import date from typing import Any +from mavedb.view_models.mapped_variant import MappedVariant from pydantic.types import Optional from mavedb.view_models import record_type_validator, set_record_type @@ -47,8 +48,9 @@ class Variant(SavedVariant): class VariantWithShortScoreSet(SavedVariant): - """Variant view model with a limited set of score set details""" + """Variant view model with mapped variants and a limited set of score set details""" score_set: "ShortScoreSet" + mapped_variants: list[MappedVariant] class ClingenAlleleIdVariantLookupsRequest(BaseModel): From 0d5b1eb80d69f3346a314aca5151904c4f358f1f Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 28 Jan 2025 21:54:06 -0800 Subject: [PATCH 034/166] ClinVar Variant Table --- .../34026092c7f8_clinvar_variant_table.py | 56 +++++++++++++++++++ src/mavedb/models/__init__.py | 1 + 
src/mavedb/models/clinvar_variant.py | 28 ++++++++++ src/mavedb/models/mapped_variant.py | 6 +- 4 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 alembic/versions/34026092c7f8_clinvar_variant_table.py create mode 100644 src/mavedb/models/clinvar_variant.py diff --git a/alembic/versions/34026092c7f8_clinvar_variant_table.py b/alembic/versions/34026092c7f8_clinvar_variant_table.py new file mode 100644 index 00000000..730c8e66 --- /dev/null +++ b/alembic/versions/34026092c7f8_clinvar_variant_table.py @@ -0,0 +1,56 @@ +"""clinvar variant table + +Revision ID: 34026092c7f8 +Revises: e8a3b5d8f885 +Create Date: 2025-01-28 21:48:42.532346 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "34026092c7f8" +down_revision = "e8a3b5d8f885" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "clinvar_variants", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("allele_id", sa.Integer(), nullable=False), + sa.Column("gene_symbol", sa.String(), nullable=False), + sa.Column("clinical_significance", sa.String(), nullable=False), + sa.Column("clinical_review_status", sa.String(), nullable=False), + sa.Column("clinvar_db_version", sa.String(), nullable=False), + sa.Column("creation_date", sa.Date(), nullable=False), + sa.Column("modification_date", sa.Date(), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index(op.f("ix_clinvar_variants_allele_id"), "clinvar_variants", ["allele_id"], unique=False) + op.add_column("mapped_variants", sa.Column("clinvar_variant_id", sa.Integer(), nullable=True)) + op.create_index( + op.f("ix_mapped_variants_clinvar_variant_id"), "mapped_variants", ["clinvar_variant_id"], unique=False + ) + op.create_foreign_key( + "mapped_variant_clinvar_variant_id_foreign_key_constraint", + "mapped_variants", + "clinvar_variants", + ["clinvar_variant_id"], + ["id"], + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_constraint("mapped_variants_clinvar_variant_foreign_key_constraint", "mapped_variants", type_="foreignkey") + op.drop_index(op.f("ix_mapped_variants_clinvar_variant_id"), table_name="mapped_variants") + op.drop_column("mapped_variants", "clinvar_variant_id") + op.drop_index(op.f("ix_clinvar_variants_allele_id"), table_name="clinvar_variants") + op.drop_table("clinvar_variants") + # ### end Alembic commands ### diff --git a/src/mavedb/models/__init__.py b/src/mavedb/models/__init__.py index a1a2c0af..111809a4 100644 --- a/src/mavedb/models/__init__.py +++ b/src/mavedb/models/__init__.py @@ -1,6 +1,7 @@ __all__ = [ "access_key", "collection", + "clinvar_variant", "controlled_keyword", "doi_identifier", "ensembl_identifier", diff --git a/src/mavedb/models/clinvar_variant.py b/src/mavedb/models/clinvar_variant.py new file mode 100644 index 00000000..7fb0d191 --- /dev/null +++ b/src/mavedb/models/clinvar_variant.py @@ -0,0 +1,28 @@ +from datetime import date +from typing import TYPE_CHECKING + +from sqlalchemy import Column, Date, Integer, String +from sqlalchemy.orm import Mapped, relationship + +from mavedb.db.base import Base + +if TYPE_CHECKING: + from .variant import Variant + + +class ClinvarVariant(Base): + __tablename__ = "clinvar_variants" + + id = Column(Integer, primary_key=True) + allele_id = Column(Integer, nullable=False, index=True) + gene_symbol = Column(String, nullable=False) + + clinical_significance = Column(String, nullable=False) + clinical_review_status = Column(String, nullable=False) + + clinvar_db_version = Column(String, nullable=False) + + creation_date = Column(Date, nullable=False, default=date.today) + modification_date = Column(Date, nullable=False, default=date.today, onupdate=date.today) + + mapped_variants: Mapped[list["Variant"]] = relationship(back_populates="clinvar_variant") diff --git a/src/mavedb/models/mapped_variant.py b/src/mavedb/models/mapped_variant.py index eca2231e..0648832d 100644 --- a/src/mavedb/models/mapped_variant.py +++ b/src/mavedb/models/mapped_variant.py @@ -3,11 +3,12 @@ from sqlalchemy import Boolean, Column, Date, ForeignKey, Integer, String from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.orm import Mapped, backref, relationship +from sqlalchemy.orm import Mapped, relationship from mavedb.db.base import Base if TYPE_CHECKING: + from .clinvar_variant import ClinvarVariant from .variant import Variant @@ -27,3 +28,6 @@ class MappedVariant(Base): variant_id = Column(Integer, ForeignKey("variants.id"), index=True, nullable=False) variant: Mapped["Variant"] = relationship("Variant", back_populates="mapped_variants") + + clinvar_variant_id = Column(Integer, ForeignKey("clinvar_variants.id"), nullable=True, index=True) + clinvar_variant: Mapped["ClinvarVariant"] = relationship("ClinvarVariant", back_populates="mapped_variants") From 4582b75c2248bc8ae51d530d1c997c48889fcda4 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 28 Jan 2025 22:25:59 -0800 Subject: [PATCH 035/166] fixup --- src/mavedb/models/clinvar_variant.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mavedb/models/clinvar_variant.py b/src/mavedb/models/clinvar_variant.py index 7fb0d191..f2ba2f00 100644 --- a/src/mavedb/models/clinvar_variant.py +++ b/src/mavedb/models/clinvar_variant.py @@ -7,7 +7,7 @@ from mavedb.db.base import Base if TYPE_CHECKING: - from .variant import Variant + from .mapped_variant import MappedVariant class ClinvarVariant(Base): @@ -25,4 +25,4 @@ class ClinvarVariant(Base): creation_date 
= Column(Date, nullable=False, default=date.today) modification_date = Column(Date, nullable=False, default=date.today, onupdate=date.today) - mapped_variants: Mapped[list["Variant"]] = relationship(back_populates="clinvar_variant") + mapped_variants: Mapped[list["MappedVariant"]] = relationship(back_populates="clinvar_variant") From 89edb37472188ae4859bf1035ea811eea813dc4a Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 29 Jan 2025 16:32:47 -0800 Subject: [PATCH 036/166] Minimal Script for ClinVar Variant Data Refresh --- src/mavedb/scripts/environment.py | 2 +- .../scripts/refresh_clinvar_variant_data.py | 135 ++++++++++++++++++ 2 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 src/mavedb/scripts/refresh_clinvar_variant_data.py diff --git a/src/mavedb/scripts/environment.py b/src/mavedb/scripts/environment.py index f773f55f..66bdbb78 100644 --- a/src/mavedb/scripts/environment.py +++ b/src/mavedb/scripts/environment.py @@ -61,7 +61,7 @@ def with_database_session(command=None, *, pass_action: bool = False): The *command* callable must be a :py:class:`click.Command` instance. The decorated *command* is called with a ``db`` keyword argument to provide - a :class:`~id3c.db.session.DatabaseSession` object. The call happens + a :class:`~sqlalchemy.Session` object. The call happens within an exception handler that commits or rollsback the database transaction, possibly interactively. Three new options are added to the *command* (``--dry-run``, ``--prompt``, and ``--commit``) to control this diff --git a/src/mavedb/scripts/refresh_clinvar_variant_data.py b/src/mavedb/scripts/refresh_clinvar_variant_data.py new file mode 100644 index 00000000..c0f30533 --- /dev/null +++ b/src/mavedb/scripts/refresh_clinvar_variant_data.py @@ -0,0 +1,135 @@ +import click +import requests +import csv +import time +import logging +import gzip +import random +import io + +from typing import Dict, Any, Optional +from datetime import date + +from sqlalchemy import select, distinct, func +from sqlalchemy.orm import Session + +from mavedb.models.variant import Variant +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.clinvar_variant import ClinvarVariant +from mavedb.scripts.environment import with_database_session + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + + +def fetch_clinvar_variant_summary_tsv(month: Optional[str], year: Optional[str]) -> bytes: + if month is None and year is None: + url = "https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz" + else: + url = f"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/archive/variant_summary_{year}-{month}.txt.gz" + + response = requests.get(url, stream=True) + response.raise_for_status() + return response.content + + +def parse_tsv(tsv_content: bytes) -> Dict[int, Dict[str, str]]: + with gzip.open(filename=io.BytesIO(tsv_content), mode="rt") as f: + # This readlines object will only be a list of bytes if the file is opened in "rb" mode. 
+ reader = csv.DictReader(f.readlines(), delimiter="\t") # type: ignore + data = {int(row["#AlleleID"]): row for row in reader} + + return data + + +def query_clingen_allele_api(allele_id: str) -> Dict[str, Any]: + url = f"https://reg.clinicalgenome.org/allele/{allele_id}" + retries = 5 + for i in range(retries): + try: + response = requests.get(url) + response.raise_for_status() + break + except requests.RequestException as e: + if i < retries - 1: + wait_time = (2**i) + random.uniform(0, 1) + logger.warning(f"Request failed ({e}), retrying in {wait_time:.2f} seconds...") + time.sleep(wait_time) + else: + logger.error(f"Request failed after {retries} attempts: {e}") + raise + + logger.debug(f"Fetched ClinGen data for allele ID {allele_id}.") + return response.json() + + +def refresh_clinvar_variants(db: Session, month: Optional[str], year: Optional[str]) -> None: + tsv_content = fetch_clinvar_variant_summary_tsv(month, year) + tsv_data = parse_tsv(tsv_content) + version = f"{month}_{year}" if month and year else f"{date.today().month}_{date.today().year}" + logger.info(f"Fetched TSV variant data for ClinVar for {version}.") + + total_variants_with_clingen_ids = db.scalar(func.count(distinct(Variant.clingen_allele_id))) + clingen_ids = db.scalars( + select(distinct(Variant.clingen_allele_id)).where(Variant.clingen_allele_id.is_not(None)) + ).all() + + logger.info(f"Fetching ClinGen data for {total_variants_with_clingen_ids} variants.") + for index, clingen_id in enumerate(clingen_ids): + if total_variants_with_clingen_ids > 0 and index % (total_variants_with_clingen_ids // 100) == 0: + logger.info(f"Progress: {index / total_variants_with_clingen_ids:.0%}") + + # Guaranteed based on our query filters. + clingen_data = query_clingen_allele_api(clingen_id) # type: ignore + clinvar_allele_id = clingen_data.get("externalRecords", {}).get("ClinVarAlleles", [{}])[0].get("alleleId") + + if not clinvar_allele_id or clinvar_allele_id not in tsv_data: + logger.debug( + f"No ClinVar variant data found for ClinGen allele ID {clingen_id}. ({index + 1}/{total_variants_with_clingen_ids})." + ) + continue + + variant_data = tsv_data[clinvar_allele_id] + clinvar_variant = db.scalars( + select(ClinvarVariant).where( + ClinvarVariant.allele_id == clinvar_allele_id, ClinvarVariant.clinvar_db_version == version + ) + ).one_or_none() + if clinvar_variant: + clinvar_variant.gene_symbol = variant_data.get("GeneSymbol") + clinvar_variant.clinical_significance = variant_data.get("ClinicalSignificance") + clinvar_variant.clinical_review_status = variant_data.get("ReviewStatus") + else: + clinvar_variant = ClinvarVariant( + allele_id=clinvar_allele_id, + gene_symbol=variant_data.get("GeneSymbol"), + clinical_significance=variant_data.get("ClinicalSignificance"), + clinical_review_status=variant_data.get("ReviewStatus"), + clinvar_db_version=version, + ) + + db.add(clinvar_variant) + + variants_with_clingen_allele_id = db.scalars( + select(MappedVariant).join(Variant).where(Variant.clingen_allele_id == clingen_id) + ).all() + for variant in variants_with_clingen_allele_id: + variant.clinvar_variant_id = clinvar_variant.id + db.add(variant) + + db.commit() + logger.debug( + f"Added ClinVar variant data ({clinvar_allele_id}) for ClinGen allele ID {clingen_id}. ({index + 1}/{total_variants_with_clingen_ids})." 
+ ) + + +@click.command() +@with_database_session +@click.option("--month", default=None, help="Populate mapped variants for every score set in MaveDB.") +@click.option("--year", default=None, help="Populate mapped variants for every score set in MaveDB.") +def refresh_clinvar_variants_command(db: Session, month: Optional[str], year: Optional[str]) -> None: + refresh_clinvar_variants(db, month, year) + + +if __name__ == "__main__": + refresh_clinvar_variants_command() From 7ec96721a49b8965d2cefd855b7f008dffded8bf Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 29 Jan 2025 20:53:57 -0800 Subject: [PATCH 037/166] View model updates to support clinvar variants --- src/mavedb/view_models/clinvar_variant.py | 51 +++++++++++++++++++++++ src/mavedb/view_models/mapped_variant.py | 18 ++++++-- src/mavedb/view_models/variant.py | 29 ++++++++++++- 3 files changed, 94 insertions(+), 4 deletions(-) create mode 100644 src/mavedb/view_models/clinvar_variant.py diff --git a/src/mavedb/view_models/clinvar_variant.py b/src/mavedb/view_models/clinvar_variant.py new file mode 100644 index 00000000..f37e0c8e --- /dev/null +++ b/src/mavedb/view_models/clinvar_variant.py @@ -0,0 +1,51 @@ +# See https://pydantic-docs.helpmanual.io/usage/postponed_annotations/#self-referencing-models +from __future__ import annotations + +from datetime import date +from typing import Optional, Sequence + +from mavedb.view_models import record_type_validator, set_record_type +from mavedb.view_models.base.base import BaseModel + + +class ClinvarVariantBase(BaseModel): + allele_id: int + gene_symbol: str + clinical_significance: str + clinical_review_status: str + clinvar_db_version: str + + +class ClinvarVariantCreate(ClinvarVariantBase): + mapped_variants: Optional[list[MappedVariantCreate]] = None + + +class ClinvarVariantUpdate(ClinvarVariantBase): + pass + + +# Properties shared by models stored in DB +class SavedClinvarVariant(ClinvarVariantBase): + id: int + modification_date: date + creation_date: date + mapped_variants: Sequence[SavedMappedVariant] + + record_type: str = None # type: ignore + _record_type_factory = record_type_validator()(set_record_type) + + class Config: + orm_mode = True + + +# Properties to return to non-admin clients +class ClinvarVariant(SavedClinvarVariant): + mapped_variants: Sequence[MappedVariant] + + +# ruff: noqa: E402 +from mavedb.view_models.mapped_variant import MappedVariant, SavedMappedVariant, MappedVariantCreate + +ClinvarVariantCreate.update_forward_refs() +SavedClinvarVariant.update_forward_refs() +ClinvarVariant.update_forward_refs() diff --git a/src/mavedb/view_models/mapped_variant.py b/src/mavedb/view_models/mapped_variant.py index 397084de..e3aff59e 100644 --- a/src/mavedb/view_models/mapped_variant.py +++ b/src/mavedb/view_models/mapped_variant.py @@ -1,3 +1,6 @@ +# See https://pydantic-docs.helpmanual.io/usage/postponed_annotations/#self-referencing-models +from __future__ import annotations + from datetime import date from typing import Any, Optional @@ -18,7 +21,7 @@ class MappedVariantBase(BaseModel): class MappedVariantCreate(MappedVariantBase): - pass + clinvar_variant: Optional[ClinvarVariantCreate] class MappedVariantUpdate(MappedVariantBase): @@ -28,8 +31,9 @@ class MappedVariantUpdate(MappedVariantBase): # Properties shared by models stored in DB class SavedMappedVariant(MappedVariantBase): id: int - record_type: str = None # type: ignore + clinvar_variant: Optional[SavedClinvarVariant] + record_type: str = None # type: ignore _record_type_factory = 
record_type_validator()(set_record_type) class Config: @@ -38,4 +42,12 @@ class Config: # Properties to return to non-admin clients class MappedVariant(SavedMappedVariant): - pass + clinvar_variant: Optional[ClinvarVariant] + + +# ruff: noqa: E402 +from mavedb.view_models.clinvar_variant import ClinvarVariant, ClinvarVariantCreate, SavedClinvarVariant + +MappedVariantCreate.update_forward_refs() +SavedMappedVariant.update_forward_refs() +MappedVariant.update_forward_refs() diff --git a/src/mavedb/view_models/variant.py b/src/mavedb/view_models/variant.py index 5eb5bb4c..e418a479 100644 --- a/src/mavedb/view_models/variant.py +++ b/src/mavedb/view_models/variant.py @@ -1,7 +1,7 @@ from datetime import date from typing import Any -from mavedb.view_models.mapped_variant import MappedVariant +from mavedb.view_models.mapped_variant import MappedVariant, SavedMappedVariant from pydantic.types import Optional from mavedb.view_models import record_type_validator, set_record_type @@ -10,6 +10,7 @@ class VariantBase(BaseModel): """Properties shared by most variant view models""" + urn: Optional[str] data: Any score_set_id: int @@ -22,16 +23,19 @@ class VariantBase(BaseModel): class VariantCreate(VariantBase): """Input view model for creating variants""" + pass class VariantUpdate(VariantBase): """Input view model for updating variants""" + pass class SavedVariant(VariantBase): """Base class for variant view models handling saved variants""" + id: int record_type: str = None # type: ignore clingen_allele_id: Optional[str] @@ -42,19 +46,42 @@ class Config: orm_mode = True +class SavedVariantWithMappedVariant(SavedVariant): + """Class for saved variant with any associated mapped variants""" + + mapped_variant: Optional[SavedMappedVariant] + + @classmethod + def from_orm(cls, obj: Any): + try: + obj.mapped_variant = next( + mapped_variant for mapped_variant in obj.mapped_variants if mapped_variant.current + ) + except (AttributeError, StopIteration): + obj.mapped_variant = None + return super().from_orm(obj) + + class Variant(SavedVariant): """Variant view model returned to most clients""" + pass +class VariantWithMappedVariant(SavedVariantWithMappedVariant): + mapped_variant: Optional[MappedVariant] + + class VariantWithShortScoreSet(SavedVariant): """Variant view model with mapped variants and a limited set of score set details""" + score_set: "ShortScoreSet" mapped_variants: list[MappedVariant] class ClingenAlleleIdVariantLookupsRequest(BaseModel): """A request to search for variants matching a list of ClinGen allele IDs""" + clingen_allele_ids: list[str] From 47baa03ceed6c26f2715112bd46fd79a155ee58b Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 29 Jan 2025 21:27:07 -0800 Subject: [PATCH 038/166] Add check for Nonetype clingen allele ids --- src/mavedb/routers/variants.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/src/mavedb/routers/variants.py b/src/mavedb/routers/variants.py index eec0822f..32a9e045 100644 --- a/src/mavedb/routers/variants.py +++ b/src/mavedb/routers/variants.py @@ -4,7 +4,7 @@ from fastapi.exceptions import HTTPException from mavedb.lib.authentication import UserData, get_current_user from mavedb.lib.permissions import Action, assert_permission, has_permission -from sqlalchemy.exc import MultipleResultsFound, NoResultFound +from sqlalchemy.exc import MultipleResultsFound from sqlalchemy.orm import Session, joinedload from mavedb import deps @@ -26,40 +26,34 @@ def lookup_variants( *, request: 
ClingenAlleleIdVariantLookupsRequest, db: Session = Depends(deps.get_db), - user_data: UserData = Depends(get_current_user) + user_data: UserData = Depends(get_current_user), ): variants = ( db.query(Variant) - .options( - joinedload(Variant.score_set).joinedload(ScoreSet.experiment), - joinedload(Variant.mapped_variants) - ) - .filter(Variant.clingen_allele_id.in_(request.clingen_allele_ids)) - .all() + .options(joinedload(Variant.score_set).joinedload(ScoreSet.experiment), joinedload(Variant.mapped_variants)) + .filter(Variant.clingen_allele_id.in_(request.clingen_allele_ids)) + .all() ) variants[:] = [ variant for variant in variants if has_permission(user_data, variant.score_set, Action.READ).permitted - ] + ] variants_by_allele_id: dict[str, list[Variant]] = {allele_id: [] for allele_id in request.clingen_allele_ids} for variant in variants: - variants_by_allele_id[variant.clingen_allele_id].append(variant) - + # mypy is quite stubborn about this potentially being None. + if variant.clingen_allele_id is not None: + variants_by_allele_id[variant.clingen_allele_id].append(variant) # type: ignore + return [variants_by_allele_id[allele_id] for allele_id in request.clingen_allele_ids] @router.post( - "/variants/{urn}", + "/variants/{urn}", status_code=200, response_model=VariantWithShortScoreSet, responses={404: {}, 500: {}}, response_model_exclude_none=True, ) -def get_variant( - *, - urn: str, - db: Session = Depends(deps.get_db), - user_data: UserData = Depends(get_current_user) -): +def get_variant(*, urn: str, db: Session = Depends(deps.get_db), user_data: UserData = Depends(get_current_user)): """ Fetch a single variant by URN. """ From ea69e68b4880c9c4d9527d77c1cd5b355be042a2 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 29 Jan 2025 22:04:35 -0800 Subject: [PATCH 039/166] HACK: Expedient solution for surfacing clinsig/reviewstat in CSV score tables --- src/mavedb/lib/score_sets.py | 61 +++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/src/mavedb/lib/score_sets.py b/src/mavedb/lib/score_sets.py index 1d650eb6..c68507d5 100644 --- a/src/mavedb/lib/score_sets.py +++ b/src/mavedb/lib/score_sets.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd from pandas.testing import assert_index_equal -from sqlalchemy import Integer, cast, func, or_, select +from sqlalchemy import Integer, cast, func, null, or_, select from sqlalchemy.orm import Session, aliased, contains_eager, joinedload, selectinload from mavedb.lib.exceptions import ValidationError @@ -23,6 +23,7 @@ from mavedb.lib.mave.utils import is_csv_null from mavedb.lib.validation.constants.general import null_values_list from mavedb.lib.validation.utilities import is_null as validate_is_null +from mavedb.models.clinvar_variant import ClinvarVariant from mavedb.models.contributor import Contributor from mavedb.models.controlled_keyword import ControlledKeyword from mavedb.models.doi_identifier import DoiIdentifier @@ -39,6 +40,7 @@ from mavedb.models.score_set_publication_identifier import ( ScoreSetPublicationIdentifierAssociation, ) +from mavedb.models.mapped_variant import MappedVariant from mavedb.models.target_accession import TargetAccession from mavedb.models.target_gene import TargetGene from mavedb.models.target_sequence import TargetSequence @@ -407,11 +409,14 @@ def get_score_set_counts_as_csv( ) -> str: assert type(score_set.dataset_columns) is dict count_columns = [str(x) for x in list(score_set.dataset_columns.get("count_columns", []))] - columns = 
["accession", "hgvs_nt", "hgvs_splice", "hgvs_pro"] + count_columns + # HACK + columns = ( + ["accession", "hgvs_nt", "hgvs_splice", "hgvs_pro"] + count_columns + ["mavedb_clinsig", "mavedb_reviewstat"] + ) type_column = "count_data" variants_query = ( - select(Variant) + select(Variant, null(), null()) .where(Variant.score_set_id == score_set.id) .order_by(cast(func.split_part(Variant.urn, "#", 2), Integer)) ) @@ -419,12 +424,10 @@ def get_score_set_counts_as_csv( variants_query = variants_query.offset(start) if limit: variants_query = variants_query.limit(limit) - variants = db.scalars(variants_query).all() - - rows_data = variants_to_csv_rows(variants, columns=columns, dtype=type_column) - if drop_na_columns: - rows_data, columns = drop_na_columns_from_csv_file_rows(rows_data, columns) + variants = db.execute(variants_query).all() + # HACK: Hideous hack for expediency... + rows_data = variants_to_csv_rows(variants, columns=columns, dtype=type_column) # type: ignore stream = io.StringIO() writer = csv.DictWriter(stream, fieldnames=columns, quoting=csv.QUOTE_MINIMAL) writer.writeheader() @@ -441,11 +444,21 @@ def get_score_set_scores_as_csv( ) -> str: assert type(score_set.dataset_columns) is dict score_columns = [str(x) for x in list(score_set.dataset_columns.get("score_columns", []))] - columns = ["accession", "hgvs_nt", "hgvs_splice", "hgvs_pro"] + score_columns + # HACK + columns = ( + ["accession", "hgvs_nt", "hgvs_splice", "hgvs_pro"] + score_columns + ["mavedb_clinsig", "mavedb_reviewstat"] + ) type_column = "score_data" + # HACK: This is a poorly tested and very temporary solution to surface clinical significance and + # clinical review status within the CSV export in a way our front end can handle and display. + current_mapped_variants_subquery = db.query(MappedVariant).filter(MappedVariant.current.is_(True)).subquery() variants_query = ( - select(Variant) + select(Variant, ClinvarVariant.clinical_significance, ClinvarVariant.clinical_review_status) + .join( + current_mapped_variants_subquery, Variant.id == current_mapped_variants_subquery.c.variant_id, isouter=True + ) + .join(ClinvarVariant, current_mapped_variants_subquery.c.clinvar_variant_id == ClinvarVariant.id, isouter=True) .where(Variant.score_set_id == score_set.id) .order_by(cast(func.split_part(Variant.urn, "#", 2), Integer)) ) @@ -453,12 +466,9 @@ def get_score_set_scores_as_csv( variants_query = variants_query.offset(start) if limit: variants_query = variants_query.limit(limit) - variants = db.scalars(variants_query).all() - - rows_data = variants_to_csv_rows(variants, columns=columns, dtype=type_column) - if drop_na_columns: - rows_data, columns = drop_na_columns_from_csv_file_rows(rows_data, columns) + variants = db.execute(variants_query).all() + rows_data = variants_to_csv_rows(variants, columns=columns, dtype=type_column) # type: ignore stream = io.StringIO() writer = csv.DictWriter(stream, fieldnames=columns, quoting=csv.QUOTE_MINIMAL) writer.writeheader() @@ -497,7 +507,9 @@ def is_null(value): return null_values_re.fullmatch(value) or not value -def variant_to_csv_row(variant: Variant, columns: list[str], dtype: str, na_rep="NA") -> dict[str, Any]: +def variant_to_csv_row( + variant: tuple[Variant, str, str], columns: list[str], dtype: str, na_rep="NA" +) -> dict[str, Any]: """ Format a variant into a containing the keys specified in `columns`. 
@@ -519,24 +531,29 @@ def variant_to_csv_row(variant: Variant, columns: list[str], dtype: str, na_rep= row = {} for column_key in columns: if column_key == "hgvs_nt": - value = str(variant.hgvs_nt) + value = str(variant[0].hgvs_nt) elif column_key == "hgvs_pro": - value = str(variant.hgvs_pro) + value = str(variant[0].hgvs_pro) elif column_key == "hgvs_splice": - value = str(variant.hgvs_splice) + value = str(variant[0].hgvs_splice) elif column_key == "accession": - value = str(variant.urn) + value = str(variant[0].urn) else: - parent = variant.data.get(dtype) if variant.data else None + parent = variant[0].data.get(dtype) if variant[0].data else None value = str(parent.get(column_key)) if parent else na_rep if is_null(value): value = na_rep row[column_key] = value + + # HACK: Overwrite any potential values of ClinVar fields present in the data + # object with db results from the tuple directly. + row["mavedb_clinsig"] = variant[1] + row["mavedb_reviewstat"] = variant[2] return row def variants_to_csv_rows( - variants: Sequence[Variant], columns: list[str], dtype: str, na_rep="NA" + variants: Sequence[tuple[Variant, str, str]], columns: list[str], dtype: str, na_rep="NA" ) -> Iterable[dict[str, Any]]: """ Format each variant into a dictionary row containing the keys specified in `columns`. From 569d40c7a8ce91b565891b9c9fbc04f488557be4 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Tue, 28 Jan 2025 14:40:01 -0800 Subject: [PATCH 040/166] Fixed HTTP method typo. --- src/mavedb/routers/variants.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/mavedb/routers/variants.py b/src/mavedb/routers/variants.py index 32a9e045..fe7e97bd 100644 --- a/src/mavedb/routers/variants.py +++ b/src/mavedb/routers/variants.py @@ -39,14 +39,12 @@ def lookup_variants( ] variants_by_allele_id: dict[str, list[Variant]] = {allele_id: [] for allele_id in request.clingen_allele_ids} for variant in variants: - # mypy is quite stubborn about this potentially being None. - if variant.clingen_allele_id is not None: - variants_by_allele_id[variant.clingen_allele_id].append(variant) # type: ignore + variants_by_allele_id[variant.clingen_allele_id].append(variant) return [variants_by_allele_id[allele_id] for allele_id in request.clingen_allele_ids] -@router.post( +@router.get( "/variants/{urn}", status_code=200, response_model=VariantWithShortScoreSet, From 56b05ae4dc72a626d6195a80d9d42a46560c5ff1 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Thu, 30 Jan 2025 14:09:57 -0800 Subject: [PATCH 041/166] Include full score set details in "get variant" response. 
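Reviewer note: an illustrative check of the expanded response. The URN is a placeholder and is
percent-encoded because variant URNs contain '#', which would otherwise be dropped as a URL
fragment.

    from urllib.parse import quote

    from fastapi.testclient import TestClient

    from mavedb.server_main import app

    client = TestClient(app)
    urn = "urn:mavedb:00000001-a-1#1"  # placeholder URN

    response = client.get(f"/api/v1/variants/{quote(urn, safe='')}")
    if response.status_code == 200:
        # The score set is now serialized with the full ScoreSet view model rather than the
        # short form, alongside the variant's mapped variants.
        print(sorted(response.json().keys()))
    else:
        # 404 for an unknown URN, 500 if multiple rows unexpectedly share the URN.
        print(response.status_code)
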
--- src/mavedb/routers/variants.py | 4 ++-- src/mavedb/view_models/variant.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/mavedb/routers/variants.py b/src/mavedb/routers/variants.py index fe7e97bd..7557acff 100644 --- a/src/mavedb/routers/variants.py +++ b/src/mavedb/routers/variants.py @@ -12,7 +12,7 @@ from mavedb.lib.logging.context import logging_context, save_to_logging_context from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant -from mavedb.view_models.variant import ClingenAlleleIdVariantLookupsRequest, VariantWithShortScoreSet +from mavedb.view_models.variant import ClingenAlleleIdVariantLookupsRequest, VariantWithScoreSet, VariantWithShortScoreSet router = APIRouter( prefix="/api/v1", tags=["access keys"], responses={404: {"description": "Not found"}}, route_class=LoggedRoute @@ -47,7 +47,7 @@ def lookup_variants( @router.get( "/variants/{urn}", status_code=200, - response_model=VariantWithShortScoreSet, + response_model=VariantWithScoreSet, responses={404: {}, 500: {}}, response_model_exclude_none=True, ) diff --git a/src/mavedb/view_models/variant.py b/src/mavedb/view_models/variant.py index e418a479..affc0d48 100644 --- a/src/mavedb/view_models/variant.py +++ b/src/mavedb/view_models/variant.py @@ -68,8 +68,10 @@ class Variant(SavedVariant): pass -class VariantWithMappedVariant(SavedVariantWithMappedVariant): - mapped_variant: Optional[MappedVariant] +class VariantWithScoreSet(SavedVariant): + """Variant view model with mapped variants and score set""" + score_set: "ScoreSet" + mapped_variants: list[MappedVariant] class VariantWithShortScoreSet(SavedVariant): @@ -86,6 +88,7 @@ class ClingenAlleleIdVariantLookupsRequest(BaseModel): # ruff: noqa: E402 -from mavedb.view_models.score_set import ShortScoreSet +from mavedb.view_models.score_set import ScoreSet, ShortScoreSet +VariantWithScoreSet.update_forward_refs() VariantWithShortScoreSet.update_forward_refs() From 0063f726209119a578d9f05a680974f6eff9c95a Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Thu, 30 Jan 2025 23:40:41 -0800 Subject: [PATCH 042/166] Revised the temporary hack that presents ClinVar significances as score set CSV columns. - Changed the column names to match what the UI expects. - Changed the mapped variant filter from current=true to vrs_version=1.3. 
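Reviewer note: a short sketch of how a downstream consumer might pick up the renamed columns
from an exported scores CSV; the file path is a placeholder.

    import pandas as pd

    scores = pd.read_csv("scores.csv")  # placeholder path for an exported scores file
    clinvar_cols = ["mavedb_clnsig", "mavedb_clnrevstat"]

    # The exporter writes "NA" for variants without a ClinVar match, which pandas parses as
    # NaN by default, so notna() counts the rows that actually carry ClinVar annotations.
    print(scores[["accession", *clinvar_cols]].head())
    print(scores[clinvar_cols].notna().sum())
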
--- src/mavedb/lib/score_sets.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/mavedb/lib/score_sets.py b/src/mavedb/lib/score_sets.py index c68507d5..d4b05eba 100644 --- a/src/mavedb/lib/score_sets.py +++ b/src/mavedb/lib/score_sets.py @@ -411,7 +411,7 @@ def get_score_set_counts_as_csv( count_columns = [str(x) for x in list(score_set.dataset_columns.get("count_columns", []))] # HACK columns = ( - ["accession", "hgvs_nt", "hgvs_splice", "hgvs_pro"] + count_columns + ["mavedb_clinsig", "mavedb_reviewstat"] + ["accession", "hgvs_nt", "hgvs_splice", "hgvs_pro"] + count_columns + ["mavedb_clnsig", "mavedb_clnrevstat"] ) type_column = "count_data" @@ -446,13 +446,14 @@ def get_score_set_scores_as_csv( score_columns = [str(x) for x in list(score_set.dataset_columns.get("score_columns", []))] # HACK columns = ( - ["accession", "hgvs_nt", "hgvs_splice", "hgvs_pro"] + score_columns + ["mavedb_clinsig", "mavedb_reviewstat"] + ["accession", "hgvs_nt", "hgvs_splice", "hgvs_pro"] + score_columns + ["mavedb_clnsig", "mavedb_clnrevstat"] ) type_column = "score_data" # HACK: This is a poorly tested and very temporary solution to surface clinical significance and # clinical review status within the CSV export in a way our front end can handle and display. - current_mapped_variants_subquery = db.query(MappedVariant).filter(MappedVariant.current.is_(True)).subquery() + # current_mapped_variants_subquery = db.query(MappedVariant).filter(MappedVariant.current.is_(True)).subquery() + current_mapped_variants_subquery = db.query(MappedVariant).filter(MappedVariant.vrs_version == '1.3').subquery() variants_query = ( select(Variant, ClinvarVariant.clinical_significance, ClinvarVariant.clinical_review_status) .join( @@ -547,8 +548,8 @@ def variant_to_csv_row( # HACK: Overwrite any potential values of ClinVar fields present in the data # object with db results from the tuple directly. 
- row["mavedb_clinsig"] = variant[1] - row["mavedb_reviewstat"] = variant[2] + row["mavedb_clnsig"] = variant[1] + row["mavedb_clnrevstat"] = variant[2] return row From b5b599e6ce28554d0ee1dfefce49af3c8fb32673 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Thu, 30 Jan 2025 23:41:00 -0800 Subject: [PATCH 043/166] Bug fixes --- src/mavedb/view_models/clinvar_variant.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mavedb/view_models/clinvar_variant.py b/src/mavedb/view_models/clinvar_variant.py index f37e0c8e..4c81d276 100644 --- a/src/mavedb/view_models/clinvar_variant.py +++ b/src/mavedb/view_models/clinvar_variant.py @@ -29,7 +29,7 @@ class SavedClinvarVariant(ClinvarVariantBase): id: int modification_date: date creation_date: date - mapped_variants: Sequence[SavedMappedVariant] + #mapped_variants: Sequence[SavedMappedVariant] record_type: str = None # type: ignore _record_type_factory = record_type_validator()(set_record_type) @@ -40,7 +40,8 @@ class Config: # Properties to return to non-admin clients class ClinvarVariant(SavedClinvarVariant): - mapped_variants: Sequence[MappedVariant] + pass + #mapped_variants: Sequence[MappedVariant] # ruff: noqa: E402 From 8a40392ee48370aec22c88d95aa918c37580505f Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 18 Feb 2025 15:58:00 -0800 Subject: [PATCH 044/166] Genericize ClinVar Variants Table --- ...be581_genericize_clinvar_variants_table.py | 101 ++++++++++++++++++ src/mavedb/lib/score_sets.py | 17 ++- src/mavedb/models/__init__.py | 2 +- src/mavedb/models/clinical_control.py | 35 ++++++ .../models/clinical_control_mapped_variant.py | 11 ++ src/mavedb/models/clinvar_variant.py | 28 ----- src/mavedb/models/mapped_variant.py | 10 +- .../scripts/refresh_clinvar_variant_data.py | 17 +-- ...clinvar_variant.py => clinical_control.py} | 26 ++--- src/mavedb/view_models/mapped_variant.py | 10 +- 10 files changed, 190 insertions(+), 67 deletions(-) create mode 100644 alembic/versions/695b73abe581_genericize_clinvar_variants_table.py create mode 100644 src/mavedb/models/clinical_control.py create mode 100644 src/mavedb/models/clinical_control_mapped_variant.py delete mode 100644 src/mavedb/models/clinvar_variant.py rename src/mavedb/view_models/{clinvar_variant.py => clinical_control.py} (64%) diff --git a/alembic/versions/695b73abe581_genericize_clinvar_variants_table.py b/alembic/versions/695b73abe581_genericize_clinvar_variants_table.py new file mode 100644 index 00000000..29dfcbad --- /dev/null +++ b/alembic/versions/695b73abe581_genericize_clinvar_variants_table.py @@ -0,0 +1,101 @@ +"""genericize clinvar variants table + +Revision ID: 695b73abe581 +Revises: 34026092c7f8 +Create Date: 2025-02-18 11:54:15.243078 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = "695b73abe581" +down_revision = "34026092c7f8" +branch_labels = None +depends_on = None + + +def upgrade(): + op.rename_table("clinvar_variants", "clinical_controls") + op.execute("ALTER SEQUENCE clinvar_variants_id_seq RENAME TO clinical_controls_id_seq") + op.execute("ALTER INDEX clinvar_variants_pkey RENAME TO clinical_controls_pkey") + + op.alter_column("clinical_controls", "clinvar_db_version", nullable=False, new_column_name="db_version") + op.alter_column("clinical_controls", "allele_id", nullable=False, new_column_name="db_identifier") + op.add_column("clinical_controls", sa.Column("db_name", sa.String(), nullable=True)) + + op.create_index("ix_clinical_controls_gene_symbol", "clinical_controls", ["gene_symbol"]) + op.create_index("ix_clinical_controls_db_name", "clinical_controls", ["db_name"]) + op.create_index("ix_clinical_controls_db_identifier", "clinical_controls", ["db_identifier"]) + op.create_index("ix_clinical_controls_db_version", "clinical_controls", ["db_version"]) + + op.create_table( + "mapped_variants_clinical_controls", + sa.Column("mapped_variant_id", sa.Integer(), nullable=False), + sa.Column("clinical_control_id", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint( + ["mapped_variant_id"], + ["mapped_variants.id"], + ), + sa.ForeignKeyConstraint( + ["clinical_control_id"], + ["clinical_controls.id"], + ), + sa.PrimaryKeyConstraint("mapped_variant_id", "clinical_control_id"), + ) + + # Convert any existing ClinVar variants into clinical control variants. Since + # this table is being update from a clinvar specific table, we assume all existing + # controls are from ClinVar. + op.execute( + """ + INSERT INTO mapped_variants_clinical_controls ( + mapped_variant_id, + clinical_control_id + ) + SELECT id, clinvar_variant_id + FROM mapped_variants + WHERE clinvar_variant_id IS NOT NULL + """ + ) + + op.execute("UPDATE clinical_controls SET db_name='ClinVar'") + op.alter_column("clinical_controls", "db_name", nullable=False) + + op.drop_index("ix_mapped_variants_clinvar_variant_id", "mapped_variants") + op.drop_column("mapped_variants", "clinvar_variant_id") + + +def downgrade(): + op.rename_table("clinical_controls", "clinvar_variants") + op.execute("ALTER SEQUENCE clinical_controls_id_seq RENAME TO clinvar_variants_id_seq") + op.execute("ALTER INDEX clinical_controls_pkey RENAME TO clinvar_variants_pkey") + + op.drop_index("ix_clinical_controls_gene_symbol", "clinical_controls") + op.drop_index("ix_clinical_controls_db_name", "clinical_controls") + op.drop_index("ix_clinical_controls_db_identifier", "clinical_controls") + op.drop_index("ix_clinical_controls_db_version", "clinical_controls") + + op.alter_column("clinvar_variants", "db_version", nullable=False, new_column_name="clinvar_db_version") + op.alter_column("clinvar_variants", "db_identifier", nullable=False, new_column_name="allele_id") + op.drop_column("clinvar_variants", "db_name") + + op.add_column( + "mapped_variants", + sa.Column("clinvar_variant_id", sa.Integer(), sa.ForeignKey("clinvar_variants.id"), nullable=True), + ) + + # Downgrades a many-to-many relationship to a one to many. This will result in data loss. 
+ op.execute( + """ + UPDATE mapped_variants + SET clinvar_variant_id=mapped_variants_clinical_controls.clinical_control_id + FROM mapped_variants_clinical_controls + WHERE mapped_variants_clinical_controls.mapped_variant_id=mapped_variants.id + """ + ) + + op.create_index("ix_mapped_variants_clinvar_variant_id", "mapped_variants", ["clinvar_variant_id"]) + op.drop_table("mapped_variants_clinical_controls") diff --git a/src/mavedb/lib/score_sets.py b/src/mavedb/lib/score_sets.py index d4b05eba..4ecbc830 100644 --- a/src/mavedb/lib/score_sets.py +++ b/src/mavedb/lib/score_sets.py @@ -23,7 +23,7 @@ from mavedb.lib.mave.utils import is_csv_null from mavedb.lib.validation.constants.general import null_values_list from mavedb.lib.validation.utilities import is_null as validate_is_null -from mavedb.models.clinvar_variant import ClinvarVariant +from mavedb.models.clinical_control import ClinicalControl from mavedb.models.contributor import Contributor from mavedb.models.controlled_keyword import ControlledKeyword from mavedb.models.doi_identifier import DoiIdentifier @@ -451,16 +451,13 @@ def get_score_set_scores_as_csv( type_column = "score_data" # HACK: This is a poorly tested and very temporary solution to surface clinical significance and - # clinical review status within the CSV export in a way our front end can handle and display. - # current_mapped_variants_subquery = db.query(MappedVariant).filter(MappedVariant.current.is_(True)).subquery() - current_mapped_variants_subquery = db.query(MappedVariant).filter(MappedVariant.vrs_version == '1.3').subquery() + # clinical review status within the CSV export in a way our front end can handle and display. It's + # also quite slow. variants_query = ( - select(Variant, ClinvarVariant.clinical_significance, ClinvarVariant.clinical_review_status) - .join( - current_mapped_variants_subquery, Variant.id == current_mapped_variants_subquery.c.variant_id, isouter=True - ) - .join(ClinvarVariant, current_mapped_variants_subquery.c.clinvar_variant_id == ClinvarVariant.id, isouter=True) - .where(Variant.score_set_id == score_set.id) + select(Variant, ClinicalControl.clinical_significance, ClinicalControl.clinical_review_status) + .join(MappedVariant, ClinicalControl.mapped_variants, isouter=True) + .where(Variant.score_set_id == score_set.id, MappedVariant.vrs_version == "1.3") + # .where(Variant.score_set_id == score_set.id,MappedVariant.current.is_(True)) .order_by(cast(func.split_part(Variant.urn, "#", 2), Integer)) ) if start: diff --git a/src/mavedb/models/__init__.py b/src/mavedb/models/__init__.py index 111809a4..af823015 100644 --- a/src/mavedb/models/__init__.py +++ b/src/mavedb/models/__init__.py @@ -1,7 +1,7 @@ __all__ = [ "access_key", "collection", - "clinvar_variant", + "clinical_control", "controlled_keyword", "doi_identifier", "ensembl_identifier", diff --git a/src/mavedb/models/clinical_control.py b/src/mavedb/models/clinical_control.py new file mode 100644 index 00000000..a7462007 --- /dev/null +++ b/src/mavedb/models/clinical_control.py @@ -0,0 +1,35 @@ +from datetime import date +from typing import TYPE_CHECKING + +from sqlalchemy import Column, Date, Integer, String +from sqlalchemy.orm import Mapped, relationship + +from mavedb.db.base import Base +from mavedb.models.clinical_control_mapped_variant import mapped_variants_clinical_controls_association_table + +if TYPE_CHECKING: + from mavedb.models.mapped_variant import MappedVariant + + +class ClinicalControl(Base): + __tablename__ = "clinical_controls" + + id = Column(Integer, 
primary_key=True) + + gene_symbol = Column(String, nullable=False, index=True) + + clinical_significance = Column(String, nullable=False) + clinical_review_status = Column(String, nullable=False) + + db_name = Column(String, nullable=False, index=True) + db_identifier = Column(String, nullable=False, index=True) + db_version = Column(String, nullable=False, index=True) + + creation_date = Column(Date, nullable=False, default=date.today) + modification_date = Column(Date, nullable=False, default=date.today, onupdate=date.today) + + mapped_variants: Mapped[list["MappedVariant"]] = relationship( + "MappedVariant", + secondary=mapped_variants_clinical_controls_association_table, + back_populates="clinical_controls", + ) diff --git a/src/mavedb/models/clinical_control_mapped_variant.py b/src/mavedb/models/clinical_control_mapped_variant.py new file mode 100644 index 00000000..eabb7689 --- /dev/null +++ b/src/mavedb/models/clinical_control_mapped_variant.py @@ -0,0 +1,11 @@ +from sqlalchemy import Column, Table, ForeignKey + +from mavedb.db.base import Base + + +mapped_variants_clinical_controls_association_table = Table( + "mapped_variants_clinical_controls", + Base.metadata, + Column("mapped_variant_id", ForeignKey("mapped_variants.id"), primary_key=True), + Column("clinical_control_id", ForeignKey("clinical_controls.id"), primary_key=True), +) diff --git a/src/mavedb/models/clinvar_variant.py b/src/mavedb/models/clinvar_variant.py deleted file mode 100644 index f2ba2f00..00000000 --- a/src/mavedb/models/clinvar_variant.py +++ /dev/null @@ -1,28 +0,0 @@ -from datetime import date -from typing import TYPE_CHECKING - -from sqlalchemy import Column, Date, Integer, String -from sqlalchemy.orm import Mapped, relationship - -from mavedb.db.base import Base - -if TYPE_CHECKING: - from .mapped_variant import MappedVariant - - -class ClinvarVariant(Base): - __tablename__ = "clinvar_variants" - - id = Column(Integer, primary_key=True) - allele_id = Column(Integer, nullable=False, index=True) - gene_symbol = Column(String, nullable=False) - - clinical_significance = Column(String, nullable=False) - clinical_review_status = Column(String, nullable=False) - - clinvar_db_version = Column(String, nullable=False) - - creation_date = Column(Date, nullable=False, default=date.today) - modification_date = Column(Date, nullable=False, default=date.today, onupdate=date.today) - - mapped_variants: Mapped[list["MappedVariant"]] = relationship(back_populates="clinvar_variant") diff --git a/src/mavedb/models/mapped_variant.py b/src/mavedb/models/mapped_variant.py index 0648832d..9e01f6e9 100644 --- a/src/mavedb/models/mapped_variant.py +++ b/src/mavedb/models/mapped_variant.py @@ -6,9 +6,10 @@ from sqlalchemy.orm import Mapped, relationship from mavedb.db.base import Base +from mavedb.models.clinical_control_mapped_variant import mapped_variants_clinical_controls_association_table if TYPE_CHECKING: - from .clinvar_variant import ClinvarVariant + from .clinical_control import ClinicalControl from .variant import Variant @@ -29,5 +30,8 @@ class MappedVariant(Base): variant_id = Column(Integer, ForeignKey("variants.id"), index=True, nullable=False) variant: Mapped["Variant"] = relationship("Variant", back_populates="mapped_variants") - clinvar_variant_id = Column(Integer, ForeignKey("clinvar_variants.id"), nullable=True, index=True) - clinvar_variant: Mapped["ClinvarVariant"] = relationship("ClinvarVariant", back_populates="mapped_variants") + clinical_controls: Mapped[list["ClinicalControl"]] = relationship( + 
"ClinicalControl", + secondary=mapped_variants_clinical_controls_association_table, + back_populates="mapped_variants", + ) diff --git a/src/mavedb/scripts/refresh_clinvar_variant_data.py b/src/mavedb/scripts/refresh_clinvar_variant_data.py index c0f30533..e49c0921 100644 --- a/src/mavedb/scripts/refresh_clinvar_variant_data.py +++ b/src/mavedb/scripts/refresh_clinvar_variant_data.py @@ -15,7 +15,7 @@ from mavedb.models.variant import Variant from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.clinvar_variant import ClinvarVariant +from mavedb.models.clinical_control import ClinicalControl from mavedb.scripts.environment import with_database_session logger = logging.getLogger(__name__) @@ -91,8 +91,10 @@ def refresh_clinvar_variants(db: Session, month: Optional[str], year: Optional[s variant_data = tsv_data[clinvar_allele_id] clinvar_variant = db.scalars( - select(ClinvarVariant).where( - ClinvarVariant.allele_id == clinvar_allele_id, ClinvarVariant.clinvar_db_version == version + select(ClinicalControl).where( + ClinicalControl.db_identifier == clinvar_allele_id, + ClinicalControl.db_version == version, + ClinicalControl.db_name == "ClinVar", ) ).one_or_none() if clinvar_variant: @@ -100,12 +102,13 @@ def refresh_clinvar_variants(db: Session, month: Optional[str], year: Optional[s clinvar_variant.clinical_significance = variant_data.get("ClinicalSignificance") clinvar_variant.clinical_review_status = variant_data.get("ReviewStatus") else: - clinvar_variant = ClinvarVariant( - allele_id=clinvar_allele_id, + clinvar_variant = ClinicalControl( + db_identifier=clinvar_allele_id, gene_symbol=variant_data.get("GeneSymbol"), clinical_significance=variant_data.get("ClinicalSignificance"), clinical_review_status=variant_data.get("ReviewStatus"), - clinvar_db_version=version, + db_version=version, + db_name="ClinVar", ) db.add(clinvar_variant) @@ -114,7 +117,7 @@ def refresh_clinvar_variants(db: Session, month: Optional[str], year: Optional[s select(MappedVariant).join(Variant).where(Variant.clingen_allele_id == clingen_id) ).all() for variant in variants_with_clingen_allele_id: - variant.clinvar_variant_id = clinvar_variant.id + variant.clinvar_variants.append(clinvar_variant) db.add(variant) db.commit() diff --git a/src/mavedb/view_models/clinvar_variant.py b/src/mavedb/view_models/clinical_control.py similarity index 64% rename from src/mavedb/view_models/clinvar_variant.py rename to src/mavedb/view_models/clinical_control.py index 4c81d276..0f8650c4 100644 --- a/src/mavedb/view_models/clinvar_variant.py +++ b/src/mavedb/view_models/clinical_control.py @@ -8,28 +8,29 @@ from mavedb.view_models.base.base import BaseModel -class ClinvarVariantBase(BaseModel): - allele_id: int +class ClinicalControlBase(BaseModel): + db_identifier: int gene_symbol: str clinical_significance: str clinical_review_status: str - clinvar_db_version: str + db_version: str + db_name: str -class ClinvarVariantCreate(ClinvarVariantBase): +class ClinicalControlCreate(ClinicalControlBase): mapped_variants: Optional[list[MappedVariantCreate]] = None -class ClinvarVariantUpdate(ClinvarVariantBase): +class ClinicalControlUpdate(ClinicalControlBase): pass # Properties shared by models stored in DB -class SavedClinvarVariant(ClinvarVariantBase): +class SavedClinicalControl(ClinicalControlBase): id: int modification_date: date creation_date: date - #mapped_variants: Sequence[SavedMappedVariant] + mapped_variants: Sequence[SavedMappedVariant] record_type: str = None # type: ignore _record_type_factory = 
record_type_validator()(set_record_type) @@ -39,14 +40,13 @@ class Config: # Properties to return to non-admin clients -class ClinvarVariant(SavedClinvarVariant): - pass - #mapped_variants: Sequence[MappedVariant] +class ClinicalControl(SavedClinicalControl): + mapped_variants: Sequence[MappedVariant] # ruff: noqa: E402 from mavedb.view_models.mapped_variant import MappedVariant, SavedMappedVariant, MappedVariantCreate -ClinvarVariantCreate.update_forward_refs() -SavedClinvarVariant.update_forward_refs() -ClinvarVariant.update_forward_refs() +ClinicalControlCreate.update_forward_refs() +SavedClinicalControl.update_forward_refs() +ClinicalControl.update_forward_refs() diff --git a/src/mavedb/view_models/mapped_variant.py b/src/mavedb/view_models/mapped_variant.py index e3aff59e..b4e1ae45 100644 --- a/src/mavedb/view_models/mapped_variant.py +++ b/src/mavedb/view_models/mapped_variant.py @@ -2,7 +2,7 @@ from __future__ import annotations from datetime import date -from typing import Any, Optional +from typing import Any, Optional, Sequence from mavedb.view_models import record_type_validator, set_record_type from mavedb.view_models.base.base import BaseModel @@ -21,7 +21,7 @@ class MappedVariantBase(BaseModel): class MappedVariantCreate(MappedVariantBase): - clinvar_variant: Optional[ClinvarVariantCreate] + clinical_controles: Sequence[ClinicalControlBase] class MappedVariantUpdate(MappedVariantBase): @@ -31,7 +31,7 @@ class MappedVariantUpdate(MappedVariantBase): # Properties shared by models stored in DB class SavedMappedVariant(MappedVariantBase): id: int - clinvar_variant: Optional[SavedClinvarVariant] + clinvar_variant: Sequence[SavedClinicalControl] record_type: str = None # type: ignore _record_type_factory = record_type_validator()(set_record_type) @@ -42,11 +42,11 @@ class Config: # Properties to return to non-admin clients class MappedVariant(SavedMappedVariant): - clinvar_variant: Optional[ClinvarVariant] + clinvar_variant: Optional[ClinicalControl] # ruff: noqa: E402 -from mavedb.view_models.clinvar_variant import ClinvarVariant, ClinvarVariantCreate, SavedClinvarVariant +from mavedb.view_models.clinical_control import ClinicalControlBase, SavedClinicalControl, ClinicalControl MappedVariantCreate.update_forward_refs() SavedMappedVariant.update_forward_refs() From 7e10d7f68600d4a197f37e39726c4067e09fca79 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 19 Feb 2025 11:33:30 -0800 Subject: [PATCH 045/166] Move clingen_allele_id column to mapped_variants table --- ...de3c9_move_clingen_allele_id_to_mapped_.py | 53 +++++++++++++++++++ src/mavedb/models/mapped_variant.py | 2 + src/mavedb/routers/variants.py | 30 ++++++----- .../scripts/refresh_clinvar_variant_data.py | 13 +++-- src/mavedb/view_models/mapped_variant.py | 3 +- src/mavedb/view_models/variant.py | 2 +- 6 files changed, 82 insertions(+), 21 deletions(-) create mode 100644 alembic/versions/d6e5a9fde3c9_move_clingen_allele_id_to_mapped_.py diff --git a/alembic/versions/d6e5a9fde3c9_move_clingen_allele_id_to_mapped_.py b/alembic/versions/d6e5a9fde3c9_move_clingen_allele_id_to_mapped_.py new file mode 100644 index 00000000..c6decaf6 --- /dev/null +++ b/alembic/versions/d6e5a9fde3c9_move_clingen_allele_id_to_mapped_.py @@ -0,0 +1,53 @@ +"""move clingen allele id to mapped variants table + +Revision ID: d6e5a9fde3c9 +Revises: 695b73abe581 +Create Date: 2025-02-19 10:51:07.319962 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = "d6e5a9fde3c9" +down_revision = "695b73abe581" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index("ix_variants_clingen_allele_id", table_name="variants") + op.add_column("mapped_variants", sa.Column("clingen_allele_id", sa.String(), nullable=True)) + op.execute( + """ + UPDATE mapped_variants + SET clingen_allele_id=variants.clingen_allele_id + FROM variants + WHERE variants.id=mapped_variants.variant_id + """ + ) + op.drop_column("variants", "clingen_allele_id") + op.create_index( + op.f("ix_mapped_variants_clingen_allele_id"), "mapped_variants", ["clingen_allele_id"], unique=False + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f("ix_mapped_variants_clingen_allele_id"), table_name="mapped_variants") + op.add_column("variants", sa.Column("clingen_allele_id", sa.String(), nullable=True)) + op.execute( + """ + UPDATE variants + SET clingen_allele_id=mapped_variants.clingen_allele_id + FROM mapped_variants + WHERE variants.id=mapped_variants.variant_id + """ + ) + op.drop_column("mapped_variants", "clingen_allele_id") + op.create_index("ix_variants_clingen_allele_id", "variants", ["clingen_allele_id"], unique=False) + # ### end Alembic commands ### diff --git a/src/mavedb/models/mapped_variant.py b/src/mavedb/models/mapped_variant.py index 9e01f6e9..8396dba8 100644 --- a/src/mavedb/models/mapped_variant.py +++ b/src/mavedb/models/mapped_variant.py @@ -30,6 +30,8 @@ class MappedVariant(Base): variant_id = Column(Integer, ForeignKey("variants.id"), index=True, nullable=False) variant: Mapped["Variant"] = relationship("Variant", back_populates="mapped_variants") + clingen_allele_id = Column(String, index=True, nullable=True) + clinical_controls: Mapped[list["ClinicalControl"]] = relationship( "ClinicalControl", secondary=mapped_variants_clinical_controls_association_table, diff --git a/src/mavedb/routers/variants.py b/src/mavedb/routers/variants.py index 7557acff..853ebedf 100644 --- a/src/mavedb/routers/variants.py +++ b/src/mavedb/routers/variants.py @@ -4,6 +4,7 @@ from fastapi.exceptions import HTTPException from mavedb.lib.authentication import UserData, get_current_user from mavedb.lib.permissions import Action, assert_permission, has_permission +from sqlalchemy import select from sqlalchemy.exc import MultipleResultsFound from sqlalchemy.orm import Session, joinedload @@ -11,8 +12,13 @@ from mavedb.lib.logging import LoggedRoute from mavedb.lib.logging.context import logging_context, save_to_logging_context from mavedb.models.score_set import ScoreSet +from mavedb.models.mapped_variant import MappedVariant from mavedb.models.variant import Variant -from mavedb.view_models.variant import ClingenAlleleIdVariantLookupsRequest, VariantWithScoreSet, VariantWithShortScoreSet +from mavedb.view_models.variant import ( + ClingenAlleleIdVariantLookupsRequest, + VariantWithScoreSet, + VariantWithShortScoreSet, +) router = APIRouter( prefix="/api/v1", tags=["access keys"], responses={404: {"description": "Not found"}}, route_class=LoggedRoute @@ -28,18 +34,18 @@ def lookup_variants( db: Session = Depends(deps.get_db), user_data: UserData = Depends(get_current_user), ): - variants = ( - db.query(Variant) - .options(joinedload(Variant.score_set).joinedload(ScoreSet.experiment), joinedload(Variant.mapped_variants)) - .filter(Variant.clingen_allele_id.in_(request.clingen_allele_ids)) - .all() - ) - variants[:] 
= [ - variant for variant in variants if has_permission(user_data, variant.score_set, Action.READ).permitted - ] + variants = db.execute( + select(Variant, MappedVariant.clingen_allele_id) + .join(MappedVariant) + .options(joinedload(Variant.score_set).joinedload(ScoreSet.experiment)) + .where(MappedVariant.clingen_allele_id.in_(request.clingen_allele_ids)) + ).all() + variants_by_allele_id: dict[str, list[Variant]] = {allele_id: [] for allele_id in request.clingen_allele_ids} - for variant in variants: - variants_by_allele_id[variant.clingen_allele_id].append(variant) + + for variant, allele_id in variants: + if has_permission(user_data, variant.score_set, Action.READ).permitted: + variants_by_allele_id[allele_id].append(variant) return [variants_by_allele_id[allele_id] for allele_id in request.clingen_allele_ids] diff --git a/src/mavedb/scripts/refresh_clinvar_variant_data.py b/src/mavedb/scripts/refresh_clinvar_variant_data.py index e49c0921..b09cb337 100644 --- a/src/mavedb/scripts/refresh_clinvar_variant_data.py +++ b/src/mavedb/scripts/refresh_clinvar_variant_data.py @@ -13,7 +13,6 @@ from sqlalchemy import select, distinct, func from sqlalchemy.orm import Session -from mavedb.models.variant import Variant from mavedb.models.mapped_variant import MappedVariant from mavedb.models.clinical_control import ClinicalControl from mavedb.scripts.environment import with_database_session @@ -69,9 +68,9 @@ def refresh_clinvar_variants(db: Session, month: Optional[str], year: Optional[s version = f"{month}_{year}" if month and year else f"{date.today().month}_{date.today().year}" logger.info(f"Fetched TSV variant data for ClinVar for {version}.") - total_variants_with_clingen_ids = db.scalar(func.count(distinct(Variant.clingen_allele_id))) + total_variants_with_clingen_ids = db.scalar(func.count(distinct(MappedVariant.clingen_allele_id))) clingen_ids = db.scalars( - select(distinct(Variant.clingen_allele_id)).where(Variant.clingen_allele_id.is_not(None)) + select(distinct(MappedVariant.clingen_allele_id)).where(MappedVariant.clingen_allele_id.is_not(None)) ).all() logger.info(f"Fetching ClinGen data for {total_variants_with_clingen_ids} variants.") @@ -114,11 +113,11 @@ def refresh_clinvar_variants(db: Session, month: Optional[str], year: Optional[s db.add(clinvar_variant) variants_with_clingen_allele_id = db.scalars( - select(MappedVariant).join(Variant).where(Variant.clingen_allele_id == clingen_id) + select(MappedVariant).where(MappedVariant.clingen_allele_id == clingen_id) ).all() - for variant in variants_with_clingen_allele_id: - variant.clinvar_variants.append(clinvar_variant) - db.add(variant) + for mapped_variant in variants_with_clingen_allele_id: + mapped_variant.clinical_controls.append(clinvar_variant) + db.add(mapped_variant) db.commit() logger.debug( diff --git a/src/mavedb/view_models/mapped_variant.py b/src/mavedb/view_models/mapped_variant.py index b4e1ae45..bf7c7b3a 100644 --- a/src/mavedb/view_models/mapped_variant.py +++ b/src/mavedb/view_models/mapped_variant.py @@ -32,6 +32,7 @@ class MappedVariantUpdate(MappedVariantBase): class SavedMappedVariant(MappedVariantBase): id: int clinvar_variant: Sequence[SavedClinicalControl] + clingen_allele_id: Optional[str] record_type: str = None # type: ignore _record_type_factory = record_type_validator()(set_record_type) @@ -42,7 +43,7 @@ class Config: # Properties to return to non-admin clients class MappedVariant(SavedMappedVariant): - clinvar_variant: Optional[ClinicalControl] + clinvar_variant: Sequence[ClinicalControl] # ruff: 
noqa: E402 diff --git a/src/mavedb/view_models/variant.py b/src/mavedb/view_models/variant.py index affc0d48..f7c71349 100644 --- a/src/mavedb/view_models/variant.py +++ b/src/mavedb/view_models/variant.py @@ -38,7 +38,6 @@ class SavedVariant(VariantBase): id: int record_type: str = None # type: ignore - clingen_allele_id: Optional[str] _record_type_factory = record_type_validator()(set_record_type) @@ -70,6 +69,7 @@ class Variant(SavedVariant): class VariantWithScoreSet(SavedVariant): """Variant view model with mapped variants and score set""" + score_set: "ScoreSet" mapped_variants: list[MappedVariant] From 0b1a4329a9aaf64cb8ee77c530b119f46467fab7 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 19 Feb 2025 11:36:46 -0800 Subject: [PATCH 046/166] fixup --- src/mavedb/models/variant.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mavedb/models/variant.py b/src/mavedb/models/variant.py index f81878bb..b038c1ea 100644 --- a/src/mavedb/models/variant.py +++ b/src/mavedb/models/variant.py @@ -18,7 +18,6 @@ class Variant(Base): id = Column(Integer, primary_key=True) urn = Column(String(64), index=True, nullable=True, unique=True) - clingen_allele_id = Column(String, index=True, nullable=True) data = Column(JSONB, nullable=False) score_set_id = Column("scoreset_id", Integer, ForeignKey("scoresets.id"), index=True, nullable=False) From aa74796ec9f7cad9490ef193c56b264a9d534c88 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 19 Feb 2025 12:54:59 -0800 Subject: [PATCH 047/166] Clinical control router tests and code fixup from tests output --- src/mavedb/routers/score_sets.py | 76 +++++++++++++- src/mavedb/view_models/clinical_control.py | 15 ++- src/mavedb/view_models/mapped_variant.py | 17 +++- tests/helpers/constants.py | 111 +++++++++++++++++++++ tests/helpers/util.py | 78 +++++++++++++++ tests/routers/conftest.py | 5 + tests/routers/test_score_set.py | 90 +++++++++++++++++ 7 files changed, 381 insertions(+), 11 deletions(-) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 65e83c0f..63abb69f 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -1,6 +1,6 @@ import logging from datetime import date -from typing import Any, List, Optional +from typing import Any, List, Optional, Sequence import pandas as pd import pydantic @@ -53,6 +53,7 @@ generate_experiment_urn, generate_score_set_urn, ) +from mavedb.models.clinical_control import ClinicalControl from mavedb.models.contributor import Contributor from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.enums.user_role import UserRole @@ -64,7 +65,7 @@ from mavedb.models.target_gene import TargetGene from mavedb.models.target_sequence import TargetSequence from mavedb.models.variant import Variant -from mavedb.view_models import mapped_variant, score_set, calibration +from mavedb.view_models import mapped_variant, score_set, calibration, clinical_control from mavedb.view_models.search import ScoreSetsSearch logger = logging.getLogger(__name__) @@ -1143,3 +1144,74 @@ async def publish_score_set( ) return item + + +@router.get( + "/score-sets/{urn}/clinical-controls", + status_code=200, + response_model=list[clinical_control.ClinicalControlWithMappedVariants], + response_model_exclude_none=True, +) +async def get_clinical_controls_for_score_set( + *, + urn: str, + # We'd prefer to reserve `db` as a query parameter. 
+ _db: Session = Depends(deps.get_db), + user_data: UserData = Depends(get_current_user), + db: Optional[str] = None, + version: Optional[str] = None, +) -> Sequence[ClinicalControl]: + """ + Fetch relevant clinical controls for a given score set. + """ + save_to_logging_context({"requested_resource": urn, "resource_property": "clinical_controls"}) + + # Rename user facing kwargs for consistency with code base naming conventions. My-py doesn't care for us redefining db. + db_name = db + db_version = version + + item: Optional[ScoreSet] = _db.scalars(select(ScoreSet).where(ScoreSet.urn == urn)).one_or_none() + if not item: + logger.info( + msg="Failed to fetch clinical controls for score set; The requested score set does not exist.", + extra=logging_context(), + ) + raise HTTPException(status_code=404, detail=f"score set with URN '{urn}' not found") + + assert_permission(user_data, item, Action.READ) + + clinical_controls_query = ( + select(ClinicalControl) + .join(MappedVariant, ClinicalControl.mapped_variants) + .join(Variant) + .where(Variant.score_set_id == item.id) + ) + + if db_name is not None: + save_to_logging_context({"db_name": db_name}) + clinical_controls_query = clinical_controls_query.where(ClinicalControl.db_name == db_name) + + if db_version is not None: + save_to_logging_context({"db_version": db_version}) + clinical_controls_query = clinical_controls_query.where(ClinicalControl.db_version == db_version) + + clinical_controls_for_item: Sequence[ClinicalControl] = _db.scalars(clinical_controls_query).all() + + if not clinical_controls_for_item: + logger.info( + msg="No clinical control variants matching the provided filters are associated with the requested score set.", + extra=logging_context(), + ) + raise HTTPException( + status_code=404, + detail=f"No clinical control variants matching the provided filters associated with score set URN {urn} were found", + ) + + for control_variant in clinical_controls_for_item: + control_variant.mapped_variants = [ + mv for mv in control_variant.mapped_variants if mv.current and mv.variant.score_set_id == item.id + ] + + save_to_logging_context({"resource_count": len(clinical_controls_for_item)}) + + return clinical_controls_for_item diff --git a/src/mavedb/view_models/clinical_control.py b/src/mavedb/view_models/clinical_control.py index 0f8650c4..6f7b5600 100644 --- a/src/mavedb/view_models/clinical_control.py +++ b/src/mavedb/view_models/clinical_control.py @@ -9,7 +9,7 @@ class ClinicalControlBase(BaseModel): - db_identifier: int + db_identifier: str gene_symbol: str clinical_significance: str clinical_review_status: str @@ -30,7 +30,6 @@ class SavedClinicalControl(ClinicalControlBase): id: int modification_date: date creation_date: date - mapped_variants: Sequence[SavedMappedVariant] record_type: str = None # type: ignore _record_type_factory = record_type_validator()(set_record_type) @@ -39,8 +38,16 @@ class Config: orm_mode = True +class SavedClinicalControlWithMappedVariants(SavedClinicalControl): + mapped_variants: Sequence[SavedMappedVariant] + + # Properties to return to non-admin clients class ClinicalControl(SavedClinicalControl): + pass + + +class ClinicalControlWithMappedVariants(SavedClinicalControlWithMappedVariants): mapped_variants: Sequence[MappedVariant] @@ -48,5 +55,5 @@ class ClinicalControl(SavedClinicalControl): from mavedb.view_models.mapped_variant import MappedVariant, SavedMappedVariant, MappedVariantCreate ClinicalControlCreate.update_forward_refs() -SavedClinicalControl.update_forward_refs() 
-ClinicalControl.update_forward_refs() +SavedClinicalControlWithMappedVariants.update_forward_refs() +ClinicalControlWithMappedVariants.update_forward_refs() diff --git a/src/mavedb/view_models/mapped_variant.py b/src/mavedb/view_models/mapped_variant.py index bf7c7b3a..37abb3a3 100644 --- a/src/mavedb/view_models/mapped_variant.py +++ b/src/mavedb/view_models/mapped_variant.py @@ -31,7 +31,6 @@ class MappedVariantUpdate(MappedVariantBase): # Properties shared by models stored in DB class SavedMappedVariant(MappedVariantBase): id: int - clinvar_variant: Sequence[SavedClinicalControl] clingen_allele_id: Optional[str] record_type: str = None # type: ignore @@ -41,14 +40,22 @@ class Config: orm_mode = True +class SavedMappedVariantWithControls(SavedMappedVariant): + clinical_controls: Sequence[SavedClinicalControl] + + # Properties to return to non-admin clients class MappedVariant(SavedMappedVariant): - clinvar_variant: Sequence[ClinicalControl] + pass + + +class MappedVariantWithControls(SavedMappedVariantWithControls): + clinical_controls: Sequence[ClinicalControl] # ruff: noqa: E402 -from mavedb.view_models.clinical_control import ClinicalControlBase, SavedClinicalControl, ClinicalControl +from mavedb.view_models.clinical_control import ClinicalControlBase, ClinicalControl, SavedClinicalControl MappedVariantCreate.update_forward_refs() -SavedMappedVariant.update_forward_refs() -MappedVariant.update_forward_refs() +SavedMappedVariantWithControls.update_forward_refs() +MappedVariantWithControls.update_forward_refs() diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index 03abc856..681d781a 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -11,6 +11,15 @@ TEST_CROSSREF_IDENTIFIER = "10.1371/2021.06.22.21259265" TEST_ORCID_ID = "1111-1111-1111-1111" +TEST_GA4GH_IDENTIFIER = "ga4gh:SQ.test" +# ^[0-9A-Za-z_\-]{32}$ +TEST_GA4GH_DIGEST = "ga4ghtest_ga4ghtest_ga4ghtest_dg" +# ^SQ.[0-9A-Za-z_\-]{32}$ +TEST_REFGET_ACCESSION = "SQ.ga4ghtest_ga4ghtest_ga4ghtest_rg" +TEST_SEQUENCE_LOCATION_ACCESSION = "ga4gh:SL.test" + +TEST_REFSEQ_IDENTIFIER = "NM_003345" + VALID_ACCESSION = "NM_001637.3" VALID_GENE = "BRCA1" @@ -36,6 +45,56 @@ "id": 1, } +TEST_VALID_PRE_MAPPED_VRS_ALLELE = { + "id": TEST_GA4GH_IDENTIFIER, + "type": "Allele", + "state": {"type": "LiteralSequenceExpression", "sequence": "V"}, + "digest": TEST_GA4GH_DIGEST, + "location": { + "id": TEST_SEQUENCE_LOCATION_ACCESSION, + "end": 2, + "type": "SequenceLocation", + "start": 1, + "digest": TEST_GA4GH_DIGEST, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": TEST_REFGET_ACCESSION, + }, + }, + "extensions": [{"name": "vrs_ref_allele_seq", "type": "Extension", "value": "W"}], +} + +TEST_VALID_POST_MAPPED_VRS_ALLELE = { + "id": TEST_GA4GH_IDENTIFIER, + "type": "Allele", + "state": {"type": "LiteralSequenceExpression", "sequence": "F"}, + "digest": TEST_GA4GH_DIGEST, + "location": { + "id": TEST_SEQUENCE_LOCATION_ACCESSION, + "end": 6, + "type": "SequenceLocation", + "start": 5, + "digest": TEST_GA4GH_DIGEST, + "sequenceReference": { + "type": "SequenceReference", + "label": TEST_REFSEQ_IDENTIFIER, + "refgetAccession": TEST_REFGET_ACCESSION, + }, + }, + "extensions": [{"name": "vrs_ref_allele_seq", "type": "Extension", "value": "D"}], + "expressions": [{"value": f"{TEST_REFSEQ_IDENTIFIER}:p.Asp5Phe", "syntax": "hgvs.p"}], +} + +TEST_VALID_PRE_MAPPED_VRS_HAPLOTYPE = { + "type": "Haplotype", + "members": [TEST_VALID_PRE_MAPPED_VRS_ALLELE, TEST_VALID_PRE_MAPPED_VRS_ALLELE], +} + 
+TEST_VALID_POST_MAPPED_VRS_HAPLOTYPE = { + "type": "Haplotype", + "members": [TEST_VALID_POST_MAPPED_VRS_ALLELE, TEST_VALID_POST_MAPPED_VRS_ALLELE], +} + TEST_USER = { "username": "0000-1111-2222-3333", "first_name": "First", @@ -764,3 +823,55 @@ "editors": [], "viewers": [], } + +TEST_CLINVAR_CONTROL = { + "db_identifier": "183058", + "gene_symbol": "PTEN", + "clinical_significance": "Likely benign", + "clinical_review_status": "criteria provided, multiple submitters, no conflicts", + "db_name": "ClinVar", + "db_version": "11_2024", +} + + +TEST_SAVED_CLINVAR_CONTROL = { + "recordType": "ClinicalControlWithMappedVariants", + "dbIdentifier": "183058", + "geneSymbol": "PTEN", + "clinicalSignificance": "Likely benign", + "clinicalReviewStatus": "criteria provided, multiple submitters, no conflicts", + "dbName": "ClinVar", + "dbVersion": "11_2024", + "mappedVariants": [], +} + + +TEST_GENERIC_CLINICAL_CONTROL = { + "db_identifier": "ABC123", + "gene_symbol": "BRCA1", + "clinical_significance": "benign", + "clinical_review_status": "lots of convincing evidence", + "db_name": "GenDB", + "db_version": "2024", +} + + +TEST_SAVED_GENERIC_CLINICAL_CONTROL = { + "recordType": "ClinicalControlWithMappedVariants", + "dbIdentifier": "ABC123", + "geneSymbol": "BRCA1", + "clinicalSignificance": "benign", + "clinicalReviewStatus": "lots of convincing evidence", + "dbName": "GenDB", + "dbVersion": "2024", + "mappedVariants": [], +} + +TEST_MINIMAL_MAPPED_VARIANT = { + "variant_id": 1, + "modification_date": date.today(), + "vrs_version": "2.0", + "mapped_date": date.today(), + "mapping_api_version": "pytest.0.0", + "current": True, +} diff --git a/tests/helpers/util.py b/tests/helpers/util.py index 6a005e3a..7400c262 100644 --- a/tests/helpers/util.py +++ b/tests/helpers/util.py @@ -1,4 +1,5 @@ from copy import deepcopy +from datetime import date from unittest.mock import patch import cdot.hgvs.dataproviders @@ -9,6 +10,7 @@ from mavedb.lib.score_sets import columns_for_dataset, create_variants, create_variants_data, csv_data_to_df from mavedb.lib.validation.dataframe import validate_and_standardize_dataframe_pair +from mavedb.models.clinical_control import ClinicalControl as ClinicalControlDbModel from mavedb.models.contributor import Contributor from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.enums.mapping_state import MappingState @@ -19,9 +21,14 @@ from mavedb.models.user import User from mavedb.models.variant import Variant from mavedb.view_models.collection import Collection +from mavedb.models.mapped_variant import MappedVariant as MappedVariantDbModel +from mavedb.models.variant import Variant as VariantDbModel from mavedb.view_models.experiment import Experiment, ExperimentCreate from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate from tests.helpers.constants import ( + TEST_VALID_POST_MAPPED_VRS_HAPLOTYPE, + TEST_VALID_PRE_MAPPED_VRS_ALLELE, + TEST_VALID_POST_MAPPED_VRS_ALLELE, EXTRA_USER, TEST_CDOT_TRANSCRIPT, TEST_COLLECTION, @@ -31,6 +38,7 @@ TEST_MINIMAL_POST_MAPPED_METADATA, TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_MAPPED_VARIANT, + TEST_VALID_PRE_MAPPED_VRS_HAPLOTYPE, ) @@ -211,6 +219,31 @@ def create_mapped_variants_for_score_set(db, score_set_urn): return +def mock_worker_vrs_mapping(client, db, score_set, alleles=True): + # The mapping job is tested elsewhere, so insert mapped variants manually. 
+ variants = db.scalars( + select(VariantDbModel).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set["urn"]) + ).all() + + # It's un-important what the contents of each mapped VRS object are, so use the same constant for each variant. + for variant in variants: + mapped_variant = MappedVariantDbModel( + pre_mapped=TEST_VALID_PRE_MAPPED_VRS_ALLELE if alleles else TEST_VALID_PRE_MAPPED_VRS_HAPLOTYPE, + post_mapped=TEST_VALID_POST_MAPPED_VRS_ALLELE if alleles else TEST_VALID_POST_MAPPED_VRS_HAPLOTYPE, + variant=variant, + vrs_version="2.0", + modification_date=date.today(), + mapped_date=date.today(), + mapping_api_version="pytest.0.0", + current=True, + ) + db.add(mapped_variant) + + db.commit() + + return client.get(f"/api/v1/score-sets/{score_set['urn']}").json() + + def create_seq_score_set_with_variants( client, db, data_provider, experiment_urn, scores_csv_path, update=None, counts_csv_path=None ): @@ -288,3 +321,48 @@ def update_expected_response_for_created_resources(expected_response, created_ex ) return expected_response + + +def create_seq_score_set_with_mapped_variants( + client, db, data_provider, experiment_urn, scores_csv_path, update=None, counts_csv_path=None +): + score_set = create_seq_score_set_with_variants( + client, db, data_provider, experiment_urn, scores_csv_path, update, counts_csv_path + ) + score_set = mock_worker_vrs_mapping(client, db, score_set) + + jsonschema.validate(instance=score_set, schema=ScoreSet.schema()) + return score_set + + +def create_acc_score_set_with_mapped_variants( + client, db, data_provider, experiment_urn, scores_csv_path, update=None, counts_csv_path=None +): + score_set = create_acc_score_set_with_variants( + client, db, data_provider, experiment_urn, scores_csv_path, update, counts_csv_path + ) + score_set = mock_worker_vrs_mapping(client, db, score_set) + + jsonschema.validate(instance=score_set, schema=ScoreSet.schema()) + return score_set + + +def link_clinical_controls_to_mapped_variants(db, score_set): + mapped_variants = db.scalars( + select(MappedVariantDbModel) + .join(VariantDbModel) + .join(ScoreSetDbModel) + .where(ScoreSetDbModel.urn == score_set["urn"]) + ).all() + + # The first mapped variant gets the clinvar control, the second gets the generic control. 
+ mapped_variants[0].clinical_controls.append( + db.scalar(select(ClinicalControlDbModel).where(ClinicalControlDbModel.id == 1)) + ) + mapped_variants[1].clinical_controls.append( + db.scalar(select(ClinicalControlDbModel).where(ClinicalControlDbModel.id == 2)) + ) + + db.add(mapped_variants[0]) + db.add(mapped_variants[1]) + db.commit() diff --git a/tests/routers/conftest.py b/tests/routers/conftest.py index 591c4e3e..e634f614 100644 --- a/tests/routers/conftest.py +++ b/tests/routers/conftest.py @@ -5,6 +5,7 @@ import cdot.hgvs.dataproviders import pytest +from mavedb.models.clinical_control import ClinicalControl from mavedb.models.controlled_keyword import ControlledKeyword from mavedb.models.contributor import Contributor from mavedb.models.enums.user_role import UserRole @@ -15,6 +16,8 @@ from mavedb.models.user import User from tests.helpers.constants import ( ADMIN_USER, + TEST_CLINVAR_CONTROL, + TEST_GENERIC_CLINICAL_CONTROL, EXTRA_USER, EXTRA_CONTRIBUTOR, TEST_CDOT_TRANSCRIPT, @@ -50,6 +53,8 @@ def setup_router_db(session): db.add(License(**TEST_INACTIVE_LICENSE)) db.add(License(**EXTRA_LICENSE)) db.add(Contributor(**EXTRA_CONTRIBUTOR)) + db.add(ClinicalControl(**TEST_CLINVAR_CONTROL)) + db.add(ClinicalControl(**TEST_GENERIC_CLINICAL_CONTROL)) db.bulk_save_objects([ControlledKeyword(**keyword_obj) for keyword_obj in TEST_DB_KEYWORDS]) db.commit() diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 67c26b27..9d7c1f2b 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -36,6 +36,8 @@ SAVED_SHORT_EXTRA_LICENSE, TEST_SCORE_CALIBRATION, TEST_SAVED_SCORE_CALIBRATION, + TEST_SAVED_CLINVAR_CONTROL, + TEST_SAVED_GENERIC_CLINICAL_CONTROL, ) from tests.helpers.dependency_overrider import DependencyOverrider from tests.helpers.util import ( @@ -46,6 +48,8 @@ create_seq_score_set, create_seq_score_set_with_variants, update_expected_response_for_created_resources, + create_seq_score_set_with_mapped_variants, + link_clinical_controls_to_mapped_variants, ) @@ -2415,3 +2419,89 @@ def test_download_counts_file(session, data_provider, client, setup_router_db, d assert "hgvs_nt" in columns assert "hgvs_pro" in columns assert "hgvs_splice" not in columns + +######################################################################################################################## +# Fetching clinical controls for a score set +######################################################################################################################## + + +def test_can_fetch_current_clinical_controls_for_score_set(client, setup_router_db, session, data_provider, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + link_clinical_controls_to_mapped_variants(session, score_set) + + response = client.get(f"/api/v1/score-sets/{score_set['urn']}/clinical-controls") + assert response.status_code == 200 + + response_data = response.json() + assert len(response_data) == 2 + for control in response_data: + mapped_variants = control.pop("mappedVariants") + assert len(mapped_variants) == 1 + assert all( + control[k] in (TEST_SAVED_CLINVAR_CONTROL[k], TEST_SAVED_GENERIC_CLINICAL_CONTROL[k]) + for k in TEST_SAVED_CLINVAR_CONTROL.keys() + if k != "mappedVariants" + ) + + +@pytest.mark.parametrize("clinical_control", [TEST_SAVED_CLINVAR_CONTROL, TEST_SAVED_GENERIC_CLINICAL_CONTROL]) +@pytest.mark.parametrize( + 
"parameters", [[("db", "dbName")], [("version", "dbVersion")], [("db", "dbName"), ("version", "dbVersion")]] +) +def test_can_fetch_current_clinical_controls_for_score_set_with_parameters( + client, setup_router_db, session, data_provider, data_files, clinical_control, parameters +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + link_clinical_controls_to_mapped_variants(session, score_set) + + query_string = "?" + for param, accessor in parameters: + query_string += f"&{param}={clinical_control[accessor]}" + + response = client.get(f"/api/v1/score-sets/{score_set['urn']}/clinical-controls{query_string}") + assert response.status_code == 200 + + response_data = response.json() + assert len(response_data) + for param, accessor in parameters: + assert all(control[accessor] == clinical_control[accessor] for control in response_data) + + +def test_cannot_fetch_clinical_controls_for_nonexistent_score_set( + client, setup_router_db, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + link_clinical_controls_to_mapped_variants(session, score_set) + + response = client.get(f"/api/v1/score-sets/{score_set['urn']+'xxx'}/clinical-controls") + + assert response.status_code == 404 + response_data = response.json() + assert f"score set with URN '{score_set['urn']+'xxx'}' not found" in response_data["detail"] + + +def test_cannot_fetch_clinical_controls_for_score_set_when_none_exist( + client, setup_router_db, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + + response = client.get(f"/api/v1/score-sets/{score_set['urn']}/clinical-controls") + + assert response.status_code == 404 + response_data = response.json() + assert ( + f"No clinical control variants matching the provided filters associated with score set URN {score_set['urn']} were found" + in response_data["detail"] + ) From a906eb82880dc52bedadfdf8ef2bad2b734d9e44 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 20 Feb 2025 13:08:02 -0800 Subject: [PATCH 048/166] Undo demo hack, refactors variant data to csv function into a single method --- src/mavedb/lib/score_sets.py | 135 ++++++++--------------- src/mavedb/routers/score_sets.py | 17 ++- src/mavedb/scripts/export_public_data.py | 6 +- 3 files changed, 54 insertions(+), 104 deletions(-) diff --git a/src/mavedb/lib/score_sets.py b/src/mavedb/lib/score_sets.py index 4ecbc830..be1722c3 100644 --- a/src/mavedb/lib/score_sets.py +++ b/src/mavedb/lib/score_sets.py @@ -3,12 +3,12 @@ import logging import re from operator import attrgetter -from typing import Any, BinaryIO, Iterable, Optional, TYPE_CHECKING, Sequence +from typing import Any, BinaryIO, Iterable, Optional, TYPE_CHECKING, Sequence, Literal import numpy as np import pandas as pd from pandas.testing import assert_index_equal -from sqlalchemy import Integer, cast, func, null, or_, select +from sqlalchemy import Integer, cast, func, or_, select from sqlalchemy.orm import Session, aliased, contains_eager, joinedload, selectinload from mavedb.lib.exceptions import ValidationError @@ -23,7 +23,6 @@ from mavedb.lib.mave.utils import is_csv_null from 
mavedb.lib.validation.constants.general import null_values_list from mavedb.lib.validation.utilities import is_null as validate_is_null -from mavedb.models.clinical_control import ClinicalControl from mavedb.models.contributor import Contributor from mavedb.models.controlled_keyword import ControlledKeyword from mavedb.models.doi_identifier import DoiIdentifier @@ -40,7 +39,6 @@ from mavedb.models.score_set_publication_identifier import ( ScoreSetPublicationIdentifierAssociation, ) -from mavedb.models.mapped_variant import MappedVariant from mavedb.models.target_accession import TargetAccession from mavedb.models.target_gene import TargetGene from mavedb.models.target_sequence import TargetSequence @@ -268,30 +266,22 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search: def fetch_superseding_score_set_in_search_result( - score_sets: list[ScoreSet], - requesting_user: Optional["UserData"], - search: ScoreSetsSearch) -> list[ScoreSet]: + score_sets: list[ScoreSet], requesting_user: Optional["UserData"], search: ScoreSetsSearch +) -> list[ScoreSet]: """ Remove superseded score set from search results. Check whether all of the score set are correct versions. """ from mavedb.lib.permissions import Action + if search.published: filtered_score_sets_tail = [ - find_publish_or_private_superseded_score_set_tail( - score_set, - Action.READ, - requesting_user, - search.published - ) for score_set in score_sets + find_publish_or_private_superseded_score_set_tail(score_set, Action.READ, requesting_user, search.published) + for score_set in score_sets ] else: filtered_score_sets_tail = [ - find_superseded_score_set_tail( - score_set, - Action.READ, - requesting_user - ) for score_set in score_sets + find_superseded_score_set_tail(score_set, Action.READ, requesting_user) for score_set in score_sets ] # Remove None item. filtered_score_sets = [score_set for score_set in filtered_score_sets_tail if score_set is not None] @@ -347,10 +337,10 @@ def find_meta_analyses_for_experiment_sets(db: Session, urns: list[str]) -> list def find_superseded_score_set_tail( - score_set: ScoreSet, - action: Optional["Action"] = None, - user_data: Optional["UserData"] = None) -> Optional[ScoreSet]: + score_set: ScoreSet, action: Optional["Action"] = None, user_data: Optional["UserData"] = None +) -> Optional[ScoreSet]: from mavedb.lib.permissions import has_permission + while score_set.superseding_score_set is not None: next_score_set_in_chain = score_set.superseding_score_set @@ -376,47 +366,52 @@ def find_superseded_score_set_tail( def find_publish_or_private_superseded_score_set_tail( - score_set: ScoreSet, - action: Optional["Action"] = None, - user_data: Optional["UserData"] = None, - publish: bool = True) -> Optional[ScoreSet]: + score_set: ScoreSet, action: Optional["Action"] = None, user_data: Optional["UserData"] = None, publish: bool = True +) -> Optional[ScoreSet]: from mavedb.lib.permissions import has_permission + if publish: while score_set.superseding_score_set is not None: next_score_set_in_chain = score_set.superseding_score_set # Find the final published one. - if action is not None and has_permission(user_data, score_set, action).permitted \ - and next_score_set_in_chain.published_date is None: + if ( + action is not None + and has_permission(user_data, score_set, action).permitted + and next_score_set_in_chain.published_date is None + ): return score_set score_set = next_score_set_in_chain else: # Unpublished score set should not be superseded. 
# It should not have superseding score set, but possible have superseded score set. - if action is not None and score_set.published_date is None \ - and has_permission(user_data, score_set, action).permitted: + if ( + action is not None + and score_set.published_date is None + and has_permission(user_data, score_set, action).permitted + ): return score_set else: return None return score_set -def get_score_set_counts_as_csv( +def get_score_set_variants_as_csv( db: Session, score_set: ScoreSet, + data_type: Literal["scores", "counts"], start: Optional[int] = None, limit: Optional[int] = None, drop_na_columns: Optional[bool] = None, ) -> str: assert type(score_set.dataset_columns) is dict - count_columns = [str(x) for x in list(score_set.dataset_columns.get("count_columns", []))] - # HACK - columns = ( - ["accession", "hgvs_nt", "hgvs_splice", "hgvs_pro"] + count_columns + ["mavedb_clnsig", "mavedb_clnrevstat"] - ) - type_column = "count_data" + dataset_cols = "score_columns" if data_type == "scores" else "count_columns" + type_column = "score_data" if data_type == "scores" else "count_data" + + count_columns = [str(x) for x in list(score_set.dataset_columns.get(dataset_cols, []))] + columns = ["accession", "hgvs_nt", "hgvs_splice", "hgvs_pro"] + count_columns variants_query = ( - select(Variant, null(), null()) + select(Variant) .where(Variant.score_set_id == score_set.id) .order_by(cast(func.split_part(Variant.urn, "#", 2), Integer)) ) @@ -424,49 +419,12 @@ def get_score_set_counts_as_csv( variants_query = variants_query.offset(start) if limit: variants_query = variants_query.limit(limit) - variants = db.execute(variants_query).all() + variants = db.scalars(variants_query).all() - # HACK: Hideous hack for expediency... rows_data = variants_to_csv_rows(variants, columns=columns, dtype=type_column) # type: ignore - stream = io.StringIO() - writer = csv.DictWriter(stream, fieldnames=columns, quoting=csv.QUOTE_MINIMAL) - writer.writeheader() - writer.writerows(rows_data) - return stream.getvalue() + if drop_na_columns: + rows_data, columns = drop_na_columns_from_csv_file_rows(rows_data, columns) - -def get_score_set_scores_as_csv( - db: Session, - score_set: ScoreSet, - start: Optional[int] = None, - limit: Optional[int] = None, - drop_na_columns: Optional[bool] = None, -) -> str: - assert type(score_set.dataset_columns) is dict - score_columns = [str(x) for x in list(score_set.dataset_columns.get("score_columns", []))] - # HACK - columns = ( - ["accession", "hgvs_nt", "hgvs_splice", "hgvs_pro"] + score_columns + ["mavedb_clnsig", "mavedb_clnrevstat"] - ) - type_column = "score_data" - - # HACK: This is a poorly tested and very temporary solution to surface clinical significance and - # clinical review status within the CSV export in a way our front end can handle and display. It's - # also quite slow. 
- variants_query = ( - select(Variant, ClinicalControl.clinical_significance, ClinicalControl.clinical_review_status) - .join(MappedVariant, ClinicalControl.mapped_variants, isouter=True) - .where(Variant.score_set_id == score_set.id, MappedVariant.vrs_version == "1.3") - # .where(Variant.score_set_id == score_set.id,MappedVariant.current.is_(True)) - .order_by(cast(func.split_part(Variant.urn, "#", 2), Integer)) - ) - if start: - variants_query = variants_query.offset(start) - if limit: - variants_query = variants_query.limit(limit) - variants = db.execute(variants_query).all() - - rows_data = variants_to_csv_rows(variants, columns=columns, dtype=type_column) # type: ignore stream = io.StringIO() writer = csv.DictWriter(stream, fieldnames=columns, quoting=csv.QUOTE_MINIMAL) writer.writeheader() @@ -475,8 +433,7 @@ def get_score_set_scores_as_csv( def drop_na_columns_from_csv_file_rows( - rows_data: Iterable[dict[str, Any]], - columns: list[str] + rows_data: Iterable[dict[str, Any]], columns: list[str] ) -> tuple[list[dict[str, Any]], list[str]]: """Process rows_data for downloadable CSV by removing empty columns.""" # Convert map to list. @@ -505,9 +462,7 @@ def is_null(value): return null_values_re.fullmatch(value) or not value -def variant_to_csv_row( - variant: tuple[Variant, str, str], columns: list[str], dtype: str, na_rep="NA" -) -> dict[str, Any]: +def variant_to_csv_row(variant: Variant, columns: list[str], dtype: str, na_rep="NA") -> dict[str, Any]: """ Format a variant into a containing the keys specified in `columns`. @@ -529,29 +484,25 @@ def variant_to_csv_row( row = {} for column_key in columns: if column_key == "hgvs_nt": - value = str(variant[0].hgvs_nt) + value = str(variant.hgvs_nt) elif column_key == "hgvs_pro": - value = str(variant[0].hgvs_pro) + value = str(variant.hgvs_pro) elif column_key == "hgvs_splice": - value = str(variant[0].hgvs_splice) + value = str(variant.hgvs_splice) elif column_key == "accession": - value = str(variant[0].urn) + value = str(variant.urn) else: - parent = variant[0].data.get(dtype) if variant[0].data else None + parent = variant.data.get(dtype) if variant.data else None value = str(parent.get(column_key)) if parent else na_rep if is_null(value): value = na_rep row[column_key] = value - # HACK: Overwrite any potential values of ClinVar fields present in the data - # object with db results from the tuple directly. - row["mavedb_clnsig"] = variant[1] - row["mavedb_clnrevstat"] = variant[2] return row def variants_to_csv_rows( - variants: Sequence[tuple[Variant, str, str]], columns: list[str], dtype: str, na_rep="NA" + variants: Sequence[Variant], columns: list[str], dtype: str, na_rep="NA" ) -> Iterable[dict[str, Any]]: """ Format each variant into a dictionary row containing the keys specified in `columns`. 
diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 63abb69f..e62be91c 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -38,8 +38,7 @@ from mavedb.lib.score_sets import ( csv_data_to_df, find_meta_analyses_for_experiment_sets, - get_score_set_counts_as_csv, - get_score_set_scores_as_csv, + get_score_set_variants_as_csv, variants_to_csv_rows, ) from mavedb.lib.score_sets import ( @@ -258,7 +257,7 @@ def get_score_set_scores_csv( assert_permission(user_data, score_set, Action.READ) - csv_str = get_score_set_scores_as_csv(db, score_set, start, limit, drop_na_columns) + csv_str = get_score_set_variants_as_csv(db, score_set, "scores", start, limit, drop_na_columns) return StreamingResponse(iter([csv_str]), media_type="text/csv") @@ -313,7 +312,7 @@ async def get_score_set_counts_csv( assert_permission(user_data, score_set, Action.READ) - csv_str = get_score_set_counts_as_csv(db, score_set, start, limit, drop_na_columns) + csv_str = get_score_set_variants_as_csv(db, score_set, "counts", start, limit, drop_na_columns) return StreamingResponse(iter([csv_str]), media_type="text/csv") @@ -1197,6 +1196,11 @@ async def get_clinical_controls_for_score_set( clinical_controls_for_item: Sequence[ClinicalControl] = _db.scalars(clinical_controls_query).all() + for control_variant in clinical_controls_for_item: + control_variant.mapped_variants = [ + mv for mv in control_variant.mapped_variants if mv.current and mv.variant.score_set_id == item.id + ] + if not clinical_controls_for_item: logger.info( msg="No clinical control variants matching the provided filters are associated with the requested score set.", @@ -1207,11 +1211,6 @@ async def get_clinical_controls_for_score_set( detail=f"No clinical control variants matching the provided filters associated with score set URN {urn} were found", ) - for control_variant in clinical_controls_for_item: - control_variant.mapped_variants = [ - mv for mv in control_variant.mapped_variants if mv.current and mv.variant.score_set_id == item.id - ] - save_to_logging_context({"resource_count": len(clinical_controls_for_item)}) return clinical_controls_for_item diff --git a/src/mavedb/scripts/export_public_data.py b/src/mavedb/scripts/export_public_data.py index 4a52ee80..8e3857b5 100644 --- a/src/mavedb/scripts/export_public_data.py +++ b/src/mavedb/scripts/export_public_data.py @@ -36,7 +36,7 @@ from sqlalchemy import select from sqlalchemy.orm import lazyload, Session -from mavedb.lib.score_sets import get_score_set_counts_as_csv, get_score_set_scores_as_csv +from mavedb.lib.score_sets import get_score_set_variants_as_csv from mavedb.models.experiment import Experiment from mavedb.models.experiment_set import ExperimentSet from mavedb.models.license import License @@ -147,12 +147,12 @@ def export_public_data(db: Session): logger.info(f"{i + 1}/{num_score_sets} Exporting variants for score set {score_set.urn}") csv_filename_base = score_set.urn.replace(":", "-") - csv_str = get_score_set_scores_as_csv(db, score_set) + csv_str = get_score_set_variants_as_csv(db, score_set, "scores") zipfile.writestr(f"csv/{csv_filename_base}.scores.csv", csv_str) count_columns = score_set.dataset_columns["count_columns"] if score_set.dataset_columns else None if count_columns and len(count_columns) > 0: - csv_str = get_score_set_counts_as_csv(db, score_set) + csv_str = get_score_set_variants_as_csv(db, score_set, "counts") zipfile.writestr(f"csv/{csv_filename_base}.counts.csv", csv_str) From 
df91ffb304f3f3fcdf53b9e588d440e4d924b9f0 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 20 Feb 2025 13:24:00 -0800 Subject: [PATCH 049/166] Replace id with URN for mapped variant view models, inherit create view models from update --- src/mavedb/view_models/clinical_control.py | 4 ++-- src/mavedb/view_models/mapped_variant.py | 13 +++++++++---- tests/helpers/constants.py | 9 --------- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/mavedb/view_models/clinical_control.py b/src/mavedb/view_models/clinical_control.py index 6f7b5600..f7bc0564 100644 --- a/src/mavedb/view_models/clinical_control.py +++ b/src/mavedb/view_models/clinical_control.py @@ -17,11 +17,11 @@ class ClinicalControlBase(BaseModel): db_name: str -class ClinicalControlCreate(ClinicalControlBase): +class ClinicalControlUpdate(ClinicalControlBase): mapped_variants: Optional[list[MappedVariantCreate]] = None -class ClinicalControlUpdate(ClinicalControlBase): +class ClinicalControlCreate(ClinicalControlUpdate): pass diff --git a/src/mavedb/view_models/mapped_variant.py b/src/mavedb/view_models/mapped_variant.py index 37abb3a3..6b3a3d97 100644 --- a/src/mavedb/view_models/mapped_variant.py +++ b/src/mavedb/view_models/mapped_variant.py @@ -11,7 +11,7 @@ class MappedVariantBase(BaseModel): pre_mapped: Optional[Any] post_mapped: Optional[Any] - variant_id: int + variant_urn: str vrs_version: Optional[str] error_message: Optional[str] modification_date: date @@ -19,12 +19,17 @@ class MappedVariantBase(BaseModel): mapping_api_version: str current: bool - -class MappedVariantCreate(MappedVariantBase): - clinical_controles: Sequence[ClinicalControlBase] + @classmethod + def from_orm(cls, obj: Any): + obj.variant_urn = obj.variant.urn + return super().from_orm(obj) class MappedVariantUpdate(MappedVariantBase): + clinical_controls: Sequence[ClinicalControlBase] + + +class MappedVariantCreate(MappedVariantUpdate): pass diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index 681d781a..ecaeafca 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -866,12 +866,3 @@ "dbVersion": "2024", "mappedVariants": [], } - -TEST_MINIMAL_MAPPED_VARIANT = { - "variant_id": 1, - "modification_date": date.today(), - "vrs_version": "2.0", - "mapped_date": date.today(), - "mapping_api_version": "pytest.0.0", - "current": True, -} From 453c7a73130d7bc55b1c70f44544e20c9d83c30c Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 20 Feb 2025 14:27:46 -0800 Subject: [PATCH 050/166] Add route for generating pairwise db_name/db_version clinical control options --- src/mavedb/routers/score_sets.py | 65 +++++++++++++++++++- src/mavedb/view_models/clinical_control.py | 5 ++ tests/routers/test_score_set.py | 71 +++++++++++++++++++++- 3 files changed, 136 insertions(+), 5 deletions(-) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index e62be91c..b2f88a25 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -1,6 +1,6 @@ import logging from datetime import date -from typing import Any, List, Optional, Sequence +from typing import Any, List, Literal, Optional, Sequence import pandas as pd import pydantic @@ -1195,13 +1195,16 @@ async def get_clinical_controls_for_score_set( clinical_controls_query = clinical_controls_query.where(ClinicalControl.db_version == db_version) clinical_controls_for_item: Sequence[ClinicalControl] = _db.scalars(clinical_controls_query).all() - + clinical_controls_with_mapped_variant = [] for 
control_variant in clinical_controls_for_item: control_variant.mapped_variants = [ mv for mv in control_variant.mapped_variants if mv.current and mv.variant.score_set_id == item.id ] - if not clinical_controls_for_item: + if control_variant.mapped_variants: + clinical_controls_with_mapped_variant.append(control_variant) + + if not clinical_controls_with_mapped_variant: logger.info( msg="No clinical control variants matching the provided filters are associated with the requested score set.", extra=logging_context(), @@ -1214,3 +1217,59 @@ async def get_clinical_controls_for_score_set( save_to_logging_context({"resource_count": len(clinical_controls_for_item)}) return clinical_controls_for_item + + +@router.get( + "/score-sets/{urn}/clinical-controls/options", + status_code=200, + response_model=clinical_control.ClinicalControlOptions, + response_model_exclude_none=True, +) +async def get_clinical_controls_options_for_score_set( + *, + urn: str, + # We'd prefer to reserve `db` as a query parameter. + db: Session = Depends(deps.get_db), + user_data: UserData = Depends(get_current_user), +) -> dict[Literal["control_options"], dict[str, list[str]]]: + """ + Fetch clinical control options for a given score set. + """ + save_to_logging_context({"requested_resource": urn, "resource_property": "clinical_control_options"}) + + item: Optional[ScoreSet] = db.scalars(select(ScoreSet).where(ScoreSet.urn == urn)).one_or_none() + if not item: + logger.info( + msg="Failed to fetch clinical control options for score set; The requested score set does not exist.", + extra=logging_context(), + ) + raise HTTPException(status_code=404, detail=f"score set with URN '{urn}' not found") + + assert_permission(user_data, item, Action.READ) + + clinical_controls_query = ( + select(ClinicalControl.db_name, ClinicalControl.db_version) + .join(MappedVariant, ClinicalControl.mapped_variants) + .join(Variant) + .where(Variant.score_set_id == item.id) + ) + + clinical_controls_for_item = db.execute(clinical_controls_query).unique() + + # NOTE: We return options even for pairwise groupings which may have no associated mapped variants + # and 404 when ultimately requested together. 
+ clinical_control_options: dict[str, list[str]] = {} + for db_name, db_version in clinical_controls_for_item: + clinical_control_options.setdefault(db_name, []).append(db_version) + + if not clinical_control_options: + logger.info( + msg="Failed to fetch clinical control options for score set; No clinical control variants are associated with this score set.", + extra=logging_context(), + ) + raise HTTPException( + status_code=404, + detail=f"no clinical control variants associated with score set URN {urn} were found", + ) + + return {"control_options": clinical_control_options} diff --git a/src/mavedb/view_models/clinical_control.py b/src/mavedb/view_models/clinical_control.py index f7bc0564..8c8aa09c 100644 --- a/src/mavedb/view_models/clinical_control.py +++ b/src/mavedb/view_models/clinical_control.py @@ -51,6 +51,11 @@ class ClinicalControlWithMappedVariants(SavedClinicalControlWithMappedVariants): mapped_variants: Sequence[MappedVariant] +class ClinicalControlOptions(BaseModel): + # {"db_name": ["db_version", "db_version", ...]} + control_options: dict[str, list[str]] + + # ruff: noqa: E402 from mavedb.view_models.mapped_variant import MappedVariant, SavedMappedVariant, MappedVariantCreate diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 9d7c1f2b..03851672 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -7,10 +7,11 @@ import pytest from arq import ArqRedis from humps import camelize -from sqlalchemy import select +from sqlalchemy import select, delete from mavedb.lib.validation.urn_re import MAVEDB_TMP_URN_RE, MAVEDB_SCORE_SET_URN_RE, MAVEDB_EXPERIMENT_URN_RE from mavedb.models.enums.processing_state import ProcessingState +from mavedb.models.clinical_control import ClinicalControl from mavedb.models.experiment import Experiment as ExperimentDbModel from mavedb.models.score_set import ScoreSet as ScoreSetDbModel from mavedb.models.variant import Variant as VariantDbModel @@ -2420,8 +2421,9 @@ def test_download_counts_file(session, data_provider, client, setup_router_db, d assert "hgvs_pro" in columns assert "hgvs_splice" not in columns + ######################################################################################################################## -# Fetching clinical controls for a score set +# Fetching clinical controls and control options for a score set ######################################################################################################################## @@ -2505,3 +2507,68 @@ def test_cannot_fetch_clinical_controls_for_score_set_when_none_exist( f"No clinical control variants matching the provided filters associated with score set URN {score_set['urn']} were found" in response_data["detail"] ) + + +def test_can_fetch_current_clinical_control_options_for_score_set( + client, setup_router_db, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + link_clinical_controls_to_mapped_variants(session, score_set) + + response = client.get(f"/api/v1/score-sets/{score_set['urn']}/clinical-controls/options") + assert response.status_code == 200 + + response_data = response.json() + assert TEST_SAVED_CLINVAR_CONTROL["dbName"] in response_data["controlOptions"] + assert TEST_SAVED_GENERIC_CLINICAL_CONTROL["dbName"] in response_data["controlOptions"] + assert 
len(response_data["controlOptions"][TEST_SAVED_CLINVAR_CONTROL["dbName"]]) == 1 + assert len(response_data["controlOptions"][TEST_SAVED_GENERIC_CLINICAL_CONTROL["dbName"]]) == 1 + assert ( + TEST_SAVED_CLINVAR_CONTROL["dbVersion"] in response_data["controlOptions"][TEST_SAVED_CLINVAR_CONTROL["dbName"]] + ) + assert ( + TEST_SAVED_GENERIC_CLINICAL_CONTROL["dbVersion"] + in response_data["controlOptions"][TEST_SAVED_GENERIC_CLINICAL_CONTROL["dbName"]] + ) + + +def test_cannot_fetch_clinical_control_options_for_nonexistent_score_set( + client, setup_router_db, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + link_clinical_controls_to_mapped_variants(session, score_set) + + response = client.get(f"/api/v1/score-sets/{score_set['urn']+'xxx'}/clinical-controls/options") + + assert response.status_code == 404 + response_data = response.json() + assert f"score set with URN '{score_set['urn']+'xxx'}' not found" in response_data["detail"] + + +def test_cannot_fetch_clinical_controls_options_for_score_set_when_none_exist( + client, setup_router_db, session, data_provider, data_files +): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_mapped_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + + # removes all clinical controls from the db. + session.execute(delete(ClinicalControl)) + session.commit() + + response = client.get(f"/api/v1/score-sets/{score_set['urn']}/clinical-controls/options") + print(response.json()) + + assert response.status_code == 404 + response_data = response.json() + assert ( + f"no clinical control variants associated with score set URN {score_set['urn']} were found" + in response_data["detail"] + ) From bd0d3897132c2f7672c4a1e038134cc089f4749c Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 24 Feb 2025 14:37:47 -0800 Subject: [PATCH 051/166] Format changes to clinical control options --- src/mavedb/routers/score_sets.py | 18 +++++++++++----- src/mavedb/view_models/clinical_control.py | 4 ++-- tests/routers/test_score_set.py | 25 +++++++++++----------- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index b2f88a25..dcf9cae2 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -1,6 +1,6 @@ import logging from datetime import date -from typing import Any, List, Literal, Optional, Sequence +from typing import Any, List, Optional, Sequence, Union import pandas as pd import pydantic @@ -1198,7 +1198,12 @@ async def get_clinical_controls_for_score_set( clinical_controls_with_mapped_variant = [] for control_variant in clinical_controls_for_item: control_variant.mapped_variants = [ - mv for mv in control_variant.mapped_variants if mv.current and mv.variant.score_set_id == item.id + # As of now, we only have linked clingen allele IDs for v1.3 VRS. Once v2.0 has been linked to clingen allele IDs, + # we can transition to the other filter. 
+ mv + for mv in control_variant.mapped_variants + if mv.vrs_version == "1.3" and mv.variant.score_set_id == item.id + # mv for mv in control_variant.mapped_variants if mv.current and mv.variant.score_set_id == item.id ] if control_variant.mapped_variants: @@ -1222,7 +1227,7 @@ async def get_clinical_controls_for_score_set( @router.get( "/score-sets/{urn}/clinical-controls/options", status_code=200, - response_model=clinical_control.ClinicalControlOptions, + response_model=list[clinical_control.ClinicalControlOptions], response_model_exclude_none=True, ) async def get_clinical_controls_options_for_score_set( @@ -1231,7 +1236,7 @@ async def get_clinical_controls_options_for_score_set( # We'd prefer to reserve `db` as a query parameter. db: Session = Depends(deps.get_db), user_data: UserData = Depends(get_current_user), -) -> dict[Literal["control_options"], dict[str, list[str]]]: +) -> list[dict[str, Union[str, list[str]]]]: """ Fetch clinical control options for a given score set. """ @@ -1272,4 +1277,7 @@ async def get_clinical_controls_options_for_score_set( detail=f"no clinical control variants associated with score set URN {urn} were found", ) - return {"control_options": clinical_control_options} + return [ + dict(zip(("db_name", "available_versions"), (db_name, db_versions))) + for db_name, db_versions in clinical_control_options.items() + ] diff --git a/src/mavedb/view_models/clinical_control.py b/src/mavedb/view_models/clinical_control.py index 8c8aa09c..ed9f1dd2 100644 --- a/src/mavedb/view_models/clinical_control.py +++ b/src/mavedb/view_models/clinical_control.py @@ -52,8 +52,8 @@ class ClinicalControlWithMappedVariants(SavedClinicalControlWithMappedVariants): class ClinicalControlOptions(BaseModel): - # {"db_name": ["db_version", "db_version", ...]} - control_options: dict[str, list[str]] + db_name: str + available_versions: list[str] # ruff: noqa: E402 diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 03851672..720bf07d 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -2522,17 +2522,18 @@ def test_can_fetch_current_clinical_control_options_for_score_set( assert response.status_code == 200 response_data = response.json() - assert TEST_SAVED_CLINVAR_CONTROL["dbName"] in response_data["controlOptions"] - assert TEST_SAVED_GENERIC_CLINICAL_CONTROL["dbName"] in response_data["controlOptions"] - assert len(response_data["controlOptions"][TEST_SAVED_CLINVAR_CONTROL["dbName"]]) == 1 - assert len(response_data["controlOptions"][TEST_SAVED_GENERIC_CLINICAL_CONTROL["dbName"]]) == 1 - assert ( - TEST_SAVED_CLINVAR_CONTROL["dbVersion"] in response_data["controlOptions"][TEST_SAVED_CLINVAR_CONTROL["dbName"]] - ) - assert ( - TEST_SAVED_GENERIC_CLINICAL_CONTROL["dbVersion"] - in response_data["controlOptions"][TEST_SAVED_GENERIC_CLINICAL_CONTROL["dbName"]] - ) + assert len(response_data) == 2 + for control_option in response_data: + assert len(control_option["availableVersions"]) == 1 + assert control_option["dbName"] in ( + TEST_SAVED_CLINVAR_CONTROL["dbName"], + TEST_SAVED_GENERIC_CLINICAL_CONTROL["dbName"], + ) + assert all( + control_version + in (TEST_SAVED_CLINVAR_CONTROL["dbVersion"], TEST_SAVED_GENERIC_CLINICAL_CONTROL["dbVersion"]) + for control_version in control_option["availableVersions"] + ) def test_cannot_fetch_clinical_control_options_for_nonexistent_score_set( @@ -2551,7 +2552,7 @@ def test_cannot_fetch_clinical_control_options_for_nonexistent_score_set( assert f"score set with URN 
'{score_set['urn']+'xxx'}' not found" in response_data["detail"] -def test_cannot_fetch_clinical_controls_options_for_score_set_when_none_exist( +def test_cannot_fetch_clinical_control_options_for_score_set_when_none_exist( client, setup_router_db, session, data_provider, data_files ): experiment = create_experiment(client) From 9071ad41efa896d2ed9e378e8ea6b28c5221b388 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 24 Feb 2025 21:28:41 -0800 Subject: [PATCH 052/166] Fix foreign key downgrade constraint name --- alembic/versions/34026092c7f8_clinvar_variant_table.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/alembic/versions/34026092c7f8_clinvar_variant_table.py b/alembic/versions/34026092c7f8_clinvar_variant_table.py index 730c8e66..17031c49 100644 --- a/alembic/versions/34026092c7f8_clinvar_variant_table.py +++ b/alembic/versions/34026092c7f8_clinvar_variant_table.py @@ -48,7 +48,9 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.drop_constraint("mapped_variants_clinvar_variant_foreign_key_constraint", "mapped_variants", type_="foreignkey") + op.drop_constraint( + "mapped_variant_clinvar_variant_id_foreign_key_constraint", "mapped_variants", type_="foreignkey" + ) op.drop_index(op.f("ix_mapped_variants_clinvar_variant_id"), table_name="mapped_variants") op.drop_column("mapped_variants", "clinvar_variant_id") op.drop_index(op.f("ix_clinvar_variants_allele_id"), table_name="clinvar_variants") From 8f1002bd664814fc3e7a1c79043ff64f35a1b80c Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 4 Mar 2025 11:07:57 -0800 Subject: [PATCH 053/166] Rebase alembic revision ordering --- .../e8a3b5d8f885_add_clingen_allele_ids.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py b/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py index 53f75752..7b7947d6 100644 --- a/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py +++ b/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py @@ -1,26 +1,27 @@ """Add ClinGen allele IDs Revision ID: e8a3b5d8f885 -Revises: aa73d39b3705 +Revises: c404b6719110 Create Date: 2025-01-27 18:55:09.283855 """ + from alembic import op import sqlalchemy as sa # revision identifiers, used by Alembic. 
-revision = 'e8a3b5d8f885' -down_revision = 'aa73d39b3705' +revision = "e8a3b5d8f885" +down_revision = "c404b6719110" branch_labels = None depends_on = None def upgrade(): - op.add_column('variants', sa.Column('clingen_allele_id', sa.String(), nullable=True)) - op.create_index(op.f('ix_variants_clingen_allele_id'), 'variants', ['clingen_allele_id'], unique=False) + op.add_column("variants", sa.Column("clingen_allele_id", sa.String(), nullable=True)) + op.create_index(op.f("ix_variants_clingen_allele_id"), "variants", ["clingen_allele_id"], unique=False) def downgrade(): - op.drop_index(op.f('ix_variants_clingen_allele_id'), table_name='variants') - op.drop_column('variants', 'clingen_allele_id') + op.drop_index(op.f("ix_variants_clingen_allele_id"), table_name="variants") + op.drop_column("variants", "clingen_allele_id") From cbfb1396fc87d461254577df62a12ba9e573a195 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 4 Mar 2025 11:29:48 -0800 Subject: [PATCH 054/166] Current and 2.0 filter for mapped variants --- src/mavedb/routers/score_sets.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index dcf9cae2..56b7d5bd 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -1200,10 +1200,14 @@ async def get_clinical_controls_for_score_set( control_variant.mapped_variants = [ # As of now, we only have linked clingen allele IDs for v1.3 VRS. Once v2.0 has been linked to clingen allele IDs, # we can transition to the other filter. + # Staging filter + # mv + # for mv in control_variant.mapped_variants + # if mv.vrs_version == "1.3" and mv.variant.score_set_id == item.id + # Production filter mv for mv in control_variant.mapped_variants - if mv.vrs_version == "1.3" and mv.variant.score_set_id == item.id - # mv for mv in control_variant.mapped_variants if mv.current and mv.variant.score_set_id == item.id + if mv.current and mv.variant.score_set_id == item.id ] if control_variant.mapped_variants: From f88b85678bae1ba25462d46b3836c951eacc2b43 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 4 Mar 2025 11:37:55 -0800 Subject: [PATCH 055/166] tmp: Staging filter. Drop this commit prior to release. --- src/mavedb/routers/score_sets.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 56b7d5bd..b4899212 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -1201,13 +1201,13 @@ async def get_clinical_controls_for_score_set( # As of now, we only have linked clingen allele IDs for v1.3 VRS. Once v2.0 has been linked to clingen allele IDs, # we can transition to the other filter. 
# Staging filter - # mv - # for mv in control_variant.mapped_variants - # if mv.vrs_version == "1.3" and mv.variant.score_set_id == item.id - # Production filter mv for mv in control_variant.mapped_variants - if mv.current and mv.variant.score_set_id == item.id + if mv.vrs_version == "1.3" and mv.variant.score_set_id == item.id + # Production filter + # mv + # for mv in control_variant.mapped_variants + # if mv.current and mv.variant.score_set_id == item.id ] if control_variant.mapped_variants: From 6b3d8e1508eb33118928eb8807144b15e668ae01 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 24 Mar 2025 13:35:35 -0700 Subject: [PATCH 056/166] Make allele ID column alembic upgrade revise stats mat view --- alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py b/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py index 7b7947d6..5ac69502 100644 --- a/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py +++ b/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py @@ -1,7 +1,7 @@ """Add ClinGen allele IDs Revision ID: e8a3b5d8f885 -Revises: c404b6719110 +Revises: b85bc7b1bec7 Create Date: 2025-01-27 18:55:09.283855 """ @@ -12,7 +12,7 @@ # revision identifiers, used by Alembic. revision = "e8a3b5d8f885" -down_revision = "c404b6719110" +down_revision = "b85bc7b1bec7" branch_labels = None depends_on = None From e436b4922d891b47fdb8f0db43cdb1f0639ba4df Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 26 Mar 2025 14:13:20 -0700 Subject: [PATCH 057/166] wip: clingen ldh submission --- src/mavedb/lib/clingen/__init__.py | 0 src/mavedb/lib/clingen/constants.py | 11 ++ .../lib/clingen/content_constructors.py | 66 ++++++++++ src/mavedb/lib/clingen/linked_data_hub.py | 124 ++++++++++++++++++ src/mavedb/lib/types/__init__.py | 0 src/mavedb/lib/types/clingen.py | 76 +++++++++++ src/mavedb/lib/utils.py | 26 ++++ 7 files changed, 303 insertions(+) create mode 100644 src/mavedb/lib/clingen/__init__.py create mode 100644 src/mavedb/lib/clingen/constants.py create mode 100644 src/mavedb/lib/clingen/content_constructors.py create mode 100644 src/mavedb/lib/clingen/linked_data_hub.py create mode 100644 src/mavedb/lib/types/__init__.py create mode 100644 src/mavedb/lib/types/clingen.py create mode 100644 src/mavedb/lib/utils.py diff --git a/src/mavedb/lib/clingen/__init__.py b/src/mavedb/lib/clingen/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/mavedb/lib/clingen/constants.py b/src/mavedb/lib/clingen/constants.py new file mode 100644 index 00000000..27424c4d --- /dev/null +++ b/src/mavedb/lib/clingen/constants.py @@ -0,0 +1,11 @@ +import os + +GENBOREE_ACCOUNT_NAME = os.getenv("GENBOREE_ACCOUNT_NAME") +GENBOREE_ACCOUNT_PASSWORD = os.getenv("GENBOREE_ACCOUNT_PASSWORD") + +CLIN_GEN_TENANT = os.getenv("CLIN_GEN_TENANT") + +LDH_SUBMISSION_TYPE = "cg-ldh-ld-submission" +LDH_ENTITY_NAME = "MaveDbMapping" + +MAVEDB_BASE_GIT = "https://github.com/VariantEffect/mavedb-api" diff --git a/src/mavedb/lib/clingen/content_constructors.py b/src/mavedb/lib/clingen/content_constructors.py new file mode 100644 index 00000000..eab5b457 --- /dev/null +++ b/src/mavedb/lib/clingen/content_constructors.py @@ -0,0 +1,66 @@ +from datetime import datetime +from uuid import uuid4 + +from mavedb import __version__ +from mavedb.lib.types.clingen import LdhContentLinkedData, LdhContentSubject, LdhEvent, LdhSubmission +from mavedb.lib.clingen.constants import 
LDH_ENTITY_NAME, LDH_SUBMISSION_TYPE, MAVEDB_BASE_GIT +from mavedb.models.mapped_variant import MappedVariant +from mavedb.models.variant import Variant + + +def construct_ldh_submission_event(sbj: LdhContentSubject) -> LdhEvent: + return { + "type": LDH_SUBMISSION_TYPE, + "name": LDH_ENTITY_NAME, + "uuid": uuid4(), + "sbj": { + "id": sbj["Variant"]["hgvs"], + "type": "Variant", + "format": "hgvs", + "add": True, + }, + "triggered": { + "by": { + "host": MAVEDB_BASE_GIT, + "id": "resource_published", + "iri": f"{MAVEDB_BASE_GIT}/releases/tag/v{__version__}", + }, + "at": datetime.now().isoformat(), + }, + } + + +def construct_ldh_submission_subject(hgvs: str) -> LdhContentSubject: + return {"Variant": {"hgvs": hgvs}} + + +def construct_ldh_submission_entity(variant: Variant, mapped_variant: MappedVariant) -> LdhContentLinkedData: + return { + "MaveDbMapping": [ + { + # TODO#372: We try to make all possible fields that are non-nullable represented that way. + "mavedb_id": variant.urn, # type: ignore + "pre_mapped": mapped_variant.pre_mapped, # type: ignore + "post_mapped": mapped_variant.post_mapped, # type: ignore + "mapping_api_version": mapped_variant.mapping_api_version, # type: ignore + "score": variant.data["score_data"]["score"], # type: ignore + } + ] + } + + +def construct_ldh_submission(variant_content: list[tuple[str, Variant, MappedVariant]]) -> list[LdhSubmission]: + content_submission: list[LdhSubmission] = [] + for hgvs, variant, mapped_variant in variant_content: + subject = construct_ldh_submission_subject(hgvs) + event = construct_ldh_submission_event(subject) + entity = construct_ldh_submission_entity(variant, mapped_variant) + + content_submission.append( + { + "event": event, + "content": {"sbj": subject, "ld": entity}, + } + ) + + return content_submission diff --git a/src/mavedb/lib/clingen/linked_data_hub.py b/src/mavedb/lib/clingen/linked_data_hub.py new file mode 100644 index 00000000..063099df --- /dev/null +++ b/src/mavedb/lib/clingen/linked_data_hub.py @@ -0,0 +1,124 @@ +import logging +import os +from datetime import datetime +from itertools import islice +from typing import Optional + +import requests +from jose import jwt + +from mavedb.lib.logging.context import logging_context, save_to_logging_context, format_raised_exception_info_as_dict +from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD +from mavedb.lib.types.clingen import LdhSubmission +from mavedb.lib.utils import request_with_backoff + +logger = logging.getLogger(__name__) + + +class ClinGenLdhService: + def __init__(self, url: str) -> None: + self.url = url + + def authenticate(self) -> str: + logger.info(msg="Attempting to use an existing Genboree JWT.", extra=logging_context()) + + if existing_jwt := self._existing_jwt(): + logger.info(msg="Using existing Genboree JWT for authentication.", extra=logging_context()) + return existing_jwt + + logger.info( + msg="No existing or valid Genboree JWT found. Authenticating via Genboree services.", + extra=logging_context(), + ) + + try: + assert GENBOREE_ACCOUNT_NAME is not None, "Genboree account name is not set." + assert GENBOREE_ACCOUNT_PASSWORD is not None, "Genboree account password is not set." + except AssertionError as exc: + msg = "Genboree account name and/or password are not set. Unable to authenticate with Genboree services." 
+ save_to_logging_context(format_raised_exception_info_as_dict(exc)) + logger.error(msg=msg, extra=logging_context()) + raise ValueError(msg) + + auth_url = f"https://genboree.org/auth/usr/gb:{GENBOREE_ACCOUNT_NAME}/auth" + auth_body = {"type": "plain", "val": GENBOREE_ACCOUNT_PASSWORD} + auth_response = requests.post(auth_url, json=auth_body) + try: + auth_response.raise_for_status() + except requests.exceptions.HTTPError as exc: + save_to_logging_context(format_raised_exception_info_as_dict(exc)) + logger.error(msg="Failed to authenticate with Genboree services.", exc_info=exc, extra=logging_context()) + raise exc + + auth_jwt = auth_response.json().get("data", {}).get("jwt") + + try: + assert auth_jwt is not None, "No JWT in response." + except AssertionError as exc: + msg = "Failed to authenticate with Genboree services. Could not parse JWT from valid response." + save_to_logging_context(format_raised_exception_info_as_dict(exc)) + logger.error(msg=msg, extra=logging_context()) + raise ValueError(msg) + + # TODO#411: We should consider using a secret manager to store persistent/setable secrets like this. + os.environ["GENBOREE_JWT"] = auth_jwt + logger.info(msg="Successfully authenticated with Genboree services.", extra=logging_context()) + return auth_jwt + + def dispatch_submissions( + self, content_submissions: list[LdhSubmission], batch_size: Optional[int] = None + ) -> tuple[list, list]: + # TODO: When we upgrade to Python 3.12, we can replace this with the built-in `itertools.batched` method. + if batch_size is not None: + batched_submissions: list[list[LdhSubmission]] = [] + while batch := tuple(islice(content_submissions, batch_size)): + batched_submissions.append(list(batch)) + + save_to_logging_context({"ldh_submission_batch_size": batch_size}) + save_to_logging_context({"ldh_submission_batch_count": len(batched_submissions)}) + + submission_successes = [] + submission_failures = [] + submissions = batched_submissions if batch_size is not None else content_submissions + save_to_logging_context({"ldh_submission_count": len(submissions)}) + + logger.debug(msg="Dispatching ldh submissions.", extra=logging_context()) + for content in submissions: + try: + response = request_with_backoff( + method="PUT", + url=self.url, + json=content, + headers={"Authorization": f"Bearer {self.authenticate()}"}, + ) + submission_successes.append(response.json()) + except requests.exceptions.RequestException as exc: + save_to_logging_context(format_raised_exception_info_as_dict(exc)) + logger.error(msg="Failed to dispatch ldh submission.", exc_info=exc, extra=logging_context()) + submission_failures.append(content) + continue + + save_to_logging_context( + { + "ldh_submission_success_count": len(submission_successes), + "ldh_submission_failure_count": len(submission_failures), + } + ) + logger.info(msg="Done dispatching ldh submissions", extra=logging_context()) + return submission_successes, submission_failures + + def _existing_jwt(self) -> Optional[str]: + existing_jwt = os.getenv("GENBOREE_JWT") + + if not existing_jwt: + logger.debug(msg="No existing Genboree JWT was set.") + return None + + expiration = jwt.get_unverified_claims(existing_jwt).get("exp", datetime.now().timestamp()) + + if expiration > datetime.now().timestamp(): + logger.debug(msg="Found existing and valid Genboree JWT.") + return existing_jwt + + logger.debug(msg="Found existing but expired Genboree JWT.") + return None diff --git a/src/mavedb/lib/types/__init__.py b/src/mavedb/lib/types/__init__.py new file mode 100644 
index 00000000..e69de29b diff --git a/src/mavedb/lib/types/clingen.py b/src/mavedb/lib/types/clingen.py new file mode 100644 index 00000000..79327dda --- /dev/null +++ b/src/mavedb/lib/types/clingen.py @@ -0,0 +1,76 @@ +from typing import TypedDict, Literal +from typing_extensions import NotRequired +from uuid import UUID + + +# See: https://ldh.genome.network/docs/ldh/submit.html#content-submission-body + + +### Linked Data Hub Event Type + + +# The subject of the event (ie. the entity that the event is about) +class EventSbj(TypedDict): + id: str + type: str + format: Literal["hgvs", "alleleRegistryID", "clinvarID", "geneSymbol"] + add: bool + + +# Who/what triggered the event +class EventTriggerer(TypedDict): + host: str + id: str + iri: str + + +class EventTrigger(TypedDict): + by: EventTriggerer + at: str + + +class LdhEvent(TypedDict): + type: str + name: str + uuid: UUID + sbj: EventSbj + triggered: EventTrigger + + +### Linked Data Hub Content Types + + +# The subject of the content submission +class LdhSubjectVariant(TypedDict): + id: NotRequired[str] + hgvs: str + + +class LdhContentSubject(TypedDict): + Variant: LdhSubjectVariant + + +# The entities we are submitting +class LdhMapping(TypedDict): + mavedb_id: str + pre_mapped: str + post_mapped: str + mapping_api_version: str + score: float + + +class LdhContentLinkedData(TypedDict): + MaveDbMapping: list[LdhMapping] + + +### Linked Data Hub Submission Type + + +class LdhSubmissionContent(TypedDict): + sbj: LdhContentSubject + ld: LdhContentLinkedData + + +class LdhSubmission(TypedDict): + event: LdhEvent + content: LdhSubmissionContent diff --git a/src/mavedb/lib/utils.py b/src/mavedb/lib/utils.py new file mode 100644 index 00000000..b53f026d --- /dev/null +++ b/src/mavedb/lib/utils.py @@ -0,0 +1,26 @@ +import logging +import requests +import time + + +logger = logging.getLogger(__name__) + + +def request_with_backoff( + method: str, url: str, backoff_limit: int = 5, backoff_wait: int = 10, **kwargs +) -> requests.Response: + attempt = 0 + while attempt <= backoff_limit: + logger.debug(f"Attempting request to {url}. 
This is attempt {attempt+1}.") + try: + response = requests.request(method=method, url=url, **kwargs) + response.raise_for_status() + return response + except requests.exceptions.RequestException as exc: + logger.warning(f"Request to {url} failed.", exc_info=exc) + backoff_time = backoff_wait * (2**attempt) + attempt += 1 + logger.info(f"Retrying request to {url} in {backoff_wait} seconds.") + time.sleep(backoff_time) + + raise requests.exceptions.RequestException(f"Request to {url} failed after {backoff_limit} attempts.") From db47a69570202cf00b8e1d74f6158ecb6aa2d07f Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 28 Mar 2025 12:04:06 -0700 Subject: [PATCH 058/166] wip: clingen ldh submission job --- src/mavedb/lib/clingen/constants.py | 3 + src/mavedb/worker/jobs.py | 145 ++++++++++++++++++++++++++-- 2 files changed, 141 insertions(+), 7 deletions(-) diff --git a/src/mavedb/lib/clingen/constants.py b/src/mavedb/lib/clingen/constants.py index 27424c4d..1cf7b7b8 100644 --- a/src/mavedb/lib/clingen/constants.py +++ b/src/mavedb/lib/clingen/constants.py @@ -9,3 +9,6 @@ LDH_ENTITY_NAME = "MaveDbMapping" MAVEDB_BASE_GIT = "https://github.com/VariantEffect/mavedb-api" + +DEFAULT_LDH_SUBMISSION_BATCH_SIZE = 100 +LDH_SUBMISSION_URL = f"https://genboree.org/mq/brdg/pulsar/{CLIN_GEN_TENANT}/ldh/submissions/{LDH_ENTITY_NAME}" diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index 2219a496..f967edde 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -15,6 +15,9 @@ from mavedb.data_providers.services import vrs_mapper from mavedb.db.view import refresh_all_mat_views +from mavedb.lib.clingen.constants import DEFAULT_LDH_SUBMISSION_BATCH_SIZE, LDH_SUBMISSION_URL +from mavedb.lib.clingen.content_constructors import construct_ldh_submission +from mavedb.lib.clingen.linked_data_hub import ClinGenLdhService from mavedb.lib.exceptions import MappingEnqueueError, NonexistentMappingReferenceError, NonexistentMappingResultsError from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.score_sets import ( @@ -43,13 +46,9 @@ BACKOFF_IN_SECONDS = 15 -@asynccontextmanager -async def mapping_in_execution(redis: ArqRedis, job_id: str): - await redis.set(MAPPING_CURRENT_ID_NAME, job_id) - try: - yield - finally: - await redis.set(MAPPING_CURRENT_ID_NAME, "") +#################################################################################################### +# Job utilities +#################################################################################################### def setup_job_state( @@ -89,6 +88,11 @@ async def enqueue_job_with_backoff( return (new_job_id, not limit_reached, backoff) +#################################################################################################### +# Creating variants +#################################################################################################### + + async def create_variants_for_score_set( ctx, correlation_id: str, score_set_id: int, updater_id: int, scores: pd.DataFrame, counts: pd.DataFrame ): @@ -223,6 +227,20 @@ async def create_variants_for_score_set( return {"success": True} +#################################################################################################### +# Mapping variants +#################################################################################################### + + +@asynccontextmanager +async def mapping_in_execution(redis: ArqRedis, job_id: str): + await redis.set(MAPPING_CURRENT_ID_NAME, job_id) + try: + yield + finally: 
+ await redis.set(MAPPING_CURRENT_ID_NAME, "") + + async def map_variants_for_score_set( ctx: dict, correlation_id: str, score_set_id: int, updater_id: int, attempt: int = 1 ) -> dict: @@ -659,6 +677,11 @@ async def variant_mapper_manager(ctx: dict, correlation_id: str, updater_id: int return {"success": False, "enqueued_job": new_job_id} +#################################################################################################### +# Materialized Views +#################################################################################################### + + # TODO#405: Refresh materialized views within an executor. async def refresh_materialized_views(ctx: dict): logging_context = setup_job_state(ctx, None, None, None) @@ -674,3 +697,111 @@ async def refresh_published_variants_view(ctx: dict, correlation_id: str): PublishedVariantsMV.refresh(ctx["db"]) logger.debug(msg="Done refreshing of published variants materialized view.", extra=logging_context) return {"success": True} + + +#################################################################################################### +# ClinGen resource creation / linkage +#################################################################################################### + + +async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score_set_id: int, publisher_id: int): + logging_context = {} + score_set = None + try: + db: Session = ctx["db"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + + logging_context = setup_job_state(ctx, publisher_id, score_set.urn, correlation_id) + logger.info(msg="Started LDH mapped resource submission", extra=logging_context) + + submission_urn = score_set.urn + assert submission_urn, "A valid URN is needed to submit LDH objects for this score set." + + logging_context["current_ldh_submission_resource"] = submission_urn + logger.debug(msg="Fetched score set metadata for ldh mapped resource submission.", extra=logging_context) + + except Exception as e: + send_slack_message(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error during setup. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False} + + try: + ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_URL) + ldh_service.authenticate() + except Exception as e: + send_slack_message(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to connect to the LDH. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False} + + try: + variant_objects = db.scalars( + select(Variant, MappedVariant) + .join(MappedVariant) + .where( + Variant.score_set_id == score_set.id, + MappedVariant.current.is_(True), + MappedVariant.post_mapped.is_not(None), + ) + ).all() + + if not variant_objects: + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. 
Skipping LDH submission.", + extra=logging_context, + ) + return {"success": True, "retried": False} + + variant_content = [ + (mapped_variant.post_mapped[""], variant, mapped_variant) for variant, mapped_variant in variant_objects + ] + submission_content = construct_ldh_submission(variant_content) + + except Exception as e: + send_slack_message(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to construct submission objects. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False} + + try: + blocking = functools.partial( + ldh_service.dispatch_submissions, submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE + ) + loop = asyncio.get_running_loop() + submission_successes, submission_failures = await loop.run_in_executor(ctx["pool"], blocking) + + except Exception as e: + send_slack_message(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while dispatching submissions. This job will not be retried.", + extra=logging_context, + ) + + try: + assert not submission_failures, f"{len(submission_failures)} submissions failed to be dispatched to the LDH." + logger.info(msg="Dispatched all variant mapping submissions to the LDH.", extra=logging_context) + except AssertionError as e: + send_slack_message(e) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission failed to submit all mapping resources. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False} + + return {"success": True, "retried": False} From ddd50790d53bc57618ee5f1cdd5465f8d66e6d49 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 8 Apr 2025 15:10:56 -0700 Subject: [PATCH 059/166] wip: clingen submission script --- .../lib/clingen/content_constructors.py | 17 ++-- src/mavedb/scripts/clingen_ldh_submission.py | 90 +++++++++++++++++++ src/mavedb/worker/jobs.py | 3 +- 3 files changed, 103 insertions(+), 7 deletions(-) create mode 100644 src/mavedb/scripts/clingen_ldh_submission.py diff --git a/src/mavedb/lib/clingen/content_constructors.py b/src/mavedb/lib/clingen/content_constructors.py index eab5b457..2e651749 100644 --- a/src/mavedb/lib/clingen/content_constructors.py +++ b/src/mavedb/lib/clingen/content_constructors.py @@ -38,12 +38,17 @@ def construct_ldh_submission_entity(variant: Variant, mapped_variant: MappedVari return { "MaveDbMapping": [ { - # TODO#372: We try to make all possible fields that are non-nullable represented that way. - "mavedb_id": variant.urn, # type: ignore - "pre_mapped": mapped_variant.pre_mapped, # type: ignore - "post_mapped": mapped_variant.post_mapped, # type: ignore - "mapping_api_version": mapped_variant.mapping_api_version, # type: ignore - "score": variant.data["score_data"]["score"], # type: ignore + "entContent": { + # TODO#372: We try to make all possible fields that are non-nullable represented that way. 
+ "mavedb_id": variant.urn, # type: ignore + "pre_mapped": mapped_variant.pre_mapped, # type: ignore + "post_mapped": mapped_variant.post_mapped, # type: ignore + "mapping_api_version": mapped_variant.mapping_api_version, # type: ignore + "score": variant.data["score_data"]["score"], # type: ignore + }, + "entId": variant.urn, # type: ignore + # TODO: We should have some sort of constant for our base url. + "entIri": f"https://staging.mavedb.org/score-sets/{variant.urn}", # type: ignore } ] } diff --git a/src/mavedb/scripts/clingen_ldh_submission.py b/src/mavedb/scripts/clingen_ldh_submission.py new file mode 100644 index 00000000..c2278252 --- /dev/null +++ b/src/mavedb/scripts/clingen_ldh_submission.py @@ -0,0 +1,90 @@ +import click +import logging +from typing import Sequence + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant +from mavedb.models.mapped_variant import MappedVariant +from mavedb.scripts.environment import with_database_session +from mavedb.lib.clingen.linked_data_hub import ClinGenLdhService +from mavedb.lib.clingen.constants import DEFAULT_LDH_SUBMISSION_BATCH_SIZE, LDH_SUBMISSION_URL +from mavedb.lib.clingen.content_constructors import construct_ldh_submission + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + + +def submit_urns_to_clingen(db: Session, urns: list[str]) -> None: + ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_URL) + ldh_service.authenticate() + + for urn in urns: + try: + score_set = db.scalars(select(ScoreSet).where(ScoreSet.urn == urn)).one_or_none() + if not score_set: + logger.warning(f"No score set found for URN: {urn}") + continue + + logger.info(f"Submitting mapped variants to LDH service for score set with URN: {urn}") + variant_objects = db.scalars( + select(Variant, MappedVariant) + .join(MappedVariant) + .join(ScoreSet) + .where(ScoreSet.urn == urn) + .where(MappedVariant.current.is_(True)) + ).all() + + if not variant_objects: + logger.warning(f"No mapped variants found for score set with URN: {urn}") + continue + + variant_content = [ + ( + mapped_variant.post_mapped["variation"]["expressions"][0]["value"], + variant, + mapped_variant, + ) + for variant, mapped_variant in variant_objects + ] + submission_content = construct_ldh_submission(variant_content) + + submission_successes, submission_failures = ldh_service.dispatch_submissions( + submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE + ) + + if submission_failures: + logger.error(f"Failed to submit some variants for URN: {urn}") + else: + logger.info(f"Successfully submitted all variants for URN: {urn}") + + except Exception as e: + logger.error(f"Error processing URN {urn}: {e}") + + +@click.command() +@with_database_session +@click.argument("urns", nargs=-1) +@click.option("--all", help="Submit mapped variants for every score set in MaveDB.", is_flag=True) +def submit_clingen_urns_command(db: Session, urns: Sequence[str], all: bool) -> None: + """ + Submit data to ClinGen for mapped variant allele ID generation for the given URNs. + """ + if urns and all: + logger.error("Cannot provide both URNs and --all option.") + return + + if all: + urns = db.scalars(select(ScoreSet.urn)).all() + + if not urns: + logger.error("No URNs provided. 
Please provide at least one URN.") + return + + submit_urns_to_clingen(db, urns) + + +if __name__ == "__main__": + submit_clingen_urns_command() diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index f967edde..fc3d1786 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -762,7 +762,8 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score return {"success": True, "retried": False} variant_content = [ - (mapped_variant.post_mapped[""], variant, mapped_variant) for variant, mapped_variant in variant_objects + (mapped_variant.post_mapped["variation"]["expressions"][0]["value"], variant, mapped_variant) + for variant, mapped_variant in variant_objects ] submission_content = construct_ldh_submission(variant_content) From 733b086192355c9a8e9891f1a3631ce13bc82638 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 8 Apr 2025 15:14:59 -0700 Subject: [PATCH 060/166] Alembic revision reorder from rebase --- alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py b/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py index 5ac69502..da58494a 100644 --- a/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py +++ b/alembic/versions/e8a3b5d8f885_add_clingen_allele_ids.py @@ -1,7 +1,7 @@ """Add ClinGen allele IDs Revision ID: e8a3b5d8f885 -Revises: b85bc7b1bec7 +Revises: 4726e4dddde8 Create Date: 2025-01-27 18:55:09.283855 """ @@ -12,7 +12,7 @@ # revision identifiers, used by Alembic. revision = "e8a3b5d8f885" -down_revision = "b85bc7b1bec7" +down_revision = "4726e4dddde8" branch_labels = None depends_on = None From 81bf50024a2fd01d52bc0b90e5ba0f074effe9d2 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 9 Apr 2025 15:20:36 -0700 Subject: [PATCH 061/166] wip: clingen submission and linkage scripts --- src/mavedb/lib/clingen/constants.py | 6 +- .../lib/clingen/content_constructors.py | 6 +- src/mavedb/lib/clingen/linked_data_hub.py | 34 ++++----- src/mavedb/lib/types/clingen.py | 13 ++-- src/mavedb/lib/utils.py | 10 +++ src/mavedb/scripts/clingen_ldh_submission.py | 39 ++++++++--- src/mavedb/scripts/link_clingen_variants.py | 70 +++++++++++++++++++ 7 files changed, 144 insertions(+), 34 deletions(-) create mode 100644 src/mavedb/scripts/link_clingen_variants.py diff --git a/src/mavedb/lib/clingen/constants.py b/src/mavedb/lib/clingen/constants.py index 1cf7b7b8..26e68ae1 100644 --- a/src/mavedb/lib/clingen/constants.py +++ b/src/mavedb/lib/clingen/constants.py @@ -6,9 +6,11 @@ CLIN_GEN_TENANT = os.getenv("CLIN_GEN_TENANT") LDH_SUBMISSION_TYPE = "cg-ldh-ld-submission" -LDH_ENTITY_NAME = "MaveDbMapping" +LDH_ENTITY_NAME = "MaveDBMapping" +LDH_ENTITY_ENDPOINT = "maveDb" # for some reason, not the same :/ MAVEDB_BASE_GIT = "https://github.com/VariantEffect/mavedb-api" DEFAULT_LDH_SUBMISSION_BATCH_SIZE = 100 -LDH_SUBMISSION_URL = f"https://genboree.org/mq/brdg/pulsar/{CLIN_GEN_TENANT}/ldh/submissions/{LDH_ENTITY_NAME}" +LDH_SUBMISSION_URL = f"https://genboree.org/mq/brdg/pulsar/{CLIN_GEN_TENANT}/ldh/submissions/{LDH_ENTITY_ENDPOINT}" +LDH_LINKED_DATA_URL = f"https://genboree.org/{CLIN_GEN_TENANT}/{LDH_ENTITY_NAME}/id" diff --git a/src/mavedb/lib/clingen/content_constructors.py b/src/mavedb/lib/clingen/content_constructors.py index 2e651749..c60738c3 100644 --- a/src/mavedb/lib/clingen/content_constructors.py +++ b/src/mavedb/lib/clingen/content_constructors.py @@ -12,12 +12,12 @@ def 
construct_ldh_submission_event(sbj: LdhContentSubject) -> LdhEvent: return { "type": LDH_SUBMISSION_TYPE, "name": LDH_ENTITY_NAME, - "uuid": uuid4(), + "uuid": str(uuid4()), "sbj": { "id": sbj["Variant"]["hgvs"], "type": "Variant", "format": "hgvs", - "add": True, + "add": "true", }, "triggered": { "by": { @@ -36,10 +36,10 @@ def construct_ldh_submission_subject(hgvs: str) -> LdhContentSubject: def construct_ldh_submission_entity(variant: Variant, mapped_variant: MappedVariant) -> LdhContentLinkedData: return { + # TODO#372: We try to make all possible fields that are non-nullable represented that way. "MaveDbMapping": [ { "entContent": { - # TODO#372: We try to make all possible fields that are non-nullable represented that way. "mavedb_id": variant.urn, # type: ignore "pre_mapped": mapped_variant.pre_mapped, # type: ignore "post_mapped": mapped_variant.post_mapped, # type: ignore diff --git a/src/mavedb/lib/clingen/linked_data_hub.py b/src/mavedb/lib/clingen/linked_data_hub.py index 063099df..a7bd4778 100644 --- a/src/mavedb/lib/clingen/linked_data_hub.py +++ b/src/mavedb/lib/clingen/linked_data_hub.py @@ -1,7 +1,6 @@ import logging import os from datetime import datetime -from itertools import islice from typing import Optional import requests @@ -10,7 +9,7 @@ from mavedb.lib.logging.context import logging_context, save_to_logging_context, format_raised_exception_info_as_dict from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD from mavedb.lib.types.clingen import LdhSubmission -from mavedb.lib.utils import request_with_backoff +from mavedb.lib.utils import batched, request_with_backoff logger = logging.getLogger(__name__) @@ -68,30 +67,31 @@ def authenticate(self) -> str: def dispatch_submissions( self, content_submissions: list[LdhSubmission], batch_size: Optional[int] = None ) -> tuple[list, list]: - # TODO: When we upgrade to Python 3.12, we can replace this with the built-in `itertools.batched` method. 
- if batch_size is not None: - batched_submissions: list[list[LdhSubmission]] = [] - while batch := tuple(islice(content_submissions, batch_size)): - batched_submissions.append(list(batch)) - - save_to_logging_context({"ldh_submission_batch_size": batch_size}) - save_to_logging_context({"ldh_submission_batch_count": len(batched_submissions)}) - submission_successes = [] submission_failures = [] - submissions = batched_submissions if batch_size is not None else content_submissions - save_to_logging_context({"ldh_submission_count": len(submissions)}) + submissions = list(batched(content_submissions, batch_size)) if batch_size is not None else content_submissions + save_to_logging_context({"ldh_submission_count": len(content_submissions)}) - logger.debug(msg="Dispatching ldh submissions.", extra=logging_context()) - for content in submissions: + if batch_size is not None: + save_to_logging_context({"ldh_submission_batch_size": batch_size}) + save_to_logging_context({"ldh_submission_batch_count": len(submissions)}) + logger.debug("Batching ldh submissions.", extra=logging_context()) + + logger.info(msg=f"Dispatching {len(submissions)} ldh submissions...", extra=logging_context()) + for idx, content in enumerate(submissions): try: response = request_with_backoff( method="PUT", url=self.url, json=content, - headers={"Authorization": f"Bearer {self.authenticate()}"}, + headers={"Authorization": f"Bearer {self.authenticate()}", "Content-Type": "application/json"}, ) submission_successes.append(response.json()) + logger.debug( + msg=f"Successfully dispatched ldh submission ({idx+1} / {len(submissions)}).", + extra=logging_context(), + ) + except requests.exceptions.RequestException as exc: save_to_logging_context(format_raised_exception_info_as_dict(exc)) logger.error(msg="Failed to dispatch ldh submission.", exc_info=exc, extra=logging_context()) @@ -104,7 +104,7 @@ def dispatch_submissions( "ldh_submission_failure_count": len(submission_failures), } ) - logger.info(msg="Done dispatching ldh submissions", extra=logging_context()) + logger.info(msg="Done dispatching ldh submissions.", extra=logging_context()) return submission_successes, submission_failures def _existing_jwt(self) -> Optional[str]: diff --git a/src/mavedb/lib/types/clingen.py b/src/mavedb/lib/types/clingen.py index 79327dda..171851f5 100644 --- a/src/mavedb/lib/types/clingen.py +++ b/src/mavedb/lib/types/clingen.py @@ -1,6 +1,5 @@ from typing import TypedDict, Literal from typing_extensions import NotRequired -from uuid import UUID # See: https://ldh.genome.network/docs/ldh/submit.html#content-submission-body @@ -14,7 +13,7 @@ class EventSbj(TypedDict): id: str type: str format: Literal["hgvs", "alleleRegistryID", "clinvarID", "geneSymbol"] - add: bool + add: Literal["true", "false"] # Who/what triggered the event @@ -32,7 +31,7 @@ class EventTrigger(TypedDict): class LdhEvent(TypedDict): type: str name: str - uuid: UUID + uuid: str sbj: EventSbj triggered: EventTrigger @@ -59,8 +58,14 @@ class LdhMapping(TypedDict): score: float +class LdhEntity(TypedDict): + entContent: LdhMapping + entId: str + entIri: str + + class LdhContentLinkedData(TypedDict): - MaveDbMapping: list[LdhMapping] + MaveDbMapping: list[LdhEntity] ### Linked Data Hub Submission Type diff --git a/src/mavedb/lib/utils.py b/src/mavedb/lib/utils.py index b53f026d..c4b13f3b 100644 --- a/src/mavedb/lib/utils.py +++ b/src/mavedb/lib/utils.py @@ -24,3 +24,13 @@ def request_with_backoff( time.sleep(backoff_time) raise requests.exceptions.RequestException(f"Request to 
{url} failed after {backoff_limit} attempts.") + + +# TODO: When we upgrade to Python 3.12, we can replace this with the built-in `itertools.batched` method. +def batched(iterable, n): + """ + Yield successive n-sized chunks from iterable. + """ + l = len(iterable) # noqa: E741 + for i in range(0, l, n): + yield iterable[i : min((i + n, l))] diff --git a/src/mavedb/scripts/clingen_ldh_submission.py b/src/mavedb/scripts/clingen_ldh_submission.py index c2278252..86c4a051 100644 --- a/src/mavedb/scripts/clingen_ldh_submission.py +++ b/src/mavedb/scripts/clingen_ldh_submission.py @@ -17,10 +17,23 @@ logger.setLevel(logging.DEBUG) -def submit_urns_to_clingen(db: Session, urns: list[str]) -> None: +def _variation_from_post_mapped(mapped_variant: MappedVariant) -> str: + """ + Extract the variation from the post_mapped field of the MappedVariant object. + """ + try: + # Assuming post_mapped is a dictionary with a specific structure + return mapped_variant.post_mapped["expressions"][0]["value"] # type: ignore + except KeyError: + return mapped_variant.post_mapped["variation"]["expressions"][0]["value"] # type: ignore + + +def submit_urns_to_clingen(db: Session, urns: Sequence[str]) -> list[str]: ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_URL) ldh_service.authenticate() + submitted_entities = [] + for urn in urns: try: score_set = db.scalars(select(ScoreSet).where(ScoreSet.urn == urn)).one_or_none() @@ -29,7 +42,7 @@ def submit_urns_to_clingen(db: Session, urns: list[str]) -> None: continue logger.info(f"Submitting mapped variants to LDH service for score set with URN: {urn}") - variant_objects = db.scalars( + variant_objects = db.execute( select(Variant, MappedVariant) .join(MappedVariant) .join(ScoreSet) @@ -41,16 +54,17 @@ def submit_urns_to_clingen(db: Session, urns: list[str]) -> None: logger.warning(f"No mapped variants found for score set with URN: {urn}") continue + logger.debug(f"Preparing {len(variant_objects)} mapped variants for submission") variant_content = [ ( - mapped_variant.post_mapped["variation"]["expressions"][0]["value"], + _variation_from_post_mapped(mapped_variant), variant, mapped_variant, ) for variant, mapped_variant in variant_objects ] - submission_content = construct_ldh_submission(variant_content) + submission_content = construct_ldh_submission(variant_content) submission_successes, submission_failures = ldh_service.dispatch_submissions( submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE ) @@ -60,15 +74,20 @@ def submit_urns_to_clingen(db: Session, urns: list[str]) -> None: else: logger.info(f"Successfully submitted all variants for URN: {urn}") + submitted_entities.extend([variant[1].urn for variant in variant_content]) + except Exception as e: - logger.error(f"Error processing URN {urn}: {e}") + logger.error(f"Error processing URN {urn}", exc_info=e) + + return submitted_entities @click.command() @with_database_session @click.argument("urns", nargs=-1) @click.option("--all", help="Submit mapped variants for every score set in MaveDB.", is_flag=True) -def submit_clingen_urns_command(db: Session, urns: Sequence[str], all: bool) -> None: +@click.option("--suppress-output", help="Suppress final print output to the console.", is_flag=True) +def submit_clingen_urns_command(db: Session, urns: Sequence[str], all: bool, suppress_output: bool) -> None: """ Submit data to ClinGen for mapped variant allele ID generation for the given URNs. 
""" @@ -77,13 +96,17 @@ def submit_clingen_urns_command(db: Session, urns: Sequence[str], all: bool) -> return if all: - urns = db.scalars(select(ScoreSet.urn)).all() + # TODO#372: non-nullable urns. + urns = db.scalars(select(ScoreSet.urn)).all() # type: ignore if not urns: logger.error("No URNs provided. Please provide at least one URN.") return - submit_urns_to_clingen(db, urns) + submitted_variant_urns = submit_urns_to_clingen(db, urns) + + if not suppress_output: + print(submitted_variant_urns) if __name__ == "__main__": diff --git a/src/mavedb/scripts/link_clingen_variants.py b/src/mavedb/scripts/link_clingen_variants.py new file mode 100644 index 00000000..b9950f56 --- /dev/null +++ b/src/mavedb/scripts/link_clingen_variants.py @@ -0,0 +1,70 @@ +import click +import requests +import logging +from typing import Optional, Sequence + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from mavedb.models.variant import Variant +from mavedb.models.mapped_variant import MappedVariant +from mavedb.scripts.environment import with_database_session +from mavedb.lib.clingen.constants import LDH_LINKED_DATA_URL + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + + +def get_clingen_variation(urn: str) -> Optional[dict]: + response = requests.get( + f"{LDH_LINKED_DATA_URL}/{urn}", + headers={"Accept": "application/json"}, + ) + + if response.status_code == 200: + return response.json()["data"] + else: + logger.error(f"Failed to fetch data for URN {urn}: {response.status_code} - {response.text}") + return None + + +@click.command() +@with_database_session +@click.argument("urns", nargs=-1) +def link_clingen_variants(db: Session, urns: Sequence[str]) -> None: + """ + Submit data to ClinGen for mapped variant allele ID generation for the given URNs. + """ + if not urns: + logger.error("No URNs provided. Please provide at least one URN.") + return + + failed_urns = [] + for urn in urns: + ldh_variation = get_clingen_variation(urn) + + if not ldh_variation: + failed_urns.append(urn) + continue + + mapped_variant = db.scalar(select(MappedVariant).join(Variant).where(Variant.urn == urn)) + + if not mapped_variant: + logger.warning(f"No mapped variant found for URN {urn}.") + failed_urns.append(urn) + continue + + mapped_variant.clingen_allele_id = ldh_variation["id"] + db.add(mapped_variant) + db.commit() + + logger.info(f"Successfully linked URN {urn} to ClinGen variation {ldh_variation['id']}.") + + if failed_urns: + logger.warning(f"Failed to link the following URNs: {', '.join(failed_urns)}") + + logger.info(f"Linking process completed. 
Linked {len(urns) - len(failed_urns)} URNs successfully.") + + +if __name__ == "__main__": + link_clingen_variants() From cee36d2cc2d3c3a7914f76ff8ebe4e0724d8c79f Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 17 Apr 2025 09:25:37 -0700 Subject: [PATCH 062/166] Script and LDH Connection Fixes --- src/mavedb/lib/clingen/constants.py | 3 +- .../lib/clingen/content_constructors.py | 9 ++---- src/mavedb/lib/clingen/linked_data_hub.py | 16 +++++----- src/mavedb/lib/types/clingen.py | 4 +-- src/mavedb/scripts/clingen_ldh_submission.py | 25 +++++++++++---- src/mavedb/scripts/link_clingen_variants.py | 31 ++++++++++++++----- 6 files changed, 56 insertions(+), 32 deletions(-) diff --git a/src/mavedb/lib/clingen/constants.py b/src/mavedb/lib/clingen/constants.py index 26e68ae1..ad21a12a 100644 --- a/src/mavedb/lib/clingen/constants.py +++ b/src/mavedb/lib/clingen/constants.py @@ -4,6 +4,7 @@ GENBOREE_ACCOUNT_PASSWORD = os.getenv("GENBOREE_ACCOUNT_PASSWORD") CLIN_GEN_TENANT = os.getenv("CLIN_GEN_TENANT") +LDH_TENANT = os.getenv("LDH_TENANT") LDH_SUBMISSION_TYPE = "cg-ldh-ld-submission" LDH_ENTITY_NAME = "MaveDBMapping" @@ -13,4 +14,4 @@ DEFAULT_LDH_SUBMISSION_BATCH_SIZE = 100 LDH_SUBMISSION_URL = f"https://genboree.org/mq/brdg/pulsar/{CLIN_GEN_TENANT}/ldh/submissions/{LDH_ENTITY_ENDPOINT}" -LDH_LINKED_DATA_URL = f"https://genboree.org/{CLIN_GEN_TENANT}/{LDH_ENTITY_NAME}/id" +LDH_LINKED_DATA_URL = f"https://genboree.org/{LDH_TENANT}/{LDH_ENTITY_NAME}/id" diff --git a/src/mavedb/lib/clingen/content_constructors.py b/src/mavedb/lib/clingen/content_constructors.py index c60738c3..858da5b5 100644 --- a/src/mavedb/lib/clingen/content_constructors.py +++ b/src/mavedb/lib/clingen/content_constructors.py @@ -13,12 +13,7 @@ def construct_ldh_submission_event(sbj: LdhContentSubject) -> LdhEvent: "type": LDH_SUBMISSION_TYPE, "name": LDH_ENTITY_NAME, "uuid": str(uuid4()), - "sbj": { - "id": sbj["Variant"]["hgvs"], - "type": "Variant", - "format": "hgvs", - "add": "true", - }, + "sbj": {"id": sbj["Variant"]["hgvs"], "type": "Variant", "format": "hgvs", "add": True}, "triggered": { "by": { "host": MAVEDB_BASE_GIT, @@ -37,7 +32,7 @@ def construct_ldh_submission_subject(hgvs: str) -> LdhContentSubject: def construct_ldh_submission_entity(variant: Variant, mapped_variant: MappedVariant) -> LdhContentLinkedData: return { # TODO#372: We try to make all possible fields that are non-nullable represented that way. - "MaveDbMapping": [ + "MaveDBMapping": [ { "entContent": { "mavedb_id": variant.urn, # type: ignore diff --git a/src/mavedb/lib/clingen/linked_data_hub.py b/src/mavedb/lib/clingen/linked_data_hub.py index a7bd4778..afef79e2 100644 --- a/src/mavedb/lib/clingen/linked_data_hub.py +++ b/src/mavedb/lib/clingen/linked_data_hub.py @@ -1,9 +1,9 @@ import logging +import requests import os from datetime import datetime from typing import Optional -import requests from jose import jwt from mavedb.lib.logging.context import logging_context, save_to_logging_context, format_raised_exception_info_as_dict @@ -19,13 +19,11 @@ def __init__(self, url: str) -> None: self.url = url def authenticate(self) -> str: - logger.info(msg="Attempting to use an existing Genboree JWT.", extra=logging_context()) - if existing_jwt := self._existing_jwt(): logger.info(msg="Using existing Genboree JWT for authentication.", extra=logging_context()) return existing_jwt - logger.info( + logger.debug( msg="No existing or valid Genboree JWT found. 
Authenticating via Genboree services.", extra=logging_context(), ) @@ -87,7 +85,7 @@ def dispatch_submissions( headers={"Authorization": f"Bearer {self.authenticate()}", "Content-Type": "application/json"}, ) submission_successes.append(response.json()) - logger.debug( + logger.info( msg=f"Successfully dispatched ldh submission ({idx+1} / {len(submissions)}).", extra=logging_context(), ) @@ -108,17 +106,19 @@ def dispatch_submissions( return submission_successes, submission_failures def _existing_jwt(self) -> Optional[str]: + logger.debug(msg="Checking for existing Genboree JWT.", extra=logging_context()) + existing_jwt = os.getenv("GENBOREE_JWT") if not existing_jwt: - logger.debug(msg="No existing Genboree JWT was set.") + logger.debug(msg="No existing Genboree JWT was set.", extra=logging_context()) return None expiration = jwt.get_unverified_claims(existing_jwt).get("exp", datetime.now().timestamp()) if expiration > datetime.now().timestamp(): - logger.debug(msg="Found existing and valid Genboree JWT.") + logger.debug(msg="Found existing and valid Genboree JWT.", extra=logging_context()) return existing_jwt - logger.debug(msg="Found existing but expired Genboree JWT.") + logger.debug(msg="Found existing but expired Genboree JWT.", extra=logging_context()) return None diff --git a/src/mavedb/lib/types/clingen.py b/src/mavedb/lib/types/clingen.py index 171851f5..0d8710a1 100644 --- a/src/mavedb/lib/types/clingen.py +++ b/src/mavedb/lib/types/clingen.py @@ -13,7 +13,7 @@ class EventSbj(TypedDict): id: str type: str format: Literal["hgvs", "alleleRegistryID", "clinvarID", "geneSymbol"] - add: Literal["true", "false"] + add: bool # Who/what triggered the event @@ -65,7 +65,7 @@ class LdhEntity(TypedDict): class LdhContentLinkedData(TypedDict): - MaveDbMapping: list[LdhEntity] + MaveDBMapping: list[LdhEntity] ### Linked Data Hub Submission Type diff --git a/src/mavedb/scripts/clingen_ldh_submission.py b/src/mavedb/scripts/clingen_ldh_submission.py index 86c4a051..c538d81b 100644 --- a/src/mavedb/scripts/clingen_ldh_submission.py +++ b/src/mavedb/scripts/clingen_ldh_submission.py @@ -14,7 +14,6 @@ from mavedb.lib.clingen.content_constructors import construct_ldh_submission logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) def _variation_from_post_mapped(mapped_variant: MappedVariant) -> str: @@ -28,13 +27,19 @@ def _variation_from_post_mapped(mapped_variant: MappedVariant) -> str: return mapped_variant.post_mapped["variation"]["expressions"][0]["value"] # type: ignore -def submit_urns_to_clingen(db: Session, urns: Sequence[str]) -> list[str]: +def submit_urns_to_clingen(db: Session, urns: Sequence[str], debug: bool) -> list[str]: ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_URL) ldh_service.authenticate() submitted_entities = [] - for urn in urns: + if debug: + logger.debug("Debug mode enabled. Submitting only one request to ClinGen.") + urns = urns[:1] + + for idx, urn in enumerate(urns): + logger.info(f"Processing URN: {urn}. (Scoreset {idx + 1}/{len(urns)})") + try: score_set = db.scalars(select(ScoreSet).where(ScoreSet.urn == urn)).one_or_none() if not score_set: @@ -64,6 +69,11 @@ def submit_urns_to_clingen(db: Session, urns: Sequence[str]) -> list[str]: for variant, mapped_variant in variant_objects ] + if debug: + logger.debug("Debug mode enabled. 
Submitting only one request to ClinGen.") + variant_content = variant_content[:1] + + logger.debug(f"Constructing LDH submission for {len(variant_content)} variants") submission_content = construct_ldh_submission(variant_content) submission_successes, submission_failures = ldh_service.dispatch_submissions( submission_content, DEFAULT_LDH_SUBMISSION_BATCH_SIZE @@ -87,7 +97,10 @@ def submit_urns_to_clingen(db: Session, urns: Sequence[str]) -> list[str]: @click.argument("urns", nargs=-1) @click.option("--all", help="Submit mapped variants for every score set in MaveDB.", is_flag=True) @click.option("--suppress-output", help="Suppress final print output to the console.", is_flag=True) -def submit_clingen_urns_command(db: Session, urns: Sequence[str], all: bool, suppress_output: bool) -> None: +@click.option("--debug", help="Enable debug mode. This will send only one request at most to ClinGen", is_flag=True) +def submit_clingen_urns_command( + db: Session, urns: Sequence[str], all: bool, suppress_output: bool, debug: bool +) -> None: """ Submit data to ClinGen for mapped variant allele ID generation for the given URNs. """ @@ -103,10 +116,10 @@ def submit_clingen_urns_command(db: Session, urns: Sequence[str], all: bool, sup logger.error("No URNs provided. Please provide at least one URN.") return - submitted_variant_urns = submit_urns_to_clingen(db, urns) + submitted_variant_urns = submit_urns_to_clingen(db, urns, debug) if not suppress_output: - print(submitted_variant_urns) + print(", ".join(submitted_variant_urns)) if __name__ == "__main__": diff --git a/src/mavedb/scripts/link_clingen_variants.py b/src/mavedb/scripts/link_clingen_variants.py index b9950f56..51110ed0 100644 --- a/src/mavedb/scripts/link_clingen_variants.py +++ b/src/mavedb/scripts/link_clingen_variants.py @@ -2,10 +2,12 @@ import requests import logging from typing import Optional, Sequence +from urllib import parse from sqlalchemy import select from sqlalchemy.orm import Session +from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant from mavedb.models.mapped_variant import MappedVariant from mavedb.scripts.environment import with_database_session @@ -17,12 +19,12 @@ def get_clingen_variation(urn: str) -> Optional[dict]: response = requests.get( - f"{LDH_LINKED_DATA_URL}/{urn}", + f"{LDH_LINKED_DATA_URL}/{parse.quote_plus(urn)}", headers={"Accept": "application/json"}, ) if response.status_code == 200: - return response.json()["data"] + return response.json()["data"]["ldFor"]["Variant"][0] else: logger.error(f"Failed to fetch data for URN {urn}: {response.status_code} - {response.text}") return None @@ -31,7 +33,8 @@ def get_clingen_variation(urn: str) -> Optional[dict]: @click.command() @with_database_session @click.argument("urns", nargs=-1) -def link_clingen_variants(db: Session, urns: Sequence[str]) -> None: +@click.option("--score-sets/--variants", default=False) +def link_clingen_variants(db: Session, urns: Sequence[str], score_sets: bool) -> None: """ Submit data to ClinGen for mapped variant allele ID generation for the given URNs. """ @@ -39,6 +42,19 @@ def link_clingen_variants(db: Session, urns: Sequence[str]) -> None: logger.error("No URNs provided. Please provide at least one URN.") return + # Convert score set URNs to variant URNs. 
+ if score_sets: + variants = [ + db.scalars( + select(Variant.urn) + .join(MappedVariant) + .join(ScoreSet) + .where(ScoreSet.urn == urn, MappedVariant.current.is_(True)) + ).all() + for urn in urns + ] + urns = [variant for sublist in variants for variant in sublist if variant is not None] + failed_urns = [] for urn in urns: ldh_variation = get_clingen_variation(urn) @@ -54,16 +70,15 @@ def link_clingen_variants(db: Session, urns: Sequence[str]) -> None: failed_urns.append(urn) continue - mapped_variant.clingen_allele_id = ldh_variation["id"] + mapped_variant.clingen_allele_id = ldh_variation["entId"] db.add(mapped_variant) - db.commit() - logger.info(f"Successfully linked URN {urn} to ClinGen variation {ldh_variation['id']}.") + logger.info(f"Successfully linked URN {urn} to ClinGen variation {ldh_variation['entId']}.") if failed_urns: - logger.warning(f"Failed to link the following URNs: {', '.join(failed_urns)}") + logger.warning(f"Failed to link the following {len(failed_urns)} URNs: {', '.join(failed_urns)}") - logger.info(f"Linking process completed. Linked {len(urns) - len(failed_urns)} URNs successfully.") + logger.info(f"Linking process completed. Linked {len(urns) - len(failed_urns)}/{len(urns)} URNs successfully.") if __name__ == "__main__": From d0627616bae524751420e7f8e470be3252c11af5 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 17 Apr 2025 11:43:17 -0700 Subject: [PATCH 063/166] Create distinction between Slack errors and messages --- src/mavedb/lib/slack.py | 20 ++++++---- src/mavedb/server_main.py | 4 +- src/mavedb/worker/jobs.py | 77 +++++++++++++++++++++++++++++++-------- 3 files changed, 76 insertions(+), 25 deletions(-) diff --git a/src/mavedb/lib/slack.py b/src/mavedb/lib/slack.py index 22786b69..035723d6 100644 --- a/src/mavedb/lib/slack.py +++ b/src/mavedb/lib/slack.py @@ -16,14 +16,7 @@ def find_traceback_locations(): ] -def send_slack_message(err, request=None): - text = {"type": err.__class__.__name__, "exception": str(err), "location": find_traceback_locations()} - - if request: - text["client"] = str(request.client.host) - text["request"] = f"{request.method} {request.url}" - - text = json.dumps(text) +def send_slack_message(text: str): slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL") if slack_webhook_url is not None and len(slack_webhook_url) > 0: client = WebhookClient(url=slack_webhook_url) @@ -38,3 +31,14 @@ def send_slack_message(err, request=None): ) else: print(f"EXCEPTION_HANDLER: {text}") + + +def send_slack_error(err, request=None): + text = {"type": err.__class__.__name__, "exception": str(err), "location": find_traceback_locations()} + + if request: + text["client"] = str(request.client.host) + text["request"] = f"{request.method} {request.url}" + + text = json.dumps(text) + send_slack_message(text) diff --git a/src/mavedb/server_main.py b/src/mavedb/server_main.py index e1cf4c2c..5f49a35c 100644 --- a/src/mavedb/server_main.py +++ b/src/mavedb/server_main.py @@ -32,7 +32,7 @@ save_to_logging_context, ) from mavedb.lib.permissions import PermissionException -from mavedb.lib.slack import send_slack_message +from mavedb.lib.slack import send_slack_error from mavedb.models import * # noqa: F403 from mavedb.routers import ( access_keys, @@ -176,7 +176,7 @@ async def exception_handler(request, err): try: logger.error(msg="Uncaught exception.", extra=logging_context(), exc_info=err) - send_slack_message(err=err, request=request) + send_slack_error(err=err, request=request) finally: log_request(request, response, time.time_ns()) diff --git 
a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index fc3d1786..de2a4e64 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -25,7 +25,7 @@ create_variants, create_variants_data, ) -from mavedb.lib.slack import send_slack_message +from mavedb.lib.slack import send_slack_error, send_slack_message from mavedb.lib.validation.dataframe import ( validate_and_standardize_dataframe_pair, ) @@ -185,7 +185,7 @@ async def create_variants_for_score_set( logging_context["created_variants"] = 0 logger.warning(msg="Encountered an internal exception while processing variants.", extra=logging_context) - send_slack_message(err=e) + send_slack_error(err=e) return {"success": False} # Catch all other exceptions. The exceptions caught here were intented to be system exiting. @@ -274,7 +274,7 @@ async def map_variants_for_score_set( loop = asyncio.get_running_loop() except Exception as e: - send_slack_message(e) + send_slack_error(e) logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} logger.error( msg="Variant mapper encountered an unexpected error during setup. This job will not be retried.", @@ -303,7 +303,7 @@ async def map_variants_for_score_set( db.add(score_set) db.commit() - send_slack_message(e) + send_slack_error(e) logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} logger.warning( msg="Variant mapper encountered an unexpected error while mapping variants. This job will be retried.", @@ -330,7 +330,7 @@ async def map_variants_for_score_set( score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} db.add(score_set) db.commit() - send_slack_message(backoff_e) + send_slack_error(backoff_e) logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} logger.critical( msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", @@ -486,7 +486,7 @@ async def map_variants_for_score_set( db.add(score_set) db.commit() - send_slack_message(e) + send_slack_error(e) logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} logger.warning( msg="An unexpected error occurred during variant mapping. This job will be attempted again.", @@ -511,7 +511,7 @@ async def map_variants_for_score_set( except Exception as backoff_e: score_set.mapping_state = MappingState.failed score_set.mapping_errors = {"error_message": "Encountered an internal server error during mapping"} - send_slack_message(backoff_e) + send_slack_error(backoff_e) logging_context = {**logging_context, **format_raised_exception_info_as_dict(backoff_e)} logger.critical( msg="While attempting to re-enqueue a mapping job that exited in error, another exception was encountered. This score set will not be mapped.", @@ -587,7 +587,7 @@ async def variant_mapper_manager(ctx: dict, correlation_id: str, updater_id: int logging_context["existing_mapping_job_id"] = mapping_job_id except Exception as e: - send_slack_message(e) + send_slack_error(e) # Attempt to remove this item from the mapping queue. try: @@ -647,7 +647,7 @@ async def variant_mapper_manager(ctx: dict, correlation_id: str, updater_id: int raise MappingEnqueueError() except Exception as e: - send_slack_message(e) + send_slack_error(e) logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} logger.error( msg="Variant mapper manager encountered an unexpected error while enqueing a mapping job. 
This job will not be retried.", @@ -707,6 +707,9 @@ async def refresh_published_variants_view(ctx: dict, correlation_id: str): async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score_set_id: int, publisher_id: int): logging_context = {} score_set = None + text = ( + "Could not submit mappings to LDH for score set %s. Mappings for this score set should be submitted manually." + ) try: db: Session = ctx["db"] score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() @@ -721,7 +724,12 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score logger.debug(msg="Fetched score set metadata for ldh mapped resource submission.", extra=logging_context) except Exception as e: - send_slack_message(e) + send_slack_error(e) + if score_set: + send_slack_message(text=text % score_set.urn) + else: + send_slack_message(text=text % score_set_id) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} logger.error( msg="LDH mapped resource submission encountered an unexpected error during setup. This job will not be retried.", @@ -734,10 +742,11 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_URL) ldh_service.authenticate() except Exception as e: - send_slack_message(e) + send_slack_error(e) + send_slack_message(text=text % score_set.urn) logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} logger.error( - msg="LDH mapped resource submission encountered an unexpected error while attempting to connect to the LDH. This job will not be retried.", + msg="LDH mapped resource submission encountered an unexpected error while attempting to authenticate to the LDH. This job will not be retried.", extra=logging_context, ) @@ -768,7 +777,8 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score submission_content = construct_ldh_submission(variant_content) except Exception as e: - send_slack_message(e) + send_slack_error(e) + send_slack_message(text=text % score_set.urn) logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} logger.error( msg="LDH mapped resource submission encountered an unexpected error while attempting to construct submission objects. This job will not be retried.", @@ -785,7 +795,8 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score submission_successes, submission_failures = await loop.run_in_executor(ctx["pool"], blocking) except Exception as e: - send_slack_message(e) + send_slack_error(e) + send_slack_message(text=text % score_set.urn) logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} logger.error( msg="LDH mapped resource submission encountered an unexpected error while dispatching submissions. This job will not be retried.", @@ -796,7 +807,10 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score assert not submission_failures, f"{len(submission_failures)} submissions failed to be dispatched to the LDH." logger.info(msg="Dispatched all variant mapping submissions to the LDH.", extra=logging_context) except AssertionError as e: - send_slack_message(e) + send_slack_error(e) + send_slack_message( + text=f"{len(submission_failures)} submissions failed to be dispatched to the LDH for score set {score_set.urn}." 
+ ) logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} logger.error( msg="LDH mapped resource submission failed to submit all mapping resources. This job will not be retried.", @@ -806,3 +820,36 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score return {"success": False, "retried": False} return {"success": True, "retried": False} + + +async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: int, publisher_id: int) -> dict: + logging_context = {} + score_set = None + text = "Could not link mappings to LDH for score set %s. Mappings for this score set should be linked manually." + try: + db: Session = ctx["db"] + score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() + + logging_context = setup_job_state(ctx, publisher_id, score_set.urn, correlation_id) + logger.info(msg="Started LDH mapped resource linkage", extra=logging_context) + + submission_urn = score_set.urn + assert submission_urn, "A valid URN is needed to link LDH objects for this score set." + + logging_context["current_ldh_linking_resource"] = submission_urn + logger.debug(msg="Fetched score set metadata for ldh mapped resource linkage.", extra=logging_context) + + except Exception as e: + send_slack_error(e) + if score_set: + send_slack_message(text=text % score_set.urn) + else: + send_slack_message(text=text % score_set_id) + + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error during setup. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False} From bd17d1aa9a0259dc212c97fdfd57971dfcc6dfa9 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 17 Apr 2025 11:43:41 -0700 Subject: [PATCH 064/166] Refactor clingen variation function into clingen lib --- src/mavedb/lib/clingen/linked_data_hub.py | 18 +++++++++++++++++- src/mavedb/scripts/link_clingen_variants.py | 19 ++----------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/mavedb/lib/clingen/linked_data_hub.py b/src/mavedb/lib/clingen/linked_data_hub.py index afef79e2..da4bb5d3 100644 --- a/src/mavedb/lib/clingen/linked_data_hub.py +++ b/src/mavedb/lib/clingen/linked_data_hub.py @@ -3,11 +3,14 @@ import os from datetime import datetime from typing import Optional +from urllib import parse + from jose import jwt from mavedb.lib.logging.context import logging_context, save_to_logging_context, format_raised_exception_info_as_dict -from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD +from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_LINKED_DATA_URL + from mavedb.lib.types.clingen import LdhSubmission from mavedb.lib.utils import batched, request_with_backoff @@ -122,3 +125,16 @@ def _existing_jwt(self) -> Optional[str]: logger.debug(msg="Found existing but expired Genboree JWT.", extra=logging_context()) return None + + +def get_clingen_variation(urn: str) -> Optional[dict]: + response = requests.get( + f"{LDH_LINKED_DATA_URL}/{parse.quote_plus(urn)}", + headers={"Accept": "application/json"}, + ) + + if response.status_code == 200: + return response.json()["data"]["ldFor"]["Variant"][0] + else: + logger.error(f"Failed to fetch data for URN {urn}: {response.status_code} - {response.text}") + return None diff --git a/src/mavedb/scripts/link_clingen_variants.py 
b/src/mavedb/scripts/link_clingen_variants.py index 51110ed0..33d465fa 100644 --- a/src/mavedb/scripts/link_clingen_variants.py +++ b/src/mavedb/scripts/link_clingen_variants.py @@ -1,35 +1,20 @@ import click -import requests import logging -from typing import Optional, Sequence -from urllib import parse +from typing import Sequence from sqlalchemy import select from sqlalchemy.orm import Session +from mavedb.lib.clingen.linked_data_hub import get_clingen_variation from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant from mavedb.models.mapped_variant import MappedVariant from mavedb.scripts.environment import with_database_session -from mavedb.lib.clingen.constants import LDH_LINKED_DATA_URL logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) -def get_clingen_variation(urn: str) -> Optional[dict]: - response = requests.get( - f"{LDH_LINKED_DATA_URL}/{parse.quote_plus(urn)}", - headers={"Accept": "application/json"}, - ) - - if response.status_code == 200: - return response.json()["data"]["ldFor"]["Variant"][0] - else: - logger.error(f"Failed to fetch data for URN {urn}: {response.status_code} - {response.text}") - return None - - @click.command() @with_database_session @click.argument("urns", nargs=-1) From 73ab329418c1e9c6735d5a176949f681d8bc962e Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 17 Apr 2025 11:44:06 -0700 Subject: [PATCH 065/166] Only submit mapped variants with a defined post mapped object --- src/mavedb/scripts/clingen_ldh_submission.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mavedb/scripts/clingen_ldh_submission.py b/src/mavedb/scripts/clingen_ldh_submission.py index c538d81b..370ba91b 100644 --- a/src/mavedb/scripts/clingen_ldh_submission.py +++ b/src/mavedb/scripts/clingen_ldh_submission.py @@ -52,6 +52,7 @@ def submit_urns_to_clingen(db: Session, urns: Sequence[str], debug: bool) -> lis .join(MappedVariant) .join(ScoreSet) .where(ScoreSet.urn == urn) + .where(MappedVariant.post_mapped.is_not(None)) .where(MappedVariant.current.is_(True)) ).all() From 1543d0aed4aa42c20651e1a29c280e3dff222abb Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 17 Apr 2025 16:59:44 -0700 Subject: [PATCH 066/166] Ensure post mapped metadata is not empty --- src/mavedb/scripts/link_clingen_variants.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mavedb/scripts/link_clingen_variants.py b/src/mavedb/scripts/link_clingen_variants.py index 33d465fa..cde1f8ba 100644 --- a/src/mavedb/scripts/link_clingen_variants.py +++ b/src/mavedb/scripts/link_clingen_variants.py @@ -12,7 +12,6 @@ from mavedb.scripts.environment import with_database_session logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) @click.command() @@ -34,7 +33,7 @@ def link_clingen_variants(db: Session, urns: Sequence[str], score_sets: bool) -> select(Variant.urn) .join(MappedVariant) .join(ScoreSet) - .where(ScoreSet.urn == urn, MappedVariant.current.is_(True)) + .where(ScoreSet.urn == urn, MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None)) ).all() for urn in urns ] From b4ba74588888767ce34c2d3ef2cf52cf855e8f5d Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 17 Apr 2025 17:05:22 -0700 Subject: [PATCH 067/166] Flesh out retry logic for linking clingen allele ids --- src/mavedb/lib/clingen/constants.py | 2 + src/mavedb/worker/jobs.py | 235 ++++++++++++++++++++++++++-- 2 files changed, 226 insertions(+), 11 deletions(-) diff --git a/src/mavedb/lib/clingen/constants.py 
b/src/mavedb/lib/clingen/constants.py index ad21a12a..03be1156 100644 --- a/src/mavedb/lib/clingen/constants.py +++ b/src/mavedb/lib/clingen/constants.py @@ -15,3 +15,5 @@ DEFAULT_LDH_SUBMISSION_BATCH_SIZE = 100 LDH_SUBMISSION_URL = f"https://genboree.org/mq/brdg/pulsar/{CLIN_GEN_TENANT}/ldh/submissions/{LDH_ENTITY_ENDPOINT}" LDH_LINKED_DATA_URL = f"https://genboree.org/{LDH_TENANT}/{LDH_ENTITY_NAME}/id" + +LINKED_DATA_RETRY_THRESHOLD = 0.95 diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index de2a4e64..d76d2cea 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -15,9 +15,13 @@ from mavedb.data_providers.services import vrs_mapper from mavedb.db.view import refresh_all_mat_views -from mavedb.lib.clingen.constants import DEFAULT_LDH_SUBMISSION_BATCH_SIZE, LDH_SUBMISSION_URL +from mavedb.lib.clingen.constants import ( + DEFAULT_LDH_SUBMISSION_BATCH_SIZE, + LDH_SUBMISSION_URL, + LINKED_DATA_RETRY_THRESHOLD, +) from mavedb.lib.clingen.content_constructors import construct_ldh_submission -from mavedb.lib.clingen.linked_data_hub import ClinGenLdhService +from mavedb.lib.clingen.linked_data_hub import ClinGenLdhService, get_clingen_variation from mavedb.lib.exceptions import MappingEnqueueError, NonexistentMappingReferenceError, NonexistentMappingResultsError from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.score_sets import ( @@ -43,7 +47,8 @@ MAPPING_QUEUE_NAME = "vrs_mapping_queue" MAPPING_CURRENT_ID_NAME = "vrs_mapping_current_job_id" BACKOFF_LIMIT = 5 -BACKOFF_IN_SECONDS = 15 +MAPPING_BACKOFF_IN_SECONDS = 15 +LINKING_BACKOFF_IN_SECONDS = 15 * 60 #################################################################################################### @@ -64,14 +69,14 @@ def setup_job_state( async def enqueue_job_with_backoff( - redis: ArqRedis, job_name: str, attempt: int, *args + redis: ArqRedis, job_name: str, attempt: int, backoff: int, *args ) -> tuple[Optional[str], bool, Any]: new_job_id = None backoff = None limit_reached = attempt > BACKOFF_LIMIT if not limit_reached: limit_reached = True - backoff = BACKOFF_IN_SECONDS * (2**attempt) + backoff = backoff * (2**attempt) attempt = attempt + 1 # NOTE: for jobs supporting backoff, `attempt` should be the final argument. @@ -315,7 +320,7 @@ async def map_variants_for_score_set( try: await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - redis, "variant_mapper_manager", attempt, correlation_id, updater_id + redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id ) # If we fail to enqueue a mapping manager for this score set, evict it from the queue. if new_job_id is None: @@ -498,7 +503,7 @@ async def map_variants_for_score_set( try: await redis.lpush(MAPPING_QUEUE_NAME, score_set.id) # type: ignore new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff( - redis, "variant_mapper_manager", attempt, correlation_id, updater_id + redis, "variant_mapper_manager", attempt, MAPPING_BACKOFF_IN_SECONDS, correlation_id, updater_id ) # If we fail to enqueue a mapping manager for this score set, evict it from the queue. 
if new_job_id is None: @@ -704,7 +709,7 @@ async def refresh_published_variants_view(ctx: dict, correlation_id: str): #################################################################################################### -async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score_set_id: int, publisher_id: int): +async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score_set_id: int): logging_context = {} score_set = None text = ( @@ -714,7 +719,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score db: Session = ctx["db"] score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - logging_context = setup_job_state(ctx, publisher_id, score_set.urn, correlation_id) + logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) logger.info(msg="Started LDH mapped resource submission", extra=logging_context) submission_urn = score_set.urn @@ -822,7 +827,24 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score return {"success": True, "retried": False} -async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: int, publisher_id: int) -> dict: +def get_ldh_variation(logging_ctx: dict, variant_urns: list[str]): + linked_data = [] + for idx, variant_urn in enumerate(variant_urns): + logging_ctx["on_variation_fetch"] = idx + ldh_variation = get_clingen_variation(variant_urn) + + if not ldh_variation: + linked_data.append((variant_urn, None)) + continue + else: + linked_data.append((variant_urn, ldh_variation["entId"])) + + logger.debug(msg=f"Found ClinGen variation {ldh_variation['entId']} for URN {variant_urn}.", extra=logging_ctx) + + return linked_data + + +async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: int, attempt: int) -> dict: logging_context = {} score_set = None text = "Could not link mappings to LDH for score set %s. Mappings for this score set should be linked manually." @@ -830,7 +852,10 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in db: Session = ctx["db"] score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() - logging_context = setup_job_state(ctx, publisher_id, score_set.urn, correlation_id) + logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) + logging_context["linkage_retry_threshold"] = LINKED_DATA_RETRY_THRESHOLD + logging_context["attempt"] = attempt + logging_context["max_attempts"] = BACKOFF_LIMIT logger.info(msg="Started LDH mapped resource linkage", extra=logging_context) submission_urn = score_set.urn @@ -853,3 +878,191 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in ) return {"success": False, "retried": False} + + try: + variant_urns = db.scalars( + select(Variant.urn) + .join(MappedVariant) + .join(ScoreSet) + .where( + ScoreSet.urn == score_set.urn, MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None) + ) + ).all() + num_variant_urns = len(variant_urns) + + logging_context["variants_to_link_ldh"] = submission_urn + + if not variant_urns: + logger.warning( + msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH linkage (nothing to do).", + extra=logging_context, + ) + + return {"success": True, "retried": False} + + logger.info( + msg="Found current mapped variants with post mapped metadata for this score set. 
Attempting to link them to LDH submissions.", + extra=logging_context, + ) + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to build linkage urn list. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False} + + try: + logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=logging_context) + # TODO#372: Non-nullable variant urns. + blocking = functools.partial( + get_ldh_variation, + variant_urns, # type: ignore + ) + loop = asyncio.get_running_loop() + linked_data = await loop.run_in_executor(ctx["pool"], blocking) + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False} + + try: + linkage_failures = [] + for variant_urn, ldh_variation in linked_data: + # XXX: Should we unlink variation if it is not found? + if not ldh_variation: + logger.warning( + msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No LDH variation found.", + extra=logging_context, + ) + linkage_failures.append(variant_urn) + continue + + mapped_variant = db.scalars( + select(MappedVariant).join(Variant).where(Variant.urn == variant_urn, MappedVariant.current.is_(True)) + ).one_or_none() + + if not mapped_variant: + logger.warning( + msg=f"Failed to link mapped variant {variant_urn} to LDH submission. No mapped variant found.", + extra=logging_context, + ) + linkage_failures.append(variant_urn) + continue + + mapped_variant.clingen_allele_id = ldh_variation + db.add(mapped_variant) + + db.commit() + + except Exception as e: + db.rollback() + + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource linkage encountered an unexpected error while attempting to link LDH submissions. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False} + + try: + num_linkage_failures = len(linkage_failures) + ratio_failed_linking = round(num_linkage_failures / num_variant_urns, 3) + logging_context["linkage_failure_rate"] = ratio_failed_linking + logging_context["linkage_failures"] = num_linkage_failures + logging_context["linkage_successes"] = num_variant_urns - num_linkage_failures + + if not linkage_failures: + logger.info( + msg="Successfully linked all mapped variants to LDH submissions.", + extra=logging_context, + ) + return {"success": True, "retried": False} + + if ratio_failed_linking < LINKED_DATA_RETRY_THRESHOLD: + logger.warning( + msg="Linkage failures exist, but did not exceed the retry threshold.", + extra=logging_context, + ) + send_slack_message( + text=f"Failed to link {len(linkage_failures)} mapped variants to LDH submissions for score set {score_set.urn}." + f"The retry threshold was not exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." 
+            )
+            return {"success": True, "retried": False}
+
+    except Exception as e:
+        send_slack_error(e)
+        send_slack_message(text=text % score_set.urn)
+        logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)}
+        logger.error(
+            msg="LDH mapped resource linkage encountered an unexpected error while attempting to finalize linkage. This job will not be retried.",
+            extra=logging_context,
+        )
+
+        return {"success": False, "retried": False}
+
+    # If we reach this point, we should consider the job failed (there were failures which exceeded our retry threshold).
+    new_job_id = None
+    max_retries_exceeded = None
+    try:
+        new_job_id, max_retries_exceeded, backoff_time = await enqueue_job_with_backoff(
+            ctx["redis"], "variant_mapper_manager", attempt, LINKING_BACKOFF_IN_SECONDS, correlation_id
+        )
+
+        logging_context["backoff_limit_exceeded"] = max_retries_exceeded
+        logging_context["backoff_deferred_in_seconds"] = backoff_time
+        logging_context["backoff_job_id"] = new_job_id
+
+    except Exception as e:
+        send_slack_error(e)
+        send_slack_message(text=text % score_set.urn)
+        logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)}
+        logger.critical(
+            msg="LDH mapped resource linkage encountered an unexpected error while attempting to retry a failed linkage job. This job will not be retried.",
+            extra=logging_context,
+        )
+    else:
+        if new_job_id and not max_retries_exceeded:
+            logger.info(
+                msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was queued.",
+                extra=logging_context,
+            )
+            send_slack_message(
+                text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})."
+                f" The retry threshold was exceeded and this job was successfully retried. This was attempt {attempt}. Retry will occur in {backoff_time} seconds. URNs failed to link: {', '.join(linkage_failures)}."
+            )
+        elif new_job_id is None and not max_retries_exceeded:
+            logger.error(
+                msg="After a failure condition while linking mapped variants to LDH submissions, another linkage job was unable to be queued.",
+                extra=logging_context,
+            )
+            send_slack_message(
+                text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})."
+                f" The retry threshold was exceeded but this job could not be retried. This was attempt {attempt}. URNs failed to link: {', '.join(linkage_failures)}."
+            )
+        else:
+            logger.error(
+                msg="After a failure condition while linking mapped variants to LDH submissions, the maximum retries for this job were exceeded. The remaining linkage failures will not be retried.",
+                extra=logging_context,
+            )
+            send_slack_message(
+                text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})."
+                f" The retry threshold was exceeded but this job has exceeded the maximum retry level. URNs failed to link: {', '.join(linkage_failures)}."
+ ) + + finally: + return {"success": False, "retried": (not max_retries_exceeded and new_job_id is not None)} From d0c2f85b5810893ecb42f5443100adcca8790c61 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 22 Apr 2025 16:16:13 -0700 Subject: [PATCH 068/166] Handle Haplotype VRS Objects --- src/mavedb/lib/variants.py | 29 ++++++++++ src/mavedb/scripts/clingen_ldh_submission.py | 36 +++++------- src/mavedb/worker/jobs.py | 61 ++++++++++++++++---- 3 files changed, 93 insertions(+), 33 deletions(-) create mode 100644 src/mavedb/lib/variants.py diff --git a/src/mavedb/lib/variants.py b/src/mavedb/lib/variants.py new file mode 100644 index 00000000..c144bdd1 --- /dev/null +++ b/src/mavedb/lib/variants.py @@ -0,0 +1,29 @@ +from mavedb.models.mapped_variant import MappedVariant + + +def hgvs_from_vrs_allele(allele: dict) -> str: + """ + Extract the HGVS notation from the VRS allele. + """ + try: + # VRS 1.X + return allele["expressions"][0]["value"] + except KeyError: + # VRS 2.X + return allele["variation"]["expressions"][0]["value"] + + +def hgvs_from_mapped_variant(mapped_variant: MappedVariant) -> list[str]: + """ + Extract the HGVS notation from the post_mapped field of the MappedVariant object. + """ + # Necessarily a dictionary object + post_mapped_object: dict = mapped_variant.post_mapped # type: ignore + + if not post_mapped_object: + return [] + + if post_mapped_object["type"] == "Haplotype" or post_mapped_object["type"] == "CisPhasedBlock": # type: ignore + return [hgvs_from_vrs_allele(allele) for allele in post_mapped_object["members"]] + else: + return [hgvs_from_vrs_allele(post_mapped_object)] diff --git a/src/mavedb/scripts/clingen_ldh_submission.py b/src/mavedb/scripts/clingen_ldh_submission.py index 370ba91b..e25563fc 100644 --- a/src/mavedb/scripts/clingen_ldh_submission.py +++ b/src/mavedb/scripts/clingen_ldh_submission.py @@ -12,21 +12,11 @@ from mavedb.lib.clingen.linked_data_hub import ClinGenLdhService from mavedb.lib.clingen.constants import DEFAULT_LDH_SUBMISSION_BATCH_SIZE, LDH_SUBMISSION_URL from mavedb.lib.clingen.content_constructors import construct_ldh_submission +from mavedb.lib.variants import hgvs_from_mapped_variant logger = logging.getLogger(__name__) -def _variation_from_post_mapped(mapped_variant: MappedVariant) -> str: - """ - Extract the variation from the post_mapped field of the MappedVariant object. 
- """ - try: - # Assuming post_mapped is a dictionary with a specific structure - return mapped_variant.post_mapped["expressions"][0]["value"] # type: ignore - except KeyError: - return mapped_variant.post_mapped["variation"]["expressions"][0]["value"] # type: ignore - - def submit_urns_to_clingen(db: Session, urns: Sequence[str], debug: bool) -> list[str]: ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_URL) ldh_service.authenticate() @@ -61,14 +51,17 @@ def submit_urns_to_clingen(db: Session, urns: Sequence[str], debug: bool) -> lis continue logger.debug(f"Preparing {len(variant_objects)} mapped variants for submission") - variant_content = [ - ( - _variation_from_post_mapped(mapped_variant), - variant, - mapped_variant, - ) - for variant, mapped_variant in variant_objects - ] + + variant_content: list[tuple[str, Variant, MappedVariant]] = [] + for variant, mapped_variant in variant_objects: + variation = hgvs_from_mapped_variant(mapped_variant) + + if not variation: + logger.warning(f"No variation found for variant {variant.urn}.") + continue + + for allele in variation: + variant_content.append((allele, variant, mapped_variant)) if debug: logger.debug("Debug mode enabled. Submitting only one request to ClinGen.") @@ -85,12 +78,13 @@ def submit_urns_to_clingen(db: Session, urns: Sequence[str], debug: bool) -> lis else: logger.info(f"Successfully submitted all variants for URN: {urn}") - submitted_entities.extend([variant[1].urn for variant in variant_content]) + submitted_entities.extend([variant.urn for _, variant, _ in variant_content]) except Exception as e: logger.error(f"Error processing URN {urn}", exc_info=e) - return submitted_entities + # TODO#372: non-nullable urns. + return submitted_entities # type: ignore @click.command() diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index d76d2cea..715f6ede 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -22,7 +22,12 @@ ) from mavedb.lib.clingen.content_constructors import construct_ldh_submission from mavedb.lib.clingen.linked_data_hub import ClinGenLdhService, get_clingen_variation -from mavedb.lib.exceptions import MappingEnqueueError, NonexistentMappingReferenceError, NonexistentMappingResultsError +from mavedb.lib.exceptions import ( + MappingEnqueueError, + LinkingEnqueueError, + NonexistentMappingReferenceError, + NonexistentMappingResultsError, +) from mavedb.lib.logging.context import format_raised_exception_info_as_dict from mavedb.lib.score_sets import ( columns_for_dataset, @@ -34,6 +39,7 @@ validate_and_standardize_dataframe_pair, ) from mavedb.lib.validation.exceptions import ValidationError +from mavedb.lib.variants import hgvs_from_mapped_variant from mavedb.models.enums.mapping_state import MappingState from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.mapped_variant import MappedVariant @@ -72,7 +78,6 @@ async def enqueue_job_with_backoff( redis: ArqRedis, job_name: str, attempt: int, backoff: int, *args ) -> tuple[Optional[str], bool, Any]: new_job_id = None - backoff = None limit_reached = attempt > BACKOFF_LIMIT if not limit_reached: limit_reached = True @@ -717,6 +722,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score ) try: db: Session = ctx["db"] + redis: ArqRedis = ctx["redis"] score_set = db.scalars(select(ScoreSet).where(ScoreSet.id == score_set_id)).one() logging_context = setup_job_state(ctx, None, score_set.urn, correlation_id) @@ -741,7 +747,7 @@ async def 
submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score extra=logging_context, ) - return {"success": False, "retried": False} + return {"success": False, "retried": False, "enqueued_job": None} try: ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_URL) @@ -755,7 +761,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score extra=logging_context, ) - return {"success": False, "retried": False} + return {"success": False, "retried": False, "enqueued_job": None} try: variant_objects = db.scalars( @@ -773,12 +779,13 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score msg="No current mapped variants with post mapped metadata were found for this score set. Skipping LDH submission.", extra=logging_context, ) - return {"success": True, "retried": False} + return {"success": True, "retried": False, "enqueued_job": None} + + variant_content = [] + for variant, mapped_variant in variant_objects: + for variation in hgvs_from_mapped_variant(mapped_variant): + variant_content.append((variation, variant, mapped_variant)) - variant_content = [ - (mapped_variant.post_mapped["variation"]["expressions"][0]["value"], variant, mapped_variant) - for variant, mapped_variant in variant_objects - ] submission_content = construct_ldh_submission(variant_content) except Exception as e: @@ -790,7 +797,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score extra=logging_context, ) - return {"success": False, "retried": False} + return {"success": False, "retried": False, "enqueued_job": None} try: blocking = functools.partial( @@ -822,9 +829,39 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score extra=logging_context, ) - return {"success": False, "retried": False} + return {"success": False, "retried": False, "enqueued_job": None} - return {"success": True, "retried": False} + new_job = None + try: + new_job = await redis.enqueue_job( + "link_clingen_variants", + correlation_id, + score_set.id, + 1, + defer_by=timedelta(minutes=LINKING_BACKOFF_IN_SECONDS), + ) + + if new_job: + new_job_id = new_job.job_id + + logging_context["link_clingen_variants_job_id"] = new_job_id + logger.info(msg="Queued a new ClinGen linking job.", extra=logging_context) + + else: + raise LinkingEnqueueError() + + except Exception as e: + send_slack_error(e) + send_slack_message(text=text % score_set.urn) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="LDH mapped resource submission encountered an unexpected error while attempting to enqueue a linking job. 
This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": new_job} + + return {"success": True, "retried": False, "enqueued_job": new_job} def get_ldh_variation(logging_ctx: dict, variant_urns: list[str]): From a9ace94e618c16ac237f0270828cf6bb5f07d1c0 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 22 Apr 2025 16:16:34 -0700 Subject: [PATCH 069/166] Add unlinked flag to linking job --- src/mavedb/scripts/link_clingen_variants.py | 23 ++++++++++++--------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/mavedb/scripts/link_clingen_variants.py b/src/mavedb/scripts/link_clingen_variants.py index cde1f8ba..d0cd921c 100644 --- a/src/mavedb/scripts/link_clingen_variants.py +++ b/src/mavedb/scripts/link_clingen_variants.py @@ -18,7 +18,8 @@ @with_database_session @click.argument("urns", nargs=-1) @click.option("--score-sets/--variants", default=False) -def link_clingen_variants(db: Session, urns: Sequence[str], score_sets: bool) -> None: +@click.option("--unlinked", default=False) +def link_clingen_variants(db: Session, urns: Sequence[str], score_sets: bool, unlinked: bool) -> None: """ Submit data to ClinGen for mapped variant allele ID generation for the given URNs. """ @@ -28,15 +29,17 @@ def link_clingen_variants(db: Session, urns: Sequence[str], score_sets: bool) -> # Convert score set URNs to variant URNs. if score_sets: - variants = [ - db.scalars( - select(Variant.urn) - .join(MappedVariant) - .join(ScoreSet) - .where(ScoreSet.urn == urn, MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None)) - ).all() - for urn in urns - ] + query = ( + select(Variant.urn) + .join(MappedVariant) + .join(ScoreSet) + .where(MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None)) + ) + + if unlinked: + query = query.where(MappedVariant.clingen_allele_id.is_(None)) + + variants = [db.scalars(query.where(ScoreSet.urn == urn)).all() for urn in urns] urns = [variant for sublist in variants for variant in sublist if variant is not None] failed_urns = [] From 1d4d6b558a4437157757b0c068edcec541164a50 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 22 Apr 2025 16:20:49 -0700 Subject: [PATCH 070/166] wip: docs and tests for clingen jobs --- src/mavedb/lib/clingen/linked_data_hub.py | 92 ++++++- src/mavedb/lib/clingen/py.typed | 0 tests/helpers/constants.py | 7 + tests/lib/clingen/__init__.py | 0 .../lib/clingen/test_content_constructors.py | 153 +++++++++++ tests/lib/clingen/test_linked_data_hub.py | 258 ++++++++++++++++++ tests/worker/test_jobs.py | 59 ++++ 7 files changed, 568 insertions(+), 1 deletion(-) create mode 100644 src/mavedb/lib/clingen/py.typed create mode 100644 tests/lib/clingen/__init__.py create mode 100644 tests/lib/clingen/test_content_constructors.py create mode 100644 tests/lib/clingen/test_linked_data_hub.py diff --git a/src/mavedb/lib/clingen/linked_data_hub.py b/src/mavedb/lib/clingen/linked_data_hub.py index da4bb5d3..0c6df825 100644 --- a/src/mavedb/lib/clingen/linked_data_hub.py +++ b/src/mavedb/lib/clingen/linked_data_hub.py @@ -18,12 +18,66 @@ class ClinGenLdhService: + """ + A service class for interacting with the ClinGen Linked Data Hub (LDH) API. + + This class provides methods for authenticating with the Genboree services and dispatching + submissions to the ClinGen LDH API. + + Attributes: + url (str): The base URL of the ClinGen LDH API. 
+ + Methods: + __init__(url: str) -> None: + Initializes the ClinGenLdhService instance with the given API URL. + + authenticate() -> str: + Authenticates with the Genboree services and retrieves a JSON Web Token (JWT). + If a valid JWT already exists, it is reused. Otherwise, a new JWT is obtained + by authenticating with the Genboree API. + + dispatch_submissions(content_submissions: list[LdhSubmission], batch_size: Optional[int] = None) -> tuple[list, list]: + Dispatches a list of LDH submissions to the ClinGen LDH API. Supports optional + batching of submissions. + + Args: + content_submissions (list[LdhSubmission]): A list of LDH submissions to be dispatched. + batch_size (Optional[int]): The size of each batch for submission. If None, no batching is applied. + + Returns: + tuple[list, list]: A tuple containing two lists: + - A list of successful submission responses. + - A list of failed submissions. + + _existing_jwt() -> Optional[str]: + Checks for an existing and valid Genboree JWT in the environment variables. + + Returns: + Optional[str]: The existing JWT if valid, or None if no valid JWT is found. + """ + def __init__(self, url: str) -> None: self.url = url def authenticate(self) -> str: + """ + Authenticates with Genboree services and retrieves a JSON Web Token (JWT). + + This method first checks for an existing JWT using the `_existing_jwt` method. If a valid JWT is found, + it is returned immediately. Otherwise, the method attempts to authenticate with Genboree services + using the account name and password provided via environment variables. + + Raises: + ValueError: If the Genboree account name or password is not set, or if the JWT cannot be parsed + from the authentication response. + requests.exceptions.HTTPError: If the HTTP request to Genboree services fails. + + Returns: + str: The JWT retrieved from Genboree services, which is also stored in the `GENBOREE_JWT` + environment variable for future use. + """ if existing_jwt := self._existing_jwt(): - logger.info(msg="Using existing Genboree JWT for authentication.", extra=logging_context()) + logger.debug(msg="Using existing Genboree JWT for authentication.", extra=logging_context()) return existing_jwt logger.debug( @@ -68,6 +122,22 @@ def authenticate(self) -> str: def dispatch_submissions( self, content_submissions: list[LdhSubmission], batch_size: Optional[int] = None ) -> tuple[list, list]: + """ + Dispatches a list of content submissions to a specified URL in batches, if specified. + + Args: + content_submissions (list[LdhSubmission]): A list of submissions to be dispatched. + batch_size (Optional[int]): The size of each batch for dispatching submissions. + If None, submissions are dispatched without batching. + + Returns: + tuple[list, list]: A tuple containing two lists: + - The first list contains the successful submission responses. + - The second list contains the submissions that failed to dispatch. + + Raises: + requests.exceptions.RequestException: If an error occurs during the HTTP request. + """ submission_successes = [] submission_failures = [] submissions = list(batched(content_submissions, batch_size)) if batch_size is not None else content_submissions @@ -109,6 +179,16 @@ def dispatch_submissions( return submission_successes, submission_failures def _existing_jwt(self) -> Optional[str]: + """ + Checks for an existing Genboree JWT (JSON Web Token) in the environment variables. 
+ + This method retrieves the JWT from the "GENBOREE_JWT" environment variable, verifies its + presence, and checks its expiration status. If the token is valid and not expired, it is returned. + Otherwise, it returns None. + + Returns: + Optional[str]: The existing and valid Genboree JWT if found, otherwise None. + """ logger.debug(msg="Checking for existing Genboree JWT.", extra=logging_context()) existing_jwt = os.getenv("GENBOREE_JWT") @@ -128,6 +208,16 @@ def _existing_jwt(self) -> Optional[str]: def get_clingen_variation(urn: str) -> Optional[dict]: + """ + Fetches ClinGen variation data for a given URN (Uniform Resource Name) from the Linked Data Hub. + + Args: + urn (str): The URN of the variation to fetch. + + Returns: + Optional[dict]: A dictionary containing the variation data if the request is successful, + or None if the request fails. + """ response = requests.get( f"{LDH_LINKED_DATA_URL}/{parse.quote_plus(urn)}", headers={"Accept": "application/json"}, diff --git a/src/mavedb/lib/clingen/py.typed b/src/mavedb/lib/clingen/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index ecaeafca..c3874aca 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -866,3 +866,10 @@ "dbVersion": "2024", "mappedVariants": [], } + + +TEST_CLINGEN_SUBMISSION_RESPONSE = { + "data": {"msg": "Data sent successfully", "msgIds": ["(148894,0,-1,0)"]}, + "metadata": {"rendered": {"by": "https://genboree.org/mq/brdg/srvc", "when": datetime.now().isoformat()}}, + "status": {"code": 200, "name": "OK"}, +} diff --git a/tests/lib/clingen/__init__.py b/tests/lib/clingen/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/lib/clingen/test_content_constructors.py b/tests/lib/clingen/test_content_constructors.py new file mode 100644 index 00000000..1e691b1d --- /dev/null +++ b/tests/lib/clingen/test_content_constructors.py @@ -0,0 +1,153 @@ +from unittest.mock import patch +from uuid import UUID + +from mavedb.lib.clingen.content_constructors import ( + construct_ldh_submission_event, + construct_ldh_submission_subject, + construct_ldh_submission, + construct_ldh_submission_entity, +) +from mavedb.lib.clingen.constants import LDH_ENTITY_NAME, LDH_SUBMISSION_TYPE, MAVEDB_BASE_GIT +from mavedb import __version__ + + +def test_construct_ldh_submission_event(): + sbj = {"Variant": {"hgvs": "NM_000546.5:c.215C>G"}} + + with ( + patch("mavedb.lib.clingen.content_constructors.uuid4") as mock_uuid4, + patch("mavedb.lib.clingen.content_constructors.datetime") as mock_datetime, + ): + mock_uuid4.return_value = UUID("12345678-1234-5678-1234-567812345678") + mock_datetime.now.return_value = "2023-01-01T00:00:00" + + result = construct_ldh_submission_event(sbj) + + assert result["type"] == LDH_SUBMISSION_TYPE + assert result["name"] == LDH_ENTITY_NAME + assert result["uuid"] == "12345678-1234-5678-1234-567812345678" + assert result["sbj"] == { + "id": "NM_000546.5:c.215C>G", + "type": "Variant", + "format": "hgvs", + "add": True, + } + assert result["triggered"]["by"] == { + "host": MAVEDB_BASE_GIT, + "id": "resource_published", + "iri": f"{MAVEDB_BASE_GIT}/releases/tag/v{__version__}", + } + assert result["triggered"]["at"] == "2023-01-01T00:00:00" + + +def test_construct_ldh_submission_subject(): + hgvs = "NM_000546.5:c.215C>G" + result = construct_ldh_submission_subject(hgvs) + + assert result == {"Variant": {"hgvs": hgvs}} + + +def test_construct_ldh_submission_entity(): + class MockVariant: + def 
__init__(self, urn, data): + self.urn = urn + self.data = data + + class MockMappedVariant: + def __init__(self, pre_mapped, post_mapped, mapping_api_version): + self.pre_mapped = pre_mapped + self.post_mapped = post_mapped + self.mapping_api_version = mapping_api_version + + variant = MockVariant( + urn="urn:example:variant123", + data={"score_data": {"score": 0.95}}, + ) + mapped_variant = MockMappedVariant( + pre_mapped="pre-mapped-value", + post_mapped="post-mapped-value", + mapping_api_version="v1.0", + ) + + result = construct_ldh_submission_entity(variant, mapped_variant) + + assert "MaveDBMapping" in result + assert len(result["MaveDBMapping"]) == 1 + mapping = result["MaveDBMapping"][0] + + assert mapping["entContent"]["mavedb_id"] == "urn:example:variant123" + assert mapping["entContent"]["pre_mapped"] == "pre-mapped-value" + assert mapping["entContent"]["post_mapped"] == "post-mapped-value" + assert mapping["entContent"]["mapping_api_version"] == "v1.0" + assert mapping["entContent"]["score"] == 0.95 + + assert mapping["entId"] == "urn:example:variant123" + assert mapping["entIri"] == "https://staging.mavedb.org/score-sets/urn:example:variant123" + + +def test_construct_ldh_submission(): + class MockVariant: + def __init__(self, urn, data): + self.urn = urn + self.data = data + + class MockMappedVariant: + def __init__(self, pre_mapped, post_mapped, mapping_api_version): + self.pre_mapped = pre_mapped + self.post_mapped = post_mapped + self.mapping_api_version = mapping_api_version + + variant1 = MockVariant( + urn="urn:example:variant123", + data={"score_data": {"score": 0.95}}, + ) + mapped_variant1 = MockMappedVariant( + pre_mapped="pre-mapped-value1", + post_mapped="post-mapped-value1", + mapping_api_version="v1.0", + ) + + variant2 = MockVariant( + urn="urn:example:variant456", + data={"score_data": {"score": 0.85}}, + ) + mapped_variant2 = MockMappedVariant( + pre_mapped="pre-mapped-value2", + post_mapped="post-mapped-value2", + mapping_api_version="v2.0", + ) + + variant_content = [ + ("NM_000546.5:c.215C>G", variant1, mapped_variant1), + ("NM_000546.5:c.216C>T", variant2, mapped_variant2), + ] + + with ( + patch("mavedb.lib.clingen.content_constructors.uuid4") as mock_uuid4, + patch("mavedb.lib.clingen.content_constructors.datetime") as mock_datetime, + ): + mock_uuid4.side_effect = [ + UUID("12345678-1234-5678-1234-567812345678"), + UUID("87654321-4321-8765-4321-876543218765"), + ] + mock_datetime.now.return_value = "2023-01-01T00:00:00" + + result = construct_ldh_submission(variant_content) + + assert len(result) == 2 + + # Validate the first submission + submission1 = result[0] + assert submission1["event"]["uuid"] == "12345678-1234-5678-1234-567812345678" + assert submission1["event"]["sbj"]["id"] == "NM_000546.5:c.215C>G" + assert submission1["content"]["sbj"] == {"Variant": {"hgvs": "NM_000546.5:c.215C>G"}} + assert submission1["content"]["ld"]["MaveDBMapping"][0]["entContent"]["mavedb_id"] == "urn:example:variant123" + assert submission1["content"]["ld"]["MaveDBMapping"][0]["entContent"]["score"] == 0.95 + + # Validate the second submission + submission2 = result[1] + assert submission2["event"]["uuid"] == "87654321-4321-8765-4321-876543218765" + assert submission2["event"]["sbj"]["id"] == "NM_000546.5:c.216C>T" + assert submission2["content"]["sbj"] == {"Variant": {"hgvs": "NM_000546.5:c.216C>T"}} + assert submission2["content"]["ld"]["MaveDBMapping"][0]["entContent"]["mavedb_id"] == "urn:example:variant456" + assert 
submission2["content"]["ld"]["MaveDBMapping"][0]["entContent"]["score"] == 0.85 diff --git a/tests/lib/clingen/test_linked_data_hub.py b/tests/lib/clingen/test_linked_data_hub.py new file mode 100644 index 00000000..dc1750ef --- /dev/null +++ b/tests/lib/clingen/test_linked_data_hub.py @@ -0,0 +1,258 @@ +import os +from urllib import parse +import pytest +import requests +from datetime import datetime +from unittest.mock import patch, MagicMock +from mavedb.lib.clingen.constants import LDH_LINKED_DATA_URL +from mavedb.lib.clingen.linked_data_hub import ClinGenLdhService, get_clingen_variation + + +@pytest.fixture +def clingen_service(): + yield ClinGenLdhService(url="https://pytest.clingen.com") + + +class TestClinGenLdhService: + def test_init(self, clingen_service): + assert clingen_service.url == "https://pytest.clingen.com" + + ### Test the authenticate method + + def test_authenticate_with_existing_jwt(self, clingen_service: ClinGenLdhService): + with patch.object(ClinGenLdhService, "_existing_jwt", return_value="existing_jwt_token") as mock_existing_jwt: + jwt = clingen_service.authenticate() + + assert jwt == "existing_jwt_token" + mock_existing_jwt.assert_called_once() + + @patch("mavedb.lib.clingen.linked_data_hub.requests.post") + @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService._existing_jwt") + @patch.dict( + os.environ, {"GENBOREE_ACCOUNT_NAME": "test_account", "GENBOREE_ACCOUNT_PASSWORD": "test_password"}, clear=True + ) + def test_authenticate_with_new_jwt(self, mock_existing_jwt, mock_post, clingen_service): + mock_existing_jwt.return_value = None + + mock_response = MagicMock() + mock_response.json.return_value = {"data": {"jwt": "new_jwt_token"}} + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + jwt = clingen_service.authenticate() + assert jwt == "new_jwt_token" + assert os.environ["GENBOREE_JWT"] == "new_jwt_token" + mock_post.assert_called_once_with( + "https://genboree.org/auth/usr/gb:test_account/auth", + json={"type": "plain", "val": "test_password"}, + ) + + @patch("mavedb.lib.clingen.linked_data_hub.requests.post") + @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService._existing_jwt") + @patch.dict(os.environ, {}, clear=True) + def test_authenticate_missing_credentials(self, mock_existing_jwt, mock_post, clingen_service): + mock_existing_jwt.return_value = None + with pytest.raises(ValueError, match="Genboree account name and/or password are not set"): + clingen_service.authenticate() + mock_post.assert_not_called() + + @patch("mavedb.lib.clingen.linked_data_hub.requests.post") + @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService._existing_jwt") + @patch.dict( + os.environ, {"GENBOREE_ACCOUNT_NAME": "test_account", "GENBOREE_ACCOUNT_PASSWORD": "test_password"}, clear=True + ) + def test_authenticate_http_error(mock_existing_jwt, mock_post, clingen_service): + mock_existing_jwt.return_value = None + + mock_response = MagicMock() + mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("HTTP Error") + mock_post.return_value = mock_response + + with pytest.raises(requests.exceptions.HTTPError, match="HTTP Error"): + clingen_service.authenticate() + mock_post.assert_called_once() + + @patch("mavedb.lib.clingen.linked_data_hub.requests.post") + @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService._existing_jwt") + @patch.dict(os.environ, {"GENBOREE_ACCOUNT_NAME": "test_account", "GENBOREE_ACCOUNT_PASSWORD": "test_password"}) + def 
test_authenticate_missing_jwt_in_response(mock_existing_jwt, mock_post, clingen_service): + mock_existing_jwt.return_value = None + + mock_response = MagicMock() + mock_response.json.return_value = {"data": {}} + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + with pytest.raises(ValueError, match="Could not parse JWT from valid response"): + clingen_service.authenticate() + mock_post.assert_called_once() + + ### Test the _existing_jwt method + + @patch("mavedb.lib.clingen.linked_data_hub.os.getenv") + @patch("mavedb.lib.clingen.linked_data_hub.jwt.get_unverified_claims") + def test_existing_jwt_valid(mock_get_unverified_claims, mock_getenv, clingen_service): + mock_getenv.return_value = "valid_jwt_token" + mock_get_unverified_claims.return_value = {"exp": (datetime.now().timestamp() + 3600)} + + jwt = clingen_service._existing_jwt() + assert jwt == "valid_jwt_token" + mock_getenv.assert_called_once_with("GENBOREE_JWT") + mock_get_unverified_claims.assert_called_once_with("valid_jwt_token") + + @patch("mavedb.lib.clingen.linked_data_hub.os.getenv") + @patch("mavedb.lib.clingen.linked_data_hub.jwt.get_unverified_claims") + def test_existing_jwt_expired(mock_get_unverified_claims, mock_getenv, clingen_service): + mock_getenv.return_value = "expired_jwt_token" + mock_get_unverified_claims.return_value = {"exp": (datetime.now().timestamp() - 3600)} + + jwt = clingen_service._existing_jwt() + assert jwt is None + mock_getenv.assert_called_once_with("GENBOREE_JWT") + mock_get_unverified_claims.assert_called_once_with("expired_jwt_token") + + @patch("mavedb.lib.clingen.linked_data_hub.os.getenv") + def test_existing_jwt_not_set(mock_getenv, clingen_service): + mock_getenv.return_value = None + + jwt = clingen_service._existing_jwt() + assert jwt is None + mock_getenv.assert_called_once_with("GENBOREE_JWT") + + ### Test the dispatch_submissions method + + @patch("mavedb.lib.clingen.linked_data_hub.request_with_backoff") + @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService.authenticate") + @patch("mavedb.lib.clingen.linked_data_hub.batched") + def test_dispatch_submissions_success(mock_batched, mock_authenticate, mock_request_with_backoff, clingen_service): + mock_authenticate.return_value = "test_jwt_token" + mock_request_with_backoff.return_value.json.return_value = {"success": True} + + content_submissions = [{"id": 1}, {"id": 2}, {"id": 3}] + mock_batched.return_value = [[{"id": 1}, {"id": 2}], [{"id": 3}]] # Simulate batching + + successes, failures = clingen_service.dispatch_submissions(content_submissions, batch_size=2) + + assert len(successes) == 3 + assert len(failures) == 0 + mock_batched.assert_called_once_with(content_submissions, 2) + mock_request_with_backoff.assert_called_with( + method="PUT", + url=clingen_service.url, + json={"id": 1}, + headers={"Authorization": "Bearer test_jwt_token", "Content-Type": "application/json"}, + ) + + @patch("mavedb.lib.clingen.linked_data_hub.request_with_backoff") + @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService.authenticate") + def test_dispatch_submissions_failure(mock_authenticate, mock_request_with_backoff, clingen_service): + mock_authenticate.return_value = "test_jwt_token" + mock_request_with_backoff.side_effect = requests.exceptions.RequestException("Request failed") + + content_submissions = [{"id": 1}, {"id": 2}, {"id": 3}] + + successes, failures = clingen_service.dispatch_submissions(content_submissions) + + assert len(successes) == 0 + assert len(failures) == 3 + 
mock_request_with_backoff.assert_called_with( + method="PUT", + url=clingen_service.url, + json={"id": 1}, + headers={"Authorization": "Bearer test_jwt_token", "Content-Type": "application/json"}, + ) + + @patch("mavedb.lib.clingen.linked_data_hub.request_with_backoff") + @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService.authenticate") + def test_dispatch_submissions_partial_success(mock_authenticate, mock_request_with_backoff, clingen_service): + mock_authenticate.return_value = "test_jwt_token" + + def mock_request_with_backoff_side_effect(*args, **kwargs): + if kwargs["json"]["id"] == 2: + raise requests.exceptions.RequestException("Request failed") + return MagicMock(json=MagicMock(return_value={"success": True})) + + mock_request_with_backoff.side_effect = mock_request_with_backoff_side_effect + + content_submissions = [{"id": 1}, {"id": 2}, {"id": 3}] + + successes, failures = clingen_service.dispatch_submissions(content_submissions) + + assert len(successes) == 2 + assert len(failures) == 1 + assert failures[0]["id"] == 2 + + @patch("mavedb.lib.clingen.linked_data_hub.request_with_backoff") + @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService.authenticate") + @patch("mavedb.lib.clingen.linked_data_hub.batched") + def test_dispatch_submissions_no_batching( + mock_batched, mock_authenticate, mock_request_with_backoff, clingen_service + ): + mock_authenticate.return_value = "test_jwt_token" + mock_request_with_backoff.return_value.json.return_value = {"success": True} + + content_submissions = [{"id": 1}, {"id": 2}, {"id": 3}] + mock_batched.return_value = content_submissions # No batching + + successes, failures = clingen_service.dispatch_submissions(content_submissions) + + assert len(successes) == 3 + assert len(failures) == 0 + mock_batched.assert_not_called() + mock_request_with_backoff.assert_called_with( + method="PUT", + url=clingen_service.url, + json={"id": 1}, + headers={"Authorization": "Bearer test_jwt_token", "Content-Type": "application/json"}, + ) + + +@patch("mavedb.lib.clingen.linked_data_hub.requests.get") +def test_get_clingen_variation_success(mock_get): + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {"data": {"ldFor": {"Variant": [{"id": "variant_1", "name": "Test Variant"}]}}} + mock_get.return_value = mock_response + + urn = "urn:example:variant" + result = get_clingen_variation(urn) + + assert result == {"id": "variant_1", "name": "Test Variant"} + mock_get.assert_called_once_with( + f"{LDH_LINKED_DATA_URL}/{parse.quote_plus(urn)}", + headers={"Accept": "application/json"}, + ) + + +@patch("mavedb.lib.clingen.linked_data_hub.requests.get") +def test_get_clingen_variation_failure(mock_get): + mock_response = MagicMock() + mock_response.status_code = 404 + mock_response.text = "Not Found" + mock_get.return_value = mock_response + + urn = "urn:example:nonexistent_variant" + result = get_clingen_variation(urn) + + assert result is None + mock_get.assert_called_once_with( + f"{LDH_LINKED_DATA_URL}/{parse.quote_plus(urn)}", + headers={"Accept": "application/json"}, + ) + + +@patch("mavedb.lib.clingen.linked_data_hub.requests.get") +def test_get_clingen_variation_invalid_response(mock_get): + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {"data": {"ldFor": {}}} # Missing "Variant" key + mock_get.return_value = mock_response + + urn = "urn:example:variant" + with pytest.raises(KeyError): + get_clingen_variation(urn) + + 
mock_get.assert_called_once_with( + f"{LDH_LINKED_DATA_URL}/{parse.quote_plus(urn)}", + headers={"Accept": "application/json"}, + ) diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index 18e0846a..6886ef82 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -30,9 +30,11 @@ create_variants_for_score_set, map_variants_for_score_set, variant_mapper_manager, + submit_score_set_mappings_to_ldh, ) from tests.helpers.constants import ( TEST_CDOT_TRANSCRIPT, + TEST_CLINGEN_SUBMISSION_RESPONSE, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_EXPERIMENT, TEST_MINIMAL_SEQ_SCORESET, @@ -1457,3 +1459,60 @@ async def failed_mapping_job(): assert len(mapped_variants_for_score_set) == 0 assert score_set.mapping_state == MappingState.failed assert score_set.mapping_errors is not None + + +############################################################################################################################################ +# ClinGen Submission +############################################################################################################################################ + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_success( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_submission_job(): + return TEST_CLINGEN_SUBMISSION_RESPONSE + + with patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_job(), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert result["success"] + assert not result["retried"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_in_submission( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def failed_submission_job(): + raise Exception("Submission failed") + + with patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=failed_submission_job(), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] From 24106960cf0d8b2e039bd3f556a5f7e1f8ef092f Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 22 Apr 2025 16:21:14 -0700 Subject: [PATCH 071/166] Add enqueue linking error to exceptions classes --- src/mavedb/lib/exceptions.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/mavedb/lib/exceptions.py b/src/mavedb/lib/exceptions.py index 27759438..026c6800 100644 --- a/src/mavedb/lib/exceptions.py +++ b/src/mavedb/lib/exceptions.py @@ -176,3 +176,11 @@ class NonexistentMappingReferenceError(ValueError): class MappingEnqueueError(ValueError): """Raised when a mapping job fails to be enqueued despite appearing as if it should have been""" + + pass + + +class LinkingEnqueueError(ValueError): + """Raised when a linking job fails to be enqueued despite appearing as if it should have been""" + + pass From 6e67d1a0e94813de766e4712b6443d485844b1b0 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 22 Apr 2025 16:21:28 -0700 Subject: [PATCH 072/166] Add new clingen 
functions to worker --- src/mavedb/worker/settings.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mavedb/worker/settings.py b/src/mavedb/worker/settings.py index 76bc4a32..754c3f44 100644 --- a/src/mavedb/worker/settings.py +++ b/src/mavedb/worker/settings.py @@ -15,6 +15,8 @@ variant_mapper_manager, refresh_materialized_views, refresh_published_variants_view, + submit_score_set_mappings_to_ldh, + link_clingen_variants, ) # ARQ requires at least one task on startup. @@ -23,6 +25,8 @@ variant_mapper_manager, map_variants_for_score_set, refresh_published_variants_view, + submit_score_set_mappings_to_ldh, + link_clingen_variants, ] # In UTC time. Depending on daylight savings time, this will bounce around by an hour but should always be very early in the morning # for all of the USA. From 4f19304ef8613b9ca3a0ba96128b68bf1e4a799f Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 23 Apr 2025 10:48:14 -0700 Subject: [PATCH 073/166] Tests for VRS Variation Extraction Utilities --- src/mavedb/lib/variants.py | 13 ++++--- tests/helpers/constants.py | 69 +++++++++++++++++++++++++++++++++++--- tests/lib/test_variants.py | 68 +++++++++++++++++++++++++++++++++++++ 3 files changed, 140 insertions(+), 10 deletions(-) create mode 100644 tests/lib/test_variants.py diff --git a/src/mavedb/lib/variants.py b/src/mavedb/lib/variants.py index c144bdd1..ad4c2676 100644 --- a/src/mavedb/lib/variants.py +++ b/src/mavedb/lib/variants.py @@ -6,10 +6,10 @@ def hgvs_from_vrs_allele(allele: dict) -> str: Extract the HGVS notation from the VRS allele. """ try: - # VRS 1.X + # VRS 2.X return allele["expressions"][0]["value"] except KeyError: - # VRS 2.X + # VRS 1.X return allele["variation"]["expressions"][0]["value"] @@ -17,13 +17,16 @@ def hgvs_from_mapped_variant(mapped_variant: MappedVariant) -> list[str]: """ Extract the HGVS notation from the post_mapped field of the MappedVariant object. 
""" - # Necessarily a dictionary object post_mapped_object: dict = mapped_variant.post_mapped # type: ignore if not post_mapped_object: return [] - if post_mapped_object["type"] == "Haplotype" or post_mapped_object["type"] == "CisPhasedBlock": # type: ignore + if post_mapped_object["type"] == "Haplotype": # type: ignore return [hgvs_from_vrs_allele(allele) for allele in post_mapped_object["members"]] - else: + elif post_mapped_object["type"] == "CisPhasedBlock": # type: ignore + return [hgvs_from_vrs_allele(allele) for allele in post_mapped_object["members"]] + elif post_mapped_object["type"] == "Allele": # type: ignore return [hgvs_from_vrs_allele(post_mapped_object)] + else: + raise ValueError(f"Unsupported post_mapped type: {post_mapped_object['type']}") diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index c3874aca..fd54bb5f 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -19,6 +19,7 @@ TEST_SEQUENCE_LOCATION_ACCESSION = "ga4gh:SL.test" TEST_REFSEQ_IDENTIFIER = "NM_003345" +TEST_HGVS_IDENTIFIER = f"{TEST_REFSEQ_IDENTIFIER}:p.Asp5Phe" VALID_ACCESSION = "NM_001637.3" VALID_GENE = "BRCA1" @@ -45,7 +46,53 @@ "id": 1, } -TEST_VALID_PRE_MAPPED_VRS_ALLELE = { +# VRS 1.X +TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS1_X = { + "id": TEST_GA4GH_IDENTIFIER, + "type": "Allele", + "variation": { + "state": {"type": "LiteralSequenceExpression", "sequence": "V"}, + "digest": TEST_GA4GH_DIGEST, + "location": { + "id": TEST_SEQUENCE_LOCATION_ACCESSION, + "end": 2, + "type": "SequenceLocation", + "start": 1, + "digest": TEST_GA4GH_DIGEST, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": TEST_REFGET_ACCESSION, + }, + }, + "extensions": [{"name": "vrs_ref_allele_seq", "type": "Extension", "value": "W"}], + }, +} + +TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS1_X = { + "id": TEST_GA4GH_IDENTIFIER, + "type": "Allele", + "variation": { + "state": {"type": "LiteralSequenceExpression", "sequence": "F"}, + "digest": TEST_GA4GH_DIGEST, + "location": { + "id": TEST_SEQUENCE_LOCATION_ACCESSION, + "end": 6, + "type": "SequenceLocation", + "start": 5, + "digest": TEST_GA4GH_DIGEST, + "sequenceReference": { + "type": "SequenceReference", + "label": TEST_REFSEQ_IDENTIFIER, + "refgetAccession": TEST_REFGET_ACCESSION, + }, + }, + "extensions": [{"name": "vrs_ref_allele_seq", "type": "Extension", "value": "D"}], + "expressions": [{"value": TEST_HGVS_IDENTIFIER, "syntax": "hgvs.p"}], + }, +} + +# VRS 2.X +TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X = { "id": TEST_GA4GH_IDENTIFIER, "type": "Allele", "state": {"type": "LiteralSequenceExpression", "sequence": "V"}, @@ -64,7 +111,7 @@ "extensions": [{"name": "vrs_ref_allele_seq", "type": "Extension", "value": "W"}], } -TEST_VALID_POST_MAPPED_VRS_ALLELE = { +TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X = { "id": TEST_GA4GH_IDENTIFIER, "type": "Allele", "state": {"type": "LiteralSequenceExpression", "sequence": "F"}, @@ -82,17 +129,29 @@ }, }, "extensions": [{"name": "vrs_ref_allele_seq", "type": "Extension", "value": "D"}], - "expressions": [{"value": f"{TEST_REFSEQ_IDENTIFIER}:p.Asp5Phe", "syntax": "hgvs.p"}], + "expressions": [{"value": TEST_HGVS_IDENTIFIER, "syntax": "hgvs.p"}], } +# VRS 1.X TEST_VALID_PRE_MAPPED_VRS_HAPLOTYPE = { "type": "Haplotype", - "members": [TEST_VALID_PRE_MAPPED_VRS_ALLELE, TEST_VALID_PRE_MAPPED_VRS_ALLELE], + "members": [TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS1_X, TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS1_X], } TEST_VALID_POST_MAPPED_VRS_HAPLOTYPE = { "type": "Haplotype", - "members": 
[TEST_VALID_POST_MAPPED_VRS_ALLELE, TEST_VALID_POST_MAPPED_VRS_ALLELE], + "members": [TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS1_X, TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS1_X], +} + +# VRS 2.X +TEST_VALID_PRE_MAPPED_VRS_CIS_PHASED_BLOCK = { + "type": "Haplotype", + "members": [TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X], +} + +TEST_VALID_POST_MAPPED_VRS_CIS_PHASED_BLOCK = { + "type": "Haplotype", + "members": [TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X], } TEST_USER = { diff --git a/tests/lib/test_variants.py b/tests/lib/test_variants.py new file mode 100644 index 00000000..7571a5fa --- /dev/null +++ b/tests/lib/test_variants.py @@ -0,0 +1,68 @@ +import pytest +from unittest.mock import MagicMock + +from mavedb.lib.variants import hgvs_from_vrs_allele +from mavedb.lib.variants import hgvs_from_mapped_variant + +from tests.helpers.constants import ( + TEST_HGVS_IDENTIFIER, + TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS1_X, + TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, + TEST_VALID_POST_MAPPED_VRS_HAPLOTYPE, + TEST_VALID_POST_MAPPED_VRS_CIS_PHASED_BLOCK, +) + + +@pytest.mark.parametrize("allele", [TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS1_X, TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X]) +def test_hgvs_from_vrs_allele(allele): + result = hgvs_from_vrs_allele(allele) + assert result == TEST_HGVS_IDENTIFIER + + +def test_hgvs_from_vrs_allele_invalid(): + allele = {"invalid_key": "invalid_value"} + with pytest.raises(KeyError): + hgvs_from_vrs_allele(allele) + + +def test_hgvs_from_mapped_variant_haplotype(): + mapped_variant = MagicMock() + mapped_variant.post_mapped = TEST_VALID_POST_MAPPED_VRS_HAPLOTYPE + result = hgvs_from_mapped_variant(mapped_variant) + assert result == [TEST_HGVS_IDENTIFIER, TEST_HGVS_IDENTIFIER] + + +def test_hgvs_from_mapped_variant_cis_phased_block(): + mapped_variant = MagicMock() + mapped_variant.post_mapped = TEST_VALID_POST_MAPPED_VRS_CIS_PHASED_BLOCK + result = hgvs_from_mapped_variant(mapped_variant) + assert result == [TEST_HGVS_IDENTIFIER, TEST_HGVS_IDENTIFIER] + + +@pytest.mark.parametrize("allele", [TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS1_X, TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X]) +def test_hgvs_from_mapped_variant_single_allele(allele): + mapped_variant = MagicMock() + mapped_variant.post_mapped = allele + result = hgvs_from_mapped_variant(mapped_variant) + assert result == [TEST_HGVS_IDENTIFIER] + + +def test_hgvs_from_mapped_variant_empty_post_mapped(): + mapped_variant = MagicMock() + mapped_variant.post_mapped = None + result = hgvs_from_mapped_variant(mapped_variant) + assert result == [] + + +def test_hgvs_from_mapped_variant_invalid_type(): + mapped_variant = MagicMock() + mapped_variant.post_mapped = {"type": "InvalidType"} + with pytest.raises(ValueError): + hgvs_from_mapped_variant(mapped_variant) + + +def test_hgvs_from_mapped_variant_invalid_structure(): + mapped_variant = MagicMock() + mapped_variant.post_mapped = {"invalid_key": "InvalidType"} + with pytest.raises(ValueError): + hgvs_from_mapped_variant(mapped_variant) From 631ae0f1059a5b0d8ce7ba7b54ea06160a4ba760 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 24 Apr 2025 14:11:50 -0700 Subject: [PATCH 074/166] Tests for ClinGen Library Methods --- src/mavedb/constants.py | 4 + src/mavedb/lib/clingen/constants.py | 2 - .../lib/clingen/content_constructors.py | 6 +- src/mavedb/lib/clingen/linked_data_hub.py | 10 +- tests/helpers/constants.py | 10 +- tests/helpers/util.py | 16 +- .../lib/clingen/test_content_constructors.py | 
138 ++++++------------ tests/lib/clingen/test_linked_data_hub.py | 100 ++++++------- tests/lib/conftest.py | 102 +++++++++++++ 9 files changed, 223 insertions(+), 165 deletions(-) create mode 100644 src/mavedb/constants.py diff --git a/src/mavedb/constants.py b/src/mavedb/constants.py new file mode 100644 index 00000000..41f9d825 --- /dev/null +++ b/src/mavedb/constants.py @@ -0,0 +1,4 @@ +import os + +MAVEDB_BASE_GIT = "https://github.com/VariantEffect/mavedb-api" +MAVEDB_FRONTEND_URL = os.getenv("MAVE_FRONTEND_URL", "https://mavedb.org") diff --git a/src/mavedb/lib/clingen/constants.py b/src/mavedb/lib/clingen/constants.py index 03be1156..7e21f77b 100644 --- a/src/mavedb/lib/clingen/constants.py +++ b/src/mavedb/lib/clingen/constants.py @@ -10,8 +10,6 @@ LDH_ENTITY_NAME = "MaveDBMapping" LDH_ENTITY_ENDPOINT = "maveDb" # for some reason, not the same :/ -MAVEDB_BASE_GIT = "https://github.com/VariantEffect/mavedb-api" - DEFAULT_LDH_SUBMISSION_BATCH_SIZE = 100 LDH_SUBMISSION_URL = f"https://genboree.org/mq/brdg/pulsar/{CLIN_GEN_TENANT}/ldh/submissions/{LDH_ENTITY_ENDPOINT}" LDH_LINKED_DATA_URL = f"https://genboree.org/{LDH_TENANT}/{LDH_ENTITY_NAME}/id" diff --git a/src/mavedb/lib/clingen/content_constructors.py b/src/mavedb/lib/clingen/content_constructors.py index 858da5b5..b9c35e27 100644 --- a/src/mavedb/lib/clingen/content_constructors.py +++ b/src/mavedb/lib/clingen/content_constructors.py @@ -2,8 +2,9 @@ from uuid import uuid4 from mavedb import __version__ +from mavedb.constants import MAVEDB_BASE_GIT, MAVEDB_FRONTEND_URL from mavedb.lib.types.clingen import LdhContentLinkedData, LdhContentSubject, LdhEvent, LdhSubmission -from mavedb.lib.clingen.constants import LDH_ENTITY_NAME, LDH_SUBMISSION_TYPE, MAVEDB_BASE_GIT +from mavedb.lib.clingen.constants import LDH_ENTITY_NAME, LDH_SUBMISSION_TYPE from mavedb.models.mapped_variant import MappedVariant from mavedb.models.variant import Variant @@ -42,8 +43,7 @@ def construct_ldh_submission_entity(variant: Variant, mapped_variant: MappedVari "score": variant.data["score_data"]["score"], # type: ignore }, "entId": variant.urn, # type: ignore - # TODO: We should have some sort of constant for our base url. - "entIri": f"https://staging.mavedb.org/score-sets/{variant.urn}", # type: ignore + "entIri": f"{MAVEDB_FRONTEND_URL}/{variant.urn}", # type: ignore } ] } diff --git a/src/mavedb/lib/clingen/linked_data_hub.py b/src/mavedb/lib/clingen/linked_data_hub.py index 0c6df825..085820cf 100644 --- a/src/mavedb/lib/clingen/linked_data_hub.py +++ b/src/mavedb/lib/clingen/linked_data_hub.py @@ -85,15 +85,6 @@ def authenticate(self) -> str: extra=logging_context(), ) - try: - assert GENBOREE_ACCOUNT_NAME is not None, "Genboree account name is not set." - assert GENBOREE_ACCOUNT_PASSWORD is not None, "Genboree account password is not set." - except AssertionError as exc: - msg = "Genboree account name and/or password are not set. Unable to authenticate with Genboree services." - save_to_logging_context(format_raised_exception_info_as_dict(exc)) - logger.error(msg=msg, extra=logging_context()) - raise ValueError(msg) - auth_url = f"https://genboree.org/auth/usr/gb:{GENBOREE_ACCOUNT_NAME}/auth" auth_body = {"type": "plain", "val": GENBOREE_ACCOUNT_PASSWORD} auth_response = requests.post(auth_url, json=auth_body) @@ -115,6 +106,7 @@ def authenticate(self) -> str: raise ValueError(msg) # TODO#411: We should consider using a secret manager to store persistent/setable secrets like this. 
+ # I'd prefer not to ever set environment variables, especially externally generated content. os.environ["GENBOREE_JWT"] = auth_jwt logger.info(msg="Successfully authenticated with Genboree services.", extra=logging_context()) return auth_jwt diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index fd54bb5f..cce29097 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -4,6 +4,12 @@ from mavedb.models.enums.processing_state import ProcessingState + +VALID_EXPERIMENT_SET_URN = "urn:mavedb:01234567" +VALID_EXPERIMENT_URN = f"{VALID_EXPERIMENT_SET_URN}-abcd" +VALID_SCORE_SET_URN = f"{VALID_EXPERIMENT_URN}-0123" +VALID_VARIANT_URN = f"{VALID_SCORE_SET_URN}#1" + TEST_PUBMED_IDENTIFIER = "20711194" TEST_PUBMED_URL_IDENTIFIER = "https://pubmed.ncbi.nlm.nih.gov/37162834/" TEST_BIORXIV_IDENTIFIER = "2021.06.21.212592" @@ -799,7 +805,7 @@ } -TEST_SCORESET_RANGE = { +TEST_SCORE_SET_RANGE = { "wt_score": 1.0, "ranges": [ {"label": "test1", "classification": "normal", "range": (0, 2.0)}, @@ -808,7 +814,7 @@ } -TEST_SAVED_SCORESET_RANGE = { +TEST_SAVED_SCORE_SET_RANGE = { "wtScore": 1.0, "ranges": [ {"label": "test1", "classification": "normal", "range": [0.0, 2.0]}, diff --git a/tests/helpers/util.py b/tests/helpers/util.py index 7400c262..6519e4d0 100644 --- a/tests/helpers/util.py +++ b/tests/helpers/util.py @@ -26,9 +26,8 @@ from mavedb.view_models.experiment import Experiment, ExperimentCreate from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate from tests.helpers.constants import ( - TEST_VALID_POST_MAPPED_VRS_HAPLOTYPE, - TEST_VALID_PRE_MAPPED_VRS_ALLELE, - TEST_VALID_POST_MAPPED_VRS_ALLELE, + TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, + TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, EXTRA_USER, TEST_CDOT_TRANSCRIPT, TEST_COLLECTION, @@ -38,7 +37,8 @@ TEST_MINIMAL_POST_MAPPED_METADATA, TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_MAPPED_VARIANT, - TEST_VALID_PRE_MAPPED_VRS_HAPLOTYPE, + TEST_VALID_PRE_MAPPED_VRS_CIS_PHASED_BLOCK, + TEST_VALID_POST_MAPPED_VRS_CIS_PHASED_BLOCK, ) @@ -228,8 +228,12 @@ def mock_worker_vrs_mapping(client, db, score_set, alleles=True): # It's un-important what the contents of each mapped VRS object are, so use the same constant for each variant. 
for variant in variants: mapped_variant = MappedVariantDbModel( - pre_mapped=TEST_VALID_PRE_MAPPED_VRS_ALLELE if alleles else TEST_VALID_PRE_MAPPED_VRS_HAPLOTYPE, - post_mapped=TEST_VALID_POST_MAPPED_VRS_ALLELE if alleles else TEST_VALID_POST_MAPPED_VRS_HAPLOTYPE, + pre_mapped=TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X + if alleles + else TEST_VALID_PRE_MAPPED_VRS_CIS_PHASED_BLOCK, + post_mapped=TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X + if alleles + else TEST_VALID_POST_MAPPED_VRS_CIS_PHASED_BLOCK, variant=variant, vrs_version="2.0", modification_date=date.today(), diff --git a/tests/lib/clingen/test_content_constructors.py b/tests/lib/clingen/test_content_constructors.py index 1e691b1d..dc831413 100644 --- a/tests/lib/clingen/test_content_constructors.py +++ b/tests/lib/clingen/test_content_constructors.py @@ -1,25 +1,36 @@ from unittest.mock import patch from uuid import UUID +from mavedb.constants import MAVEDB_BASE_GIT, MAVEDB_FRONTEND_URL from mavedb.lib.clingen.content_constructors import ( construct_ldh_submission_event, construct_ldh_submission_subject, construct_ldh_submission, construct_ldh_submission_entity, ) -from mavedb.lib.clingen.constants import LDH_ENTITY_NAME, LDH_SUBMISSION_TYPE, MAVEDB_BASE_GIT +from mavedb.lib.clingen.constants import LDH_ENTITY_NAME, LDH_SUBMISSION_TYPE from mavedb import __version__ +from tests.helpers.constants import ( + TEST_HGVS_IDENTIFIER, + VALID_VARIANT_URN, + TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, + TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, +) + + +def test_construct_ldh_submission_subject(): + result = construct_ldh_submission_subject(TEST_HGVS_IDENTIFIER) + assert result == {"Variant": {"hgvs": TEST_HGVS_IDENTIFIER}} + def test_construct_ldh_submission_event(): - sbj = {"Variant": {"hgvs": "NM_000546.5:c.215C>G"}} + sbj = construct_ldh_submission_subject(TEST_HGVS_IDENTIFIER) with ( patch("mavedb.lib.clingen.content_constructors.uuid4") as mock_uuid4, - patch("mavedb.lib.clingen.content_constructors.datetime") as mock_datetime, ): mock_uuid4.return_value = UUID("12345678-1234-5678-1234-567812345678") - mock_datetime.now.return_value = "2023-01-01T00:00:00" result = construct_ldh_submission_event(sbj) @@ -27,7 +38,7 @@ def test_construct_ldh_submission_event(): assert result["name"] == LDH_ENTITY_NAME assert result["uuid"] == "12345678-1234-5678-1234-567812345678" assert result["sbj"] == { - "id": "NM_000546.5:c.215C>G", + "id": TEST_HGVS_IDENTIFIER, "type": "Variant", "format": "hgvs", "add": True, @@ -37,100 +48,41 @@ def test_construct_ldh_submission_event(): "id": "resource_published", "iri": f"{MAVEDB_BASE_GIT}/releases/tag/v{__version__}", } - assert result["triggered"]["at"] == "2023-01-01T00:00:00" - - -def test_construct_ldh_submission_subject(): - hgvs = "NM_000546.5:c.215C>G" - result = construct_ldh_submission_subject(hgvs) - assert result == {"Variant": {"hgvs": hgvs}} - -def test_construct_ldh_submission_entity(): - class MockVariant: - def __init__(self, urn, data): - self.urn = urn - self.data = data - - class MockMappedVariant: - def __init__(self, pre_mapped, post_mapped, mapping_api_version): - self.pre_mapped = pre_mapped - self.post_mapped = post_mapped - self.mapping_api_version = mapping_api_version - - variant = MockVariant( - urn="urn:example:variant123", - data={"score_data": {"score": 0.95}}, - ) - mapped_variant = MockMappedVariant( - pre_mapped="pre-mapped-value", - post_mapped="post-mapped-value", - mapping_api_version="v1.0", - ) - - result = construct_ldh_submission_entity(variant, mapped_variant) +def 
test_construct_ldh_submission_entity(mock_variant, mock_mapped_variant): + result = construct_ldh_submission_entity(mock_variant, mock_mapped_variant) assert "MaveDBMapping" in result assert len(result["MaveDBMapping"]) == 1 mapping = result["MaveDBMapping"][0] - assert mapping["entContent"]["mavedb_id"] == "urn:example:variant123" - assert mapping["entContent"]["pre_mapped"] == "pre-mapped-value" - assert mapping["entContent"]["post_mapped"] == "post-mapped-value" - assert mapping["entContent"]["mapping_api_version"] == "v1.0" - assert mapping["entContent"]["score"] == 0.95 - - assert mapping["entId"] == "urn:example:variant123" - assert mapping["entIri"] == "https://staging.mavedb.org/score-sets/urn:example:variant123" - - -def test_construct_ldh_submission(): - class MockVariant: - def __init__(self, urn, data): - self.urn = urn - self.data = data - - class MockMappedVariant: - def __init__(self, pre_mapped, post_mapped, mapping_api_version): - self.pre_mapped = pre_mapped - self.post_mapped = post_mapped - self.mapping_api_version = mapping_api_version - - variant1 = MockVariant( - urn="urn:example:variant123", - data={"score_data": {"score": 0.95}}, - ) - mapped_variant1 = MockMappedVariant( - pre_mapped="pre-mapped-value1", - post_mapped="post-mapped-value1", - mapping_api_version="v1.0", - ) - - variant2 = MockVariant( - urn="urn:example:variant456", - data={"score_data": {"score": 0.85}}, - ) - mapped_variant2 = MockMappedVariant( - pre_mapped="pre-mapped-value2", - post_mapped="post-mapped-value2", - mapping_api_version="v2.0", - ) + assert mapping["entContent"]["mavedb_id"] == VALID_VARIANT_URN + assert mapping["entContent"]["pre_mapped"] == TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X + assert mapping["entContent"]["post_mapped"] == TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X + assert mapping["entContent"]["mapping_api_version"] == "pytest.mapping.1.0" + assert mapping["entContent"]["score"] == 1.0 + assert mapping["entId"] == VALID_VARIANT_URN + assert mapping["entIri"] == f"{MAVEDB_FRONTEND_URL}/{VALID_VARIANT_URN}" + + +def test_construct_ldh_submission(mock_variant, mock_mapped_variant): variant_content = [ - ("NM_000546.5:c.215C>G", variant1, mapped_variant1), - ("NM_000546.5:c.216C>T", variant2, mapped_variant2), + (TEST_HGVS_IDENTIFIER, mock_variant, mock_mapped_variant), + (TEST_HGVS_IDENTIFIER, mock_variant, mock_mapped_variant), ] + uuid_1 = UUID("12345678-1234-5678-1234-567812345678") + uuid_2 = UUID("87654321-4321-8765-4321-876543218765") + with ( patch("mavedb.lib.clingen.content_constructors.uuid4") as mock_uuid4, - patch("mavedb.lib.clingen.content_constructors.datetime") as mock_datetime, ): mock_uuid4.side_effect = [ - UUID("12345678-1234-5678-1234-567812345678"), - UUID("87654321-4321-8765-4321-876543218765"), + uuid_1, + uuid_2, ] - mock_datetime.now.return_value = "2023-01-01T00:00:00" result = construct_ldh_submission(variant_content) @@ -138,16 +90,16 @@ def __init__(self, pre_mapped, post_mapped, mapping_api_version): # Validate the first submission submission1 = result[0] - assert submission1["event"]["uuid"] == "12345678-1234-5678-1234-567812345678" - assert submission1["event"]["sbj"]["id"] == "NM_000546.5:c.215C>G" - assert submission1["content"]["sbj"] == {"Variant": {"hgvs": "NM_000546.5:c.215C>G"}} - assert submission1["content"]["ld"]["MaveDBMapping"][0]["entContent"]["mavedb_id"] == "urn:example:variant123" - assert submission1["content"]["ld"]["MaveDBMapping"][0]["entContent"]["score"] == 0.95 + assert submission1["event"]["uuid"] == uuid_1 + assert 
submission1["event"]["sbj"]["id"] == TEST_HGVS_IDENTIFIER + assert submission1["content"]["sbj"] == {"Variant": {"hgvs": TEST_HGVS_IDENTIFIER}} + assert submission1["content"]["ld"]["MaveDBMapping"][0]["entContent"]["mavedb_id"] == VALID_VARIANT_URN + assert submission1["content"]["ld"]["MaveDBMapping"][0]["entContent"]["score"] == 1.0 # Validate the second submission submission2 = result[1] - assert submission2["event"]["uuid"] == "87654321-4321-8765-4321-876543218765" - assert submission2["event"]["sbj"]["id"] == "NM_000546.5:c.216C>T" - assert submission2["content"]["sbj"] == {"Variant": {"hgvs": "NM_000546.5:c.216C>T"}} - assert submission2["content"]["ld"]["MaveDBMapping"][0]["entContent"]["mavedb_id"] == "urn:example:variant456" - assert submission2["content"]["ld"]["MaveDBMapping"][0]["entContent"]["score"] == 0.85 + assert submission2["event"]["uuid"] == uuid_2 + assert submission2["event"]["sbj"]["id"] == TEST_HGVS_IDENTIFIER + assert submission2["content"]["sbj"] == {"Variant": {"hgvs": TEST_HGVS_IDENTIFIER}} + assert submission2["content"]["ld"]["MaveDBMapping"][0]["entContent"]["mavedb_id"] == VALID_VARIANT_URN + assert submission2["content"]["ld"]["MaveDBMapping"][0]["entContent"]["score"] == 1.0 diff --git a/tests/lib/clingen/test_linked_data_hub.py b/tests/lib/clingen/test_linked_data_hub.py index dc1750ef..17bbafc8 100644 --- a/tests/lib/clingen/test_linked_data_hub.py +++ b/tests/lib/clingen/test_linked_data_hub.py @@ -4,18 +4,23 @@ import requests from datetime import datetime from unittest.mock import patch, MagicMock -from mavedb.lib.clingen.constants import LDH_LINKED_DATA_URL + +from mavedb.lib.clingen.constants import LDH_LINKED_DATA_URL, GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD +from mavedb.lib.utils import batched from mavedb.lib.clingen.linked_data_hub import ClinGenLdhService, get_clingen_variation +TEST_CLINGEN_URL = "https://pytest.clingen.com" + + @pytest.fixture def clingen_service(): - yield ClinGenLdhService(url="https://pytest.clingen.com") + yield ClinGenLdhService(url=TEST_CLINGEN_URL) class TestClinGenLdhService: def test_init(self, clingen_service): - assert clingen_service.url == "https://pytest.clingen.com" + assert clingen_service.url == TEST_CLINGEN_URL ### Test the authenticate method @@ -28,9 +33,6 @@ def test_authenticate_with_existing_jwt(self, clingen_service: ClinGenLdhService @patch("mavedb.lib.clingen.linked_data_hub.requests.post") @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService._existing_jwt") - @patch.dict( - os.environ, {"GENBOREE_ACCOUNT_NAME": "test_account", "GENBOREE_ACCOUNT_PASSWORD": "test_password"}, clear=True - ) def test_authenticate_with_new_jwt(self, mock_existing_jwt, mock_post, clingen_service): mock_existing_jwt.return_value = None @@ -43,25 +45,13 @@ def test_authenticate_with_new_jwt(self, mock_existing_jwt, mock_post, clingen_s assert jwt == "new_jwt_token" assert os.environ["GENBOREE_JWT"] == "new_jwt_token" mock_post.assert_called_once_with( - "https://genboree.org/auth/usr/gb:test_account/auth", - json={"type": "plain", "val": "test_password"}, + f"https://genboree.org/auth/usr/gb:{GENBOREE_ACCOUNT_NAME}/auth", + json={"type": "plain", "val": GENBOREE_ACCOUNT_PASSWORD}, ) @patch("mavedb.lib.clingen.linked_data_hub.requests.post") @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService._existing_jwt") - @patch.dict(os.environ, {}, clear=True) - def test_authenticate_missing_credentials(self, mock_existing_jwt, mock_post, clingen_service): - mock_existing_jwt.return_value = None - with 
pytest.raises(ValueError, match="Genboree account name and/or password are not set"): - clingen_service.authenticate() - mock_post.assert_not_called() - - @patch("mavedb.lib.clingen.linked_data_hub.requests.post") - @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService._existing_jwt") - @patch.dict( - os.environ, {"GENBOREE_ACCOUNT_NAME": "test_account", "GENBOREE_ACCOUNT_PASSWORD": "test_password"}, clear=True - ) - def test_authenticate_http_error(mock_existing_jwt, mock_post, clingen_service): + def test_authenticate_http_error(self, mock_existing_jwt, mock_post, clingen_service): mock_existing_jwt.return_value = None mock_response = MagicMock() @@ -70,12 +60,12 @@ def test_authenticate_http_error(mock_existing_jwt, mock_post, clingen_service): with pytest.raises(requests.exceptions.HTTPError, match="HTTP Error"): clingen_service.authenticate() + mock_post.assert_called_once() @patch("mavedb.lib.clingen.linked_data_hub.requests.post") @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService._existing_jwt") - @patch.dict(os.environ, {"GENBOREE_ACCOUNT_NAME": "test_account", "GENBOREE_ACCOUNT_PASSWORD": "test_password"}) - def test_authenticate_missing_jwt_in_response(mock_existing_jwt, mock_post, clingen_service): + def test_authenticate_missing_jwt_in_response(self, mock_existing_jwt, mock_post, clingen_service): mock_existing_jwt.return_value = None mock_response = MagicMock() @@ -85,37 +75,41 @@ def test_authenticate_missing_jwt_in_response(mock_existing_jwt, mock_post, clin with pytest.raises(ValueError, match="Could not parse JWT from valid response"): clingen_service.authenticate() + mock_post.assert_called_once() ### Test the _existing_jwt method @patch("mavedb.lib.clingen.linked_data_hub.os.getenv") @patch("mavedb.lib.clingen.linked_data_hub.jwt.get_unverified_claims") - def test_existing_jwt_valid(mock_get_unverified_claims, mock_getenv, clingen_service): + def test_existing_jwt_valid(self, mock_get_unverified_claims, mock_getenv, clingen_service): mock_getenv.return_value = "valid_jwt_token" mock_get_unverified_claims.return_value = {"exp": (datetime.now().timestamp() + 3600)} jwt = clingen_service._existing_jwt() + assert jwt == "valid_jwt_token" mock_getenv.assert_called_once_with("GENBOREE_JWT") mock_get_unverified_claims.assert_called_once_with("valid_jwt_token") @patch("mavedb.lib.clingen.linked_data_hub.os.getenv") @patch("mavedb.lib.clingen.linked_data_hub.jwt.get_unverified_claims") - def test_existing_jwt_expired(mock_get_unverified_claims, mock_getenv, clingen_service): + def test_existing_jwt_expired(self, mock_get_unverified_claims, mock_getenv, clingen_service): mock_getenv.return_value = "expired_jwt_token" mock_get_unverified_claims.return_value = {"exp": (datetime.now().timestamp() - 3600)} jwt = clingen_service._existing_jwt() + assert jwt is None mock_getenv.assert_called_once_with("GENBOREE_JWT") mock_get_unverified_claims.assert_called_once_with("expired_jwt_token") @patch("mavedb.lib.clingen.linked_data_hub.os.getenv") - def test_existing_jwt_not_set(mock_getenv, clingen_service): + def test_existing_jwt_not_set(self, mock_getenv, clingen_service): mock_getenv.return_value = None jwt = clingen_service._existing_jwt() + assert jwt is None mock_getenv.assert_called_once_with("GENBOREE_JWT") @@ -124,28 +118,32 @@ def test_existing_jwt_not_set(mock_getenv, clingen_service): @patch("mavedb.lib.clingen.linked_data_hub.request_with_backoff") @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService.authenticate") 
@patch("mavedb.lib.clingen.linked_data_hub.batched") - def test_dispatch_submissions_success(mock_batched, mock_authenticate, mock_request_with_backoff, clingen_service): + def test_dispatch_submissions_success( + self, mock_batched, mock_authenticate, mock_request_with_backoff, clingen_service + ): mock_authenticate.return_value = "test_jwt_token" mock_request_with_backoff.return_value.json.return_value = {"success": True} content_submissions = [{"id": 1}, {"id": 2}, {"id": 3}] mock_batched.return_value = [[{"id": 1}, {"id": 2}], [{"id": 3}]] # Simulate batching - successes, failures = clingen_service.dispatch_submissions(content_submissions, batch_size=2) + batch_size = 2 + successes, failures = clingen_service.dispatch_submissions(content_submissions, batch_size=batch_size) - assert len(successes) == 3 + assert len(successes) == 2 # 2 batches assert len(failures) == 0 mock_batched.assert_called_once_with(content_submissions, 2) - mock_request_with_backoff.assert_called_with( - method="PUT", - url=clingen_service.url, - json={"id": 1}, - headers={"Authorization": "Bearer test_jwt_token", "Content-Type": "application/json"}, - ) + for submission in batched(content_submissions, batch_size): + mock_request_with_backoff.assert_any_call( + method="PUT", + url=clingen_service.url, + json=submission, + headers={"Authorization": "Bearer test_jwt_token", "Content-Type": "application/json"}, + ) @patch("mavedb.lib.clingen.linked_data_hub.request_with_backoff") @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService.authenticate") - def test_dispatch_submissions_failure(mock_authenticate, mock_request_with_backoff, clingen_service): + def test_dispatch_submissions_failure(self, mock_authenticate, mock_request_with_backoff, clingen_service): mock_authenticate.return_value = "test_jwt_token" mock_request_with_backoff.side_effect = requests.exceptions.RequestException("Request failed") @@ -155,16 +153,17 @@ def test_dispatch_submissions_failure(mock_authenticate, mock_request_with_backo assert len(successes) == 0 assert len(failures) == 3 - mock_request_with_backoff.assert_called_with( - method="PUT", - url=clingen_service.url, - json={"id": 1}, - headers={"Authorization": "Bearer test_jwt_token", "Content-Type": "application/json"}, - ) + for submission in content_submissions: + mock_request_with_backoff.assert_any_call( + method="PUT", + url=clingen_service.url, + json=submission, + headers={"Authorization": "Bearer test_jwt_token", "Content-Type": "application/json"}, + ) @patch("mavedb.lib.clingen.linked_data_hub.request_with_backoff") @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService.authenticate") - def test_dispatch_submissions_partial_success(mock_authenticate, mock_request_with_backoff, clingen_service): + def test_dispatch_submissions_partial_success(self, mock_authenticate, mock_request_with_backoff, clingen_service): mock_authenticate.return_value = "test_jwt_token" def mock_request_with_backoff_side_effect(*args, **kwargs): @@ -186,7 +185,7 @@ def mock_request_with_backoff_side_effect(*args, **kwargs): @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService.authenticate") @patch("mavedb.lib.clingen.linked_data_hub.batched") def test_dispatch_submissions_no_batching( - mock_batched, mock_authenticate, mock_request_with_backoff, clingen_service + self, mock_batched, mock_authenticate, mock_request_with_backoff, clingen_service ): mock_authenticate.return_value = "test_jwt_token" mock_request_with_backoff.return_value.json.return_value = {"success": True} @@ -199,12 
+198,13 @@ def test_dispatch_submissions_no_batching( assert len(successes) == 3 assert len(failures) == 0 mock_batched.assert_not_called() - mock_request_with_backoff.assert_called_with( - method="PUT", - url=clingen_service.url, - json={"id": 1}, - headers={"Authorization": "Bearer test_jwt_token", "Content-Type": "application/json"}, - ) + for submission in content_submissions: + mock_request_with_backoff.assert_any_call( + method="PUT", + url=clingen_service.url, + json=submission, + headers={"Authorization": "Bearer test_jwt_token", "Content-Type": "application/json"}, + ) @patch("mavedb.lib.clingen.linked_data_hub.requests.get") diff --git a/tests/lib/conftest.py b/tests/lib/conftest.py index 076dac4b..a3629ca9 100644 --- a/tests/lib/conftest.py +++ b/tests/lib/conftest.py @@ -1,10 +1,19 @@ import pytest +from unittest import mock +from datetime import datetime from mavedb.models.enums.user_role import UserRole +from mavedb.models.experiment_set import ExperimentSet +from mavedb.models.experiment import Experiment from mavedb.models.license import License +from mavedb.models.publication_identifier import PublicationIdentifier +from mavedb.models.score_set_publication_identifier import ScoreSetPublicationIdentifierAssociation from mavedb.models.role import Role from mavedb.models.taxonomy import Taxonomy +from mavedb.models.score_set import ScoreSet from mavedb.models.user import User +from mavedb.models.variant import Variant +from mavedb.models.mapped_variant import MappedVariant from tests.helpers.constants import ( ADMIN_USER, EXTRA_USER, @@ -12,6 +21,15 @@ TEST_INACTIVE_LICENSE, TEST_TAXONOMY, TEST_USER, + VALID_VARIANT_URN, + VALID_SCORE_SET_URN, + VALID_EXPERIMENT_URN, + VALID_EXPERIMENT_SET_URN, + TEST_PUBMED_IDENTIFIER, + TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, + TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, + TEST_SCORE_SET_RANGE, + TEST_SCORE_CALIBRATION, ) @@ -29,3 +47,87 @@ def setup_lib_db(session): db.add(License(**TEST_LICENSE)) db.add(License(**TEST_INACTIVE_LICENSE)) db.commit() + + +@pytest.fixture +def mock_user(): + mv = mock.Mock(spec=User) + mv.username = TEST_USER["username"] + return mv + + +@pytest.fixture +def mock_publication(): + mv = mock.Mock(spec=PublicationIdentifier) + mv.identifier = TEST_PUBMED_IDENTIFIER + mv.url = f"http://www.ncbi.nlm.nih.gov/pubmed/{TEST_PUBMED_IDENTIFIER}" + return mv + + +@pytest.fixture +def mock_publication_associations(mock_publication): + mv = mock.Mock(spec=ScoreSetPublicationIdentifierAssociation) + mv.publication = mock_publication + mv.primary = True + return [mv] + + +@pytest.fixture +def mock_experiment_set(): + resource = mock.Mock(spec=ExperimentSet) + resource.urn = VALID_EXPERIMENT_SET_URN + resource.creation_date = datetime(2023, 1, 1) + resource.modification_date = datetime(2023, 1, 2) + return resource + + +@pytest.fixture +def mock_experiment(): + experiment = mock.Mock(spec=Experiment) + experiment.title = "Test Experiment" + experiment.urn = VALID_EXPERIMENT_URN + experiment.creation_date = datetime(2023, 1, 1) + experiment.modification_date = datetime(2023, 1, 2) + return experiment + + +@pytest.fixture +def mock_score_set(mock_user, mock_experiment, mock_publication_associations): + score_set = mock.Mock(spec=ScoreSet) + score_set.urn = VALID_SCORE_SET_URN + score_set.score_ranges = TEST_SCORE_SET_RANGE + score_set.score_calibrations = {"pillar_project": TEST_SCORE_CALIBRATION} + score_set.license.short_name = "MIT" + score_set.created_by = mock_user + score_set.modified_by = mock_user + 
score_set.published_date = datetime(2023, 1, 1) + score_set.title = "Mock score set" + score_set.creation_date = datetime(2023, 1, 2) + score_set.modification_date = datetime(2023, 1, 3) + score_set.experiment = mock_experiment + score_set.publication_identifier_associations = mock_publication_associations + return score_set + + +@pytest.fixture +def mock_variant(mock_score_set): + variant = mock.Mock(spec=Variant) + variant.urn = VALID_VARIANT_URN + variant.score_set = mock_score_set + variant.data = {"score_data": {"score": 1.0}} + variant.creation_date = datetime(2023, 1, 2) + variant.modification_date = datetime(2023, 1, 3) + return variant + + +@pytest.fixture +def mock_mapped_variant(mock_variant): + mv = mock.Mock(spec=MappedVariant) + mv.mapping_api_version = "pytest.mapping.1.0" + mv.mapped_date = datetime(2023, 1, 1) + mv.variant = mock_variant + mv.pre_mapped = TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X + mv.post_mapped = TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X + mv.mapped_date = datetime(2023, 1, 2) + mv.modification_date = datetime(2023, 1, 3) + return mv From 8f538610bcf0ac8cd48cca86fbd635b9b14461f4 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Sat, 26 Apr 2025 16:37:19 -0700 Subject: [PATCH 075/166] Test cases for ClinGen submission --- src/mavedb/worker/jobs.py | 11 +- tests/helpers/constants.py | 78 +++++++++ tests/lib/test_score_set.py | 4 +- tests/routers/test_score_set.py | 16 +- tests/worker/test_jobs.py | 297 +++++++++++++++++++++++++++++--- 5 files changed, 369 insertions(+), 37 deletions(-) diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index 715f6ede..377a604e 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -764,14 +764,13 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score return {"success": False, "retried": False, "enqueued_job": None} try: - variant_objects = db.scalars( + variant_objects = db.execute( select(Variant, MappedVariant) .join(MappedVariant) - .where( - Variant.score_set_id == score_set.id, - MappedVariant.current.is_(True), - MappedVariant.post_mapped.is_not(None), - ) + .join(ScoreSet) + .where(ScoreSet.urn == score_set.urn) + .where(MappedVariant.post_mapped.is_not(None)) + .where(MappedVariant.current.is_(True)) ).all() if not variant_objects: diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index cce29097..a134a468 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -30,6 +30,10 @@ VALID_ACCESSION = "NM_001637.3" VALID_GENE = "BRCA1" +VALID_CLINGEN_PA_ID = "PA2579908752" +VALID_CLINGEN_CA_ID = "CA341478553" +VALID_CLINGEN_LDH_ID = "2786738861" + SAVED_PUBMED_PUBLICATION = { "recordType": "PublicationIdentifier", "identifier": "20711194", @@ -938,3 +942,77 @@ "metadata": {"rendered": {"by": "https://genboree.org/mq/brdg/srvc", "when": datetime.now().isoformat()}}, "status": {"code": 200, "name": "OK"}, } + + +TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE = { + "metadata": {"rendered": {"when": datetime.now().isoformat()}}, + "status": {"code": 403, "msg": "Bad Auth Info - jwt malformed", "name": "Forbidden"}, +} + +TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE = { + "metadata": {"rendered": {"when": datetime.now().isoformat()}}, + "status": { + "code": 400, + "msg": "Put Failed - Error! Submission was an empty object. 
Submission must consist of valid, non-Empty JSON objects", + "name": "Bad Request", + }, +} + + +TEST_CLINGEN_LDH_LINKING_RESPONSE = { + "data": { + "created": datetime.now().isoformat(), + "creator": "brl_clingen", + "entContent": { + "mapping_api_version": "pytest.mapping.1.0", + "mavedb_id": VALID_VARIANT_URN, + "post_mapped": TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, + "pre_mapped": TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, + "score": 1.0, + }, + "entId": VALID_VARIANT_URN, + "entIri": f"https://staging.mavedb.org/score-sets/{VALID_VARIANT_URN}", + "entType": "MaveDBMapping", + "ldFor": { + "Variant": [ + { + "created": datetime.now().isoformat(), + "creator": "brl_clingen", + "entId": VALID_CLINGEN_PA_ID, + "entIri": f"http://reg.genome.network/allele/{VALID_CLINGEN_PA_ID}", + "entType": "Variant", + "ldhId": VALID_CLINGEN_LDH_ID, + "ldhIri": f"https://10.15.55.128/ldh-stg/Variant/id/{VALID_CLINGEN_LDH_ID}", + "modified": datetime.now().isoformat(), + "modifier": "brl_clingen", + "rev": "_hLpznbC-A-", + } + ] + }, + "ldhId": VALID_CLINGEN_LDH_ID, + "ldhIri": f"https://10.15.55.128/ldh-stg/MaveDBMapping/id/{VALID_CLINGEN_LDH_ID}", + "modified": datetime.now().isoformat(), + "modifier": "brl_clingen", + "rev": "_jj3a99K---", + }, + "metadata": {"rendered": {"by": "https://10.15.55.128/ldh-stg/srvc", "when": datetime.now().isoformat()}}, + "status": {"code": 200, "name": "OK"}, +} + + +TEST_CLINGEN_LDH_LINKING_RESPONSE_NOT_FOUND = { + "metadata": {"rendered": {"by": "https://10.15.55.128/ldh-stg/srvc", "when": datetime.now().isoformat()}}, + "status": { + "code": 404, + "msg": f"Bad Entity - No 'MaveDBMapping' entity found with identifier {VALID_VARIANT_URN}", + "name": "Not Found", + }, +} + + +TEST_CLINGEN_LDH_LINKING_RESPONSE_BAD_REQUEST = { + "errCode": 400, + "errMsg": "INVALID URL - Your request is invalid. 
Specifically, the URL path you provided ('/ldh-stg/MaveDBMapping/i/urn%3Amavedb%3A00000050-a-1%231') is not valid for HTTP 'GET' requests to the CG-LDH API service.", + "errName": "Bad Request", + "errCat": "INVALID URL", +} diff --git a/tests/lib/test_score_set.py b/tests/lib/test_score_set.py index d95ad6f1..3179b921 100644 --- a/tests/lib/test_score_set.py +++ b/tests/lib/test_score_set.py @@ -21,7 +21,7 @@ ) from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant -from tests.helpers.constants import TEST_SAVED_SCORESET_RANGE +from tests.helpers.constants import TEST_SAVED_SCORE_SET_RANGE from tests.helpers.util import create_acc_score_set, create_experiment, create_seq_score_set @@ -320,7 +320,7 @@ def test_create_null_score_range(setup_lib_db, client, session): def test_update_null_score_range(setup_lib_db, client, session): experiment = create_experiment(client) - score_set = create_seq_score_set(client, experiment["urn"], update={"scoreRanges": TEST_SAVED_SCORESET_RANGE}) + score_set = create_seq_score_set(client, experiment["urn"], update={"scoreRanges": TEST_SAVED_SCORE_SET_RANGE}) db_score_set = session.scalar(select(ScoreSet).where(ScoreSet.score_ranges.is_(None))) assert db_score_set is None diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 720bf07d..773c26b2 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -26,8 +26,8 @@ TEST_MINIMAL_SEQ_SCORESET_RESPONSE, TEST_PUBMED_IDENTIFIER, TEST_ORCID_ID, - TEST_SCORESET_RANGE, - TEST_SAVED_SCORESET_RANGE, + TEST_SCORE_SET_RANGE, + TEST_SAVED_SCORE_SET_RANGE, TEST_MINIMAL_ACC_SCORESET_RESPONSE, TEST_USER, TEST_INACTIVE_LICENSE, @@ -138,7 +138,7 @@ def test_create_score_set_with_score_range(client, setup_router_db): experiment = create_experiment(client) score_set = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set["experimentUrn"] = experiment["urn"] - score_set.update({"score_ranges": TEST_SCORESET_RANGE}) + score_set.update({"score_ranges": TEST_SCORE_SET_RANGE}) response = client.post("/api/v1/score-sets/", json=score_set) assert response.status_code == 200 @@ -150,7 +150,7 @@ def test_create_score_set_with_score_range(client, setup_router_db): expected_response = update_expected_response_for_created_resources( deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, response_data ) - expected_response["scoreRanges"] = TEST_SAVED_SCORESET_RANGE + expected_response["scoreRanges"] = TEST_SAVED_SCORE_SET_RANGE assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: @@ -164,7 +164,7 @@ def test_remove_score_range_from_score_set(client, setup_router_db): experiment = create_experiment(client) score_set = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set["experimentUrn"] = experiment["urn"] - score_set.update({"score_ranges": TEST_SCORESET_RANGE}) + score_set.update({"score_ranges": TEST_SCORE_SET_RANGE}) response = client.post("/api/v1/score-sets/", json=score_set) assert response.status_code == 200 @@ -176,7 +176,7 @@ def test_remove_score_range_from_score_set(client, setup_router_db): expected_response = update_expected_response_for_created_resources( deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, response_data ) - expected_response["scoreRanges"] = TEST_SAVED_SCORESET_RANGE + expected_response["scoreRanges"] = TEST_SAVED_SCORE_SET_RANGE assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: @@ -235,7 +235,7 @@ def 
test_cannot_create_score_set_with_invalid_target_gene_category(client, setup ("doi_identifiers", [{"identifier": TEST_CROSSREF_IDENTIFIER}], [SAVED_DOI_IDENTIFIER]), ("license_id", EXTRA_LICENSE["id"], SAVED_SHORT_EXTRA_LICENSE), ("target_genes", TEST_MINIMAL_ACC_SCORESET["targetGenes"], TEST_MINIMAL_ACC_SCORESET_RESPONSE["targetGenes"]), - ("score_ranges", TEST_SCORESET_RANGE, TEST_SAVED_SCORESET_RANGE), + ("score_ranges", TEST_SCORE_SET_RANGE, TEST_SAVED_SCORE_SET_RANGE), ], ) @pytest.mark.parametrize( @@ -365,7 +365,7 @@ def test_can_update_score_set_supporting_data_after_publication( ("target_genes", TEST_MINIMAL_ACC_SCORESET["targetGenes"], TEST_MINIMAL_SEQ_SCORESET_RESPONSE["targetGenes"]), ( "score_ranges", - TEST_SCORESET_RANGE, + TEST_SCORE_SET_RANGE, None, ), ], diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index 6886ef82..7e141db0 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -15,6 +15,7 @@ from mavedb.data_providers.services import VRSMap from mavedb.lib.mave.constants import HGVS_NT_COLUMN from mavedb.lib.score_sets import csv_data_to_df +from mavedb.lib.clingen.linked_data_hub import ClinGenLdhService from mavedb.lib.validation.exceptions import ValidationError from mavedb.models.enums.mapping_state import MappingState from mavedb.models.enums.processing_state import ProcessingState @@ -35,11 +36,17 @@ from tests.helpers.constants import ( TEST_CDOT_TRANSCRIPT, TEST_CLINGEN_SUBMISSION_RESPONSE, + TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, + TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_EXPERIMENT, TEST_MINIMAL_SEQ_SCORESET, TEST_VARIANT_MAPPING_SCAFFOLD, VALID_ACCESSION, + TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS1_X, + TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS1_X, + TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, + TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, ) from tests.helpers.util import awaitable_exception @@ -93,6 +100,28 @@ async def setup_records_files_and_variants(session, async_client, data_files, in return score_set_with_variants +async def setup_records_files_and_variants_with_mapping( + session, async_client, data_files, input_score_set, standalone_worker_context +): + score_set = await setup_records_files_and_variants( + session, async_client, data_files, input_score_set, standalone_worker_context + ) + await sanitize_mapping_queue(standalone_worker_context, score_set) + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set) + + with patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ): + result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert result["success"] + return session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + + async def sanitize_mapping_queue(standalone_worker_context, score_set): queued_job = await standalone_worker_context["redis"].rpop(MAPPING_QUEUE_NAME) assert int(queued_job.decode("utf-8")) == score_set.id @@ -110,10 +139,10 @@ async def setup_mapping_output(async_client, session, score_set, empty=False): variants = session.scalars(select(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).all() for variant in variants: mapped_score = { - "pre_mapped": {"test": "pre_mapped_output"}, - "pre_mapped_2_0": {"test": "pre_mapped_output (2.0)"}, - "post_mapped": {"test": "post_mapped_output"}, - "post_mapped_2_0": {"test": "post_mapped_output (2.0)"}, + 
"pre_mapped": TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS1_X, + "pre_mapped_2_0": TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, + "post_mapped": TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS1_X, + "post_mapped_2_0": TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, "mavedb_id": variant.urn, } @@ -442,7 +471,7 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), - patch("mavedb.worker.jobs.BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), ): await arq_redis.enqueue_job("create_variants_for_score_set", uuid4().hex, score_set.id, 1, scores, counts) await arq_worker.async_run() @@ -1359,7 +1388,7 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), - patch("mavedb.worker.jobs.BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), ): await arq_redis.enqueue_job("variant_mapper_manager", uuid4().hex, 1) await arq_worker.async_run() @@ -1403,7 +1432,7 @@ async def dummy_mapping_job(): "run_in_executor", side_effect=[failed_mapping_job(), dummy_mapping_job()], ), - patch("mavedb.worker.jobs.BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), ): await arq_redis.enqueue_job("variant_mapper_manager", uuid4().hex, 1) await arq_worker.async_run() @@ -1444,7 +1473,7 @@ async def failed_mapping_job(): "run_in_executor", side_effect=[failed_mapping_job()] * 5, ), - patch("mavedb.worker.jobs.BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), ): await arq_redis.enqueue_job("variant_mapper_manager", uuid4().hex, 1) await arq_worker.async_run() @@ -1470,7 +1499,7 @@ async def failed_mapping_job(): async def test_submit_score_set_mappings_to_ldh_success( setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis ): - score_set = await setup_records_files_and_variants( + score_set = await setup_records_files_and_variants_with_mapping( session, async_client, data_files, @@ -1479,21 +1508,74 @@ async def test_submit_score_set_mappings_to_ldh_success( ) async def dummy_submission_job(): - return TEST_CLINGEN_SUBMISSION_RESPONSE + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_submission_job(), + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. 
+ with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), ): result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) assert result["success"] assert not result["retried"] + assert result["enqueued_job"] is not None @pytest.mark.asyncio -async def test_submit_score_set_mappings_to_ldh_exception_in_submission( +async def test_submit_score_set_mappings_to_ldh_exception_in_setup( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "src.mavedb.worker.jobs.setup_job_state", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_in_auth( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch.object( + ClinGenLdhService, + "_existing_jwt", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_no_variants_exist( setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis ): score_set = await setup_records_files_and_variants( @@ -1504,15 +1586,188 @@ async def test_submit_score_set_mappings_to_ldh_exception_in_submission( standalone_worker_context, ) + with ( + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_in_hgvs_generation( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "src.mavedb.lib.variants.hgvs_from_mapped_variant", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_in_ldh_submission_construction( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + 
+ with patch( + "src.mavedb.lib.clingen.content_constructors.construct_ldh_submission", + side_effect=Exception(), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_during_submission( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + async def failed_submission_job(): - raise Exception("Submission failed") + return Exception() - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=failed_submission_job(), + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=failed_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "error_response", [TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE] +) +async def test_submit_score_set_mappings_to_ldh_submission_failures_exist( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis, error_response +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_submission_job(): + return [None, error_response] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), ): - result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id, 1) + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_exception_during_linking_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. 
+ with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch.object(ArqRedis, "enqueue_job", side_effect=Exception()), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_submit_score_set_mappings_to_ldh_linking_not_queued_when_expected( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_submission_job(), + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch.object(ArqRedis, "enqueue_job", return_value=None), + ): + result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) assert not result["success"] assert not result["retried"] + assert not result["enqueued_job"] From 467fe3b18454890408aea3d65fd2d183a3e02d7b Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 28 Apr 2025 12:03:11 -0700 Subject: [PATCH 076/166] Tests for Clingen Linkage Job --- src/mavedb/lib/clingen/linked_data_hub.py | 23 +- src/mavedb/scripts/link_clingen_variants.py | 9 +- src/mavedb/worker/jobs.py | 70 +++-- tests/worker/test_jobs.py | 325 +++++++++++++++++++- 4 files changed, 385 insertions(+), 42 deletions(-) diff --git a/src/mavedb/lib/clingen/linked_data_hub.py b/src/mavedb/lib/clingen/linked_data_hub.py index 085820cf..9cf4dafb 100644 --- a/src/mavedb/lib/clingen/linked_data_hub.py +++ b/src/mavedb/lib/clingen/linked_data_hub.py @@ -216,7 +216,28 @@ def get_clingen_variation(urn: str) -> Optional[dict]: ) if response.status_code == 200: - return response.json()["data"]["ldFor"]["Variant"][0] + return response.json() else: logger.error(f"Failed to fetch data for URN {urn}: {response.status_code} - {response.text}") return None + + +def clingen_allele_id_from_ldh_variation(variation: Optional[dict]) -> Optional[str]: + """ + Extracts the ClinGen allele ID from a given variation dictionary. + + Args: + variation (Optional[dict]): A dictionary containing variation data, otherwise None. + + Returns: + Optional[str]: The ClinGen allele ID if found, otherwise None. 
+ """ + if not variation: + return None + + try: + return variation["data"]["ldFor"]["Variant"][0]["entId"] + except KeyError as exc: + save_to_logging_context(format_raised_exception_info_as_dict(exc)) + logger.error("Failed to extract ClinGen allele ID from variation data.", extra=logging_context()) + return None diff --git a/src/mavedb/scripts/link_clingen_variants.py b/src/mavedb/scripts/link_clingen_variants.py index d0cd921c..5f81e308 100644 --- a/src/mavedb/scripts/link_clingen_variants.py +++ b/src/mavedb/scripts/link_clingen_variants.py @@ -5,7 +5,7 @@ from sqlalchemy import select from sqlalchemy.orm import Session -from mavedb.lib.clingen.linked_data_hub import get_clingen_variation +from mavedb.lib.clingen.linked_data_hub import get_clingen_variation, clingen_allele_id_from_ldh_variation from mavedb.models.score_set import ScoreSet from mavedb.models.variant import Variant from mavedb.models.mapped_variant import MappedVariant @@ -45,8 +45,9 @@ def link_clingen_variants(db: Session, urns: Sequence[str], score_sets: bool, un failed_urns = [] for urn in urns: ldh_variation = get_clingen_variation(urn) + allele_id = clingen_allele_id_from_ldh_variation(ldh_variation) - if not ldh_variation: + if not allele_id: failed_urns.append(urn) continue @@ -57,10 +58,10 @@ def link_clingen_variants(db: Session, urns: Sequence[str], score_sets: bool, un failed_urns.append(urn) continue - mapped_variant.clingen_allele_id = ldh_variation["entId"] + mapped_variant.clingen_allele_id = allele_id db.add(mapped_variant) - logger.info(f"Successfully linked URN {urn} to ClinGen variation {ldh_variation['entId']}.") + logger.info(f"Successfully linked URN {urn} to ClinGen variation {allele_id}.") if failed_urns: logger.warning(f"Failed to link the following {len(failed_urns)} URNs: {', '.join(failed_urns)}") diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index 377a604e..9b6c9efe 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -21,7 +21,11 @@ LINKED_DATA_RETRY_THRESHOLD, ) from mavedb.lib.clingen.content_constructors import construct_ldh_submission -from mavedb.lib.clingen.linked_data_hub import ClinGenLdhService, get_clingen_variation +from mavedb.lib.clingen.linked_data_hub import ( + ClinGenLdhService, + get_clingen_variation, + clingen_allele_id_from_ldh_variation, +) from mavedb.lib.exceptions import ( MappingEnqueueError, LinkingEnqueueError, @@ -814,6 +818,8 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score extra=logging_context, ) + return {"success": False, "retried": False, "enqueued_job": None} + try: assert not submission_failures, f"{len(submission_failures)} submissions failed to be dispatched to the LDH." 
logger.info(msg="Dispatched all variant mapping submissions to the LDH.", extra=logging_context) @@ -863,23 +869,6 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score return {"success": True, "retried": False, "enqueued_job": new_job} -def get_ldh_variation(logging_ctx: dict, variant_urns: list[str]): - linked_data = [] - for idx, variant_urn in enumerate(variant_urns): - logging_ctx["on_variation_fetch"] = idx - ldh_variation = get_clingen_variation(variant_urn) - - if not ldh_variation: - linked_data.append((variant_urn, None)) - continue - else: - linked_data.append((variant_urn, ldh_variation["entId"])) - - logger.debug(msg=f"Found ClinGen variation {ldh_variation['entId']} for URN {variant_urn}.", extra=logging_ctx) - - return linked_data - - async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: int, attempt: int) -> dict: logging_context = {} score_set = None @@ -913,7 +902,7 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in extra=logging_context, ) - return {"success": False, "retried": False} + return {"success": False, "retried": False, "enqueued_job": None} try: variant_urns = db.scalars( @@ -934,7 +923,7 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in extra=logging_context, ) - return {"success": True, "retried": False} + return {"success": True, "retried": False, "enqueued_job": None} logger.info( msg="Found current mapped variants with post mapped metadata for this score set. Attempting to link them to LDH submissions.", @@ -950,13 +939,13 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in extra=logging_context, ) - return {"success": False, "retried": False} + return {"success": False, "retried": False, "enqueued_job": None} try: logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=logging_context) # TODO#372: Non-nullable variant urns. blocking = functools.partial( - get_ldh_variation, + lambda urns: [(variant_urn, get_clingen_variation(variant_urn)) for variant_urn in urns], variant_urns, # type: ignore ) loop = asyncio.get_running_loop() @@ -971,12 +960,19 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in extra=logging_context, ) - return {"success": False, "retried": False} + return {"success": False, "retried": False, "enqueued_job": None} try: + linked_allele_ids = [ + (variant_urn, clingen_allele_id_from_ldh_variation(clingen_variation)) + for variant_urn, clingen_variation in linked_data + ] + + print(linked_allele_ids) + linkage_failures = [] - for variant_urn, ldh_variation in linked_data: - # XXX: Should we unlink variation if it is not found? + for variant_urn, ldh_variation in linked_allele_ids: + # XXX: Should we unlink variation if it is not found? Does this constitute a failure? if not ldh_variation: logger.warning( msg=f"Failed to link mapped variant {variant_urn} to LDH submission. 
No LDH variation found.", @@ -1013,7 +1009,7 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in extra=logging_context, ) - return {"success": False, "retried": False} + return {"success": False, "retried": False, "enqueued_job": None} try: num_linkage_failures = len(linkage_failures) @@ -1022,12 +1018,16 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in logging_context["linkage_failures"] = num_linkage_failures logging_context["linkage_successes"] = num_variant_urns - num_linkage_failures + assert ( + len(linked_allele_ids) == num_variant_urns + ), f"{num_variant_urns - len(linked_allele_ids)} appear to not have been attempted to be linked." + if not linkage_failures: logger.info( msg="Successfully linked all mapped variants to LDH submissions.", extra=logging_context, ) - return {"success": True, "retried": False} + return {"success": True, "retried": False, "enqueued_job": None} if ratio_failed_linking < LINKED_DATA_RETRY_THRESHOLD: logger.warning( @@ -1038,7 +1038,7 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in text=f"Failed to link {len(linkage_failures)} mapped variants to LDH submissions for score set {score_set.urn}." f"The retry threshold was not exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." ) - return {"success": True, "retried": False} + return {"success": True, "retried": False, "enqueued_job": None} except Exception as e: send_slack_error(e) @@ -1049,7 +1049,7 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in extra=logging_context, ) - return {"success": False, "retried": False} + return {"success": False, "retried": False, "enqueued_job": None} # If we reach this point, we should consider the job failed (there were failures which exceeded our retry threshold). new_job_id = None @@ -1079,7 +1079,7 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in ) send_slack_message( text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." - f"The retry threshold was exceeded and this job was successfully retried. This was attempt {attempt}. Retry will occur in {backoff_time} seconds. URNs failed to link: {', '.join(linkage_failures)}." + f"This job was successfully retried. This was attempt {attempt}. Retry will occur in {backoff_time} seconds. URNs failed to link: {', '.join(linkage_failures)}." ) elif new_job_id is None and not max_retries_exceeded: logger.error( @@ -1088,7 +1088,7 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in ) send_slack_message( text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." - f"The retry threshold was exceeded but this job could not be retried. This was attempt {attempt}. URNs failed to link: {', '.join(linkage_failures)}." + f"This job could not be retried due to an unexpected issue while attempting to enqueue another linkage job. This was attempt {attempt}. URNs failed to link: {', '.join(linkage_failures)}." ) else: logger.error( @@ -1097,8 +1097,12 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in ) send_slack_message( text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." - f"The retry threshold was exceeded but this job has exceeded the maximum retry level. 
URNs failed to link: {', '.join(linkage_failures)}." + f"The retry threshold was exceeded and this job will not be retried. URNs failed to link: {', '.join(linkage_failures)}." ) finally: - return {"success": False, "retried": (not max_retries_exceeded and new_job_id is not None)} + return { + "success": False, + "retried": (not max_retries_exceeded and new_job_id is not None), + "enqueued_job": new_job_id, + } diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index 7e141db0..dc07ec9f 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -15,7 +15,7 @@ from mavedb.data_providers.services import VRSMap from mavedb.lib.mave.constants import HGVS_NT_COLUMN from mavedb.lib.score_sets import csv_data_to_df -from mavedb.lib.clingen.linked_data_hub import ClinGenLdhService +from mavedb.lib.clingen.linked_data_hub import ClinGenLdhService, clingen_allele_id_from_ldh_variation from mavedb.lib.validation.exceptions import ValidationError from mavedb.models.enums.mapping_state import MappingState from mavedb.models.enums.processing_state import ProcessingState @@ -32,12 +32,14 @@ map_variants_for_score_set, variant_mapper_manager, submit_score_set_mappings_to_ldh, + link_clingen_variants, ) from tests.helpers.constants import ( TEST_CDOT_TRANSCRIPT, TEST_CLINGEN_SUBMISSION_RESPONSE, TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE, + TEST_CLINGEN_LDH_LINKING_RESPONSE, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_EXPERIMENT, TEST_MINIMAL_SEQ_SCORESET, @@ -1540,7 +1542,7 @@ async def test_submit_score_set_mappings_to_ldh_exception_in_setup( ) with patch( - "src.mavedb.worker.jobs.setup_job_state", + "mavedb.worker.jobs.setup_job_state", side_effect=Exception(), ): result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) @@ -1609,7 +1611,7 @@ async def test_submit_score_set_mappings_to_ldh_exception_in_hgvs_generation( ) with patch( - "src.mavedb.lib.variants.hgvs_from_mapped_variant", + "mavedb.lib.variants.hgvs_from_mapped_variant", side_effect=Exception(), ): result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) @@ -1632,7 +1634,7 @@ async def test_submit_score_set_mappings_to_ldh_exception_in_ldh_submission_cons ) with patch( - "src.mavedb.lib.clingen.content_constructors.construct_ldh_submission", + "mavedb.lib.clingen.content_constructors.construct_ldh_submission", side_effect=Exception(), ): result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) @@ -1771,3 +1773,318 @@ async def dummy_submission_job(): assert not result["success"] assert not result["retried"] assert not result["enqueued_job"] + + +############################################################################################################################################## +## ClinGen Linkage +############################################################################################################################################## + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_success( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in 
session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert variant.clingen_allele_id == clingen_allele_id_from_ldh_variation(TEST_CLINGEN_LDH_LINKING_RESPONSE) + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_exception_in_setup( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + with patch( + "mavedb.worker.jobs.setup_job_state", + side_effect=Exception(), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + for variant in session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ): + assert variant.clingen_allele_id is None + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_no_variants_to_link( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_exception_during_linkage( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. 
+ with patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=Exception(), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_exception_while_parsing_linkages( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with patch( + "mavedb.lib.clingen.linked_data_hub.clingen_allele_id_from_ldh_variation", + side_effect=Exception(), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_failures_exist_but_do_not_eclipse_retry_threshold( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, None) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", + -1, + ), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, None) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. 
+ with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", + 1, + ), + patch( + "mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", + 0, + ), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert result["retried"] + assert result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_cant_enqueue( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, None) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", + 1, + ), + patch.object(ArqRedis, "enqueue_job", return_value=awaitable_exception()), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] + + +@pytest.mark.asyncio +async def test_link_score_set_mappings_to_ldh_objects_failures_exist_and_eclipse_retry_threshold_retries_exceeded( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants_with_mapping( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_linking_job(): + return [ + (variant_urn, None) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return + # value of the EventLoop itself, which would have made the request. 
+ with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", + 1, + ), + patch( + "mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", + 0, + ), + patch( + "mavedb.worker.jobs.BACKOFF_LIMIT", + 1, + ), + ): + result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 2) + + assert not result["success"] + assert not result["retried"] + assert not result["enqueued_job"] From 82d5970155af3c5d1d83cc7e4bee162540419e1c Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 28 Apr 2025 14:40:29 -0700 Subject: [PATCH 077/166] Test case fixes --- src/mavedb/lib/clingen/linked_data_hub.py | 2 +- src/mavedb/routers/score_sets.py | 11 +---- src/mavedb/worker/jobs.py | 2 +- .../lib/clingen/test_content_constructors.py | 4 +- tests/lib/clingen/test_linked_data_hub.py | 47 ++++++++++++------- tests/lib/test_variants.py | 2 +- tests/routers/test_score_set.py | 5 +- tests/worker/test_jobs.py | 2 +- 8 files changed, 42 insertions(+), 33 deletions(-) diff --git a/src/mavedb/lib/clingen/linked_data_hub.py b/src/mavedb/lib/clingen/linked_data_hub.py index 9cf4dafb..c29eb210 100644 --- a/src/mavedb/lib/clingen/linked_data_hub.py +++ b/src/mavedb/lib/clingen/linked_data_hub.py @@ -237,7 +237,7 @@ def clingen_allele_id_from_ldh_variation(variation: Optional[dict]) -> Optional[ try: return variation["data"]["ldFor"]["Variant"][0]["entId"] - except KeyError as exc: + except (KeyError, IndexError) as exc: save_to_logging_context(format_raised_exception_info_as_dict(exc)) logger.error("Failed to extract ClinGen allele ID from variation data.", extra=logging_context()) return None diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index b4899212..db0a58c3 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -1198,16 +1198,7 @@ async def get_clinical_controls_for_score_set( clinical_controls_with_mapped_variant = [] for control_variant in clinical_controls_for_item: control_variant.mapped_variants = [ - # As of now, we only have linked clingen allele IDs for v1.3 VRS. Once v2.0 has been linked to clingen allele IDs, - # we can transition to the other filter. - # Staging filter - mv - for mv in control_variant.mapped_variants - if mv.vrs_version == "1.3" and mv.variant.score_set_id == item.id - # Production filter - # mv - # for mv in control_variant.mapped_variants - # if mv.current and mv.variant.score_set_id == item.id + mv for mv in control_variant.mapped_variants if mv.current and mv.variant.score_set_id == item.id ] if control_variant.mapped_variants: diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index 9b6c9efe..a2e50295 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -1078,7 +1078,7 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in extra=logging_context, ) send_slack_message( - text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking} of total mapped variants for {score_set.urn})." + text=f"Failed to link {len(linkage_failures)} ({ratio_failed_linking*100}% of total mapped variants for {score_set.urn})." f"This job was successfully retried. This was attempt {attempt}. Retry will occur in {backoff_time} seconds. URNs failed to link: {', '.join(linkage_failures)}." 
) elif new_job_id is None and not max_retries_exceeded: diff --git a/tests/lib/clingen/test_content_constructors.py b/tests/lib/clingen/test_content_constructors.py index dc831413..35f73adc 100644 --- a/tests/lib/clingen/test_content_constructors.py +++ b/tests/lib/clingen/test_content_constructors.py @@ -90,7 +90,7 @@ def test_construct_ldh_submission(mock_variant, mock_mapped_variant): # Validate the first submission submission1 = result[0] - assert submission1["event"]["uuid"] == uuid_1 + assert submission1["event"]["uuid"] == str(uuid_1) assert submission1["event"]["sbj"]["id"] == TEST_HGVS_IDENTIFIER assert submission1["content"]["sbj"] == {"Variant": {"hgvs": TEST_HGVS_IDENTIFIER}} assert submission1["content"]["ld"]["MaveDBMapping"][0]["entContent"]["mavedb_id"] == VALID_VARIANT_URN @@ -98,7 +98,7 @@ def test_construct_ldh_submission(mock_variant, mock_mapped_variant): # Validate the second submission submission2 = result[1] - assert submission2["event"]["uuid"] == uuid_2 + assert submission2["event"]["uuid"] == str(uuid_2) assert submission2["event"]["sbj"]["id"] == TEST_HGVS_IDENTIFIER assert submission2["content"]["sbj"] == {"Variant": {"hgvs": TEST_HGVS_IDENTIFIER}} assert submission2["content"]["ld"]["MaveDBMapping"][0]["entContent"]["mavedb_id"] == VALID_VARIANT_URN diff --git a/tests/lib/clingen/test_linked_data_hub.py b/tests/lib/clingen/test_linked_data_hub.py index 17bbafc8..ba8d48b1 100644 --- a/tests/lib/clingen/test_linked_data_hub.py +++ b/tests/lib/clingen/test_linked_data_hub.py @@ -7,7 +7,13 @@ from mavedb.lib.clingen.constants import LDH_LINKED_DATA_URL, GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD from mavedb.lib.utils import batched -from mavedb.lib.clingen.linked_data_hub import ClinGenLdhService, get_clingen_variation +from mavedb.lib.clingen.linked_data_hub import ( + ClinGenLdhService, + get_clingen_variation, + clingen_allele_id_from_ldh_variation, +) + +from tests.helpers.constants import VALID_CLINGEN_CA_ID TEST_CLINGEN_URL = "https://pytest.clingen.com" @@ -209,15 +215,16 @@ def test_dispatch_submissions_no_batching( @patch("mavedb.lib.clingen.linked_data_hub.requests.get") def test_get_clingen_variation_success(mock_get): + mocked_response_json = {"data": {"ldFor": {"Variant": [{"id": "variant_1", "name": "Test Variant"}]}}} mock_response = MagicMock() mock_response.status_code = 200 - mock_response.json.return_value = {"data": {"ldFor": {"Variant": [{"id": "variant_1", "name": "Test Variant"}]}}} + mock_response.json.return_value = mocked_response_json mock_get.return_value = mock_response urn = "urn:example:variant" result = get_clingen_variation(urn) - assert result == {"id": "variant_1", "name": "Test Variant"} + assert result == mocked_response_json mock_get.assert_called_once_with( f"{LDH_LINKED_DATA_URL}/{parse.quote_plus(urn)}", headers={"Accept": "application/json"}, @@ -241,18 +248,26 @@ def test_get_clingen_variation_failure(mock_get): ) -@patch("mavedb.lib.clingen.linked_data_hub.requests.get") -def test_get_clingen_variation_invalid_response(mock_get): - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.json.return_value = {"data": {"ldFor": {}}} # Missing "Variant" key - mock_get.return_value = mock_response +def test_clingen_allele_id_from_ldh_variation_success(): + variation = {"data": {"ldFor": {"Variant": [{"entId": VALID_CLINGEN_CA_ID}]}}} + result = clingen_allele_id_from_ldh_variation(variation) + assert result == VALID_CLINGEN_CA_ID - urn = "urn:example:variant" - with pytest.raises(KeyError): - 
get_clingen_variation(urn) - mock_get.assert_called_once_with( - f"{LDH_LINKED_DATA_URL}/{parse.quote_plus(urn)}", - headers={"Accept": "application/json"}, - ) +def test_clingen_allele_id_from_ldh_variation_missing_key(): + variation = {"data": {"ldFor": {"Variant": []}}} + + result = clingen_allele_id_from_ldh_variation(variation) + assert result is None + + +def test_clingen_allele_id_from_ldh_variation_no_variation(): + result = clingen_allele_id_from_ldh_variation(None) + assert result is None + + +def test_clingen_allele_id_from_ldh_variation_key_error(): + variation = {"data": {}} + + result = clingen_allele_id_from_ldh_variation(variation) + assert result is None diff --git a/tests/lib/test_variants.py b/tests/lib/test_variants.py index 7571a5fa..92c5791f 100644 --- a/tests/lib/test_variants.py +++ b/tests/lib/test_variants.py @@ -64,5 +64,5 @@ def test_hgvs_from_mapped_variant_invalid_type(): def test_hgvs_from_mapped_variant_invalid_structure(): mapped_variant = MagicMock() mapped_variant.post_mapped = {"invalid_key": "InvalidType"} - with pytest.raises(ValueError): + with pytest.raises(KeyError): hgvs_from_mapped_variant(mapped_variant) diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 773c26b2..1ce59e1d 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -2464,7 +2464,10 @@ def test_can_fetch_current_clinical_controls_for_score_set_with_parameters( query_string = "?" for param, accessor in parameters: - query_string += f"&{param}={clinical_control[accessor]}" + query_string += f"{param}={clinical_control[accessor]}&" + + # Remove the last '&' from the query string + query_string = query_string.strip("&") response = client.get(f"/api/v1/score-sets/{score_set['urn']}/clinical-controls{query_string}") assert response.status_code == 200 diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index dc07ec9f..44ed6152 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -1947,7 +1947,7 @@ async def dummy_linking_job(): ), patch( "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", - -1, + 2, ), ): result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) From ae8ad7c03787bd74f51629c172cfb3e69317822a Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 28 Apr 2025 16:10:04 -0700 Subject: [PATCH 078/166] Use ubuntu-latest for 3.9 Tests ubuntu-20.04 now discontinued on git actions --- .github/workflows/run-tests-on-push.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests-on-push.yml b/.github/workflows/run-tests-on-push.yml index 965ddfb3..ad6359cd 100644 --- a/.github/workflows/run-tests-on-push.yml +++ b/.github/workflows/run-tests-on-push.yml @@ -7,8 +7,8 @@ env: jobs: run-tests-3_9: - runs-on: ubuntu-20.04 - name: Pytest on Python 3.9 / Ubuntu 20.04 + runs-on: ubuntu-latest + name: Pytest on Python 3.9 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 From b84b17a44dbe5d5b06f46b50029374d77728c182 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 28 Apr 2025 18:12:00 -0700 Subject: [PATCH 079/166] Make Mapping Job Enqueue ClinGen Submission Job --- src/mavedb/lib/exceptions.py | 6 +++++ src/mavedb/worker/jobs.py | 43 +++++++++++++++++++++++++++------ tests/conftest.py | 16 +++++++++++-- tests/worker/test_jobs.py | 46 +++++++++++++++++++++++++++++++++--- 4 files changed, 99 insertions(+), 12 deletions(-) diff --git a/src/mavedb/lib/exceptions.py b/src/mavedb/lib/exceptions.py 
index 026c6800..46380e96 100644 --- a/src/mavedb/lib/exceptions.py +++ b/src/mavedb/lib/exceptions.py @@ -180,6 +180,12 @@ class MappingEnqueueError(ValueError): pass +class SubmissionEnqueueError(ValueError): + """Raised when a linking job fails to be enqueued despite appearing as if it should have been""" + + pass + + class LinkingEnqueueError(ValueError): """Raised when a linking job fails to be enqueued despite appearing as if it should have been""" diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index a2e50295..397e0eda 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -28,6 +28,7 @@ ) from mavedb.lib.exceptions import ( MappingEnqueueError, + SubmissionEnqueueError, LinkingEnqueueError, NonexistentMappingReferenceError, NonexistentMappingResultsError, @@ -557,8 +558,38 @@ async def map_variants_for_score_set( db.commit() return {"success": False, "retried": (not max_retries_exceeded and new_job_id is not None)} + new_job_id = None + try: + new_job = await redis.enqueue_job( + "submit_score_set_mappings_to_ldh", + correlation_id, + score_set.id, + ) + + if new_job: + new_job_id = new_job.job_id + + logging_context["submit_clingen_variants_job_id"] = new_job_id + logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) + + else: + raise SubmissionEnqueueError() + + except Exception as e: + send_slack_error(e) + send_slack_message( + f"Could not submit mappings to LDH for score set {score_set.urn}. Mappings for this score set should be submitted manually." + ) + logging_context = {**logging_context, **format_raised_exception_info_as_dict(e)} + logger.error( + msg="Mapped variant ClinGen submission encountered an unexpected error while attempting to enqueue a submission job. This job will not be retried.", + extra=logging_context, + ) + + return {"success": False, "retried": False, "enqueued_job": new_job_id} + ctx["state"][ctx["job_id"]] = logging_context.copy() - return {"success": True, "retried": False} + return {"success": True, "retried": False, "enqueued_job": new_job_id} async def variant_mapper_manager(ctx: dict, correlation_id: str, updater_id: int, attempt: int = 1) -> dict: @@ -836,14 +867,14 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score return {"success": False, "retried": False, "enqueued_job": None} - new_job = None + new_job_id = None try: new_job = await redis.enqueue_job( "link_clingen_variants", correlation_id, score_set.id, 1, - defer_by=timedelta(minutes=LINKING_BACKOFF_IN_SECONDS), + _defer_by=timedelta(minutes=LINKING_BACKOFF_IN_SECONDS), ) if new_job: @@ -864,9 +895,9 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score extra=logging_context, ) - return {"success": False, "retried": False, "enqueued_job": new_job} + return {"success": False, "retried": False, "enqueued_job": new_job_id} - return {"success": True, "retried": False, "enqueued_job": new_job} + return {"success": True, "retried": False, "enqueued_job": new_job_id} async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: int, attempt: int) -> dict: @@ -968,8 +999,6 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in for variant_urn, clingen_variation in linked_data ] - print(linked_allele_ids) - linkage_failures = [] for variant_urn, ldh_variation in linked_allele_ids: # XXX: Should we unlink variation if it is not found? Does this constitute a failure? 
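For orientation, the follow-on job chaining these hunks add boils down to a single arq call. The sketch below is illustrative only: it assumes an ArqRedis pool like the one carried in the worker context, uses a hypothetical LINKING_BACKOFF_IN_SECONDS value, and defers by seconds; the job name and positional arguments mirror the hunk above. Note that arq's deferral keyword is the underscore-prefixed _defer_by; a bare defer_by would simply be forwarded to the job as an ordinary keyword argument.

from datetime import timedelta
from typing import Optional

from arq.connections import ArqRedis

LINKING_BACKOFF_IN_SECONDS = 60  # hypothetical stand-in for the real constant


async def enqueue_linking_job(redis: ArqRedis, correlation_id: str, score_set_id: int) -> Optional[str]:
    # enqueue_job returns a Job handle on success and None when a job with the
    # same id already exists; callers treat a falsy return as an enqueue failure.
    job = await redis.enqueue_job(
        "link_clingen_variants",
        correlation_id,
        score_set_id,
        1,  # first attempt
        _defer_by=timedelta(seconds=LINKING_BACKOFF_IN_SECONDS),
    )
    return job.job_id if job else None

A None return is why the worker raises SubmissionEnqueueError or LinkingEnqueueError when the enqueue appears to succeed but no job handle comes back.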
diff --git a/tests/conftest.py b/tests/conftest.py index e5d55a32..c16ef610 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,7 +28,13 @@ from mavedb.lib.authorization import require_current_user from mavedb.models.user import User from mavedb.server_main import app -from mavedb.worker.jobs import create_variants_for_score_set, map_variants_for_score_set, variant_mapper_manager +from mavedb.worker.jobs import ( + create_variants_for_score_set, + map_variants_for_score_set, + variant_mapper_manager, + submit_score_set_mappings_to_ldh, + link_clingen_variants, +) sys.path.append(".") @@ -160,7 +166,13 @@ async def on_job(ctx): ctx["pool"] = futures.ProcessPoolExecutor() worker_ = Worker( - functions=[create_variants_for_score_set, map_variants_for_score_set, variant_mapper_manager], + functions=[ + create_variants_for_score_set, + map_variants_for_score_set, + variant_mapper_manager, + submit_score_set_mappings_to_ldh, + link_clingen_variants, + ], redis_pool=arq_redis, burst=True, poll_delay=0, diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index 44ed6152..dde19500 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -121,6 +121,8 @@ async def dummy_mapping_job(): result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) assert result["success"] + assert not result["retried"] + assert result["enqueued_job"] is not None return session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() @@ -462,6 +464,13 @@ async def test_create_variants_for_score_set_enqueues_manager_and_successful_map async def dummy_mapping_job(): return await setup_mapping_output(async_client, session, score_set) + async def dummy_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + # Variants have not yet been created, so infer their URNs. 
+ async def dummy_linking_job(): + return [(f"{score_set_urn}#{i}", TEST_CLINGEN_LDH_LINKING_RESPONSE) for i in range(1, len(scores) + 1)] + with ( patch.object( cdot.hgvs.dataproviders.RESTDataProvider, @@ -471,9 +480,11 @@ async def dummy_mapping_job(): patch.object( _UnixSelectorEventLoop, "run_in_executor", - return_value=dummy_mapping_job(), + side_effect=[dummy_mapping_job(), dummy_submission_job(), dummy_linking_job()], ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), ): await arq_redis.enqueue_job("create_variants_for_score_set", uuid4().hex, score_set.id, 1, scores, counts) await arq_worker.async_run() @@ -583,6 +594,7 @@ async def dummy_mapping_job(): assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" assert result["success"] assert not result["retried"] + assert result["enqueued_job"] is not None assert len(mapped_variants_for_score_set) == score_set.num_variants assert score_set.mapping_state == MappingState.complete assert score_set.mapping_errors is None @@ -656,6 +668,7 @@ async def dummy_mapping_job(): assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" assert result["success"] assert not result["retried"] + assert result["enqueued_job"] is not None assert len(mapped_variants_for_score_set) == score_set.num_variants + 1 assert len(preexisting_variants) == 1 assert len(new_variants) == score_set.num_variants @@ -1048,6 +1061,7 @@ async def dummy_mapping_job(): assert (await standalone_worker_context["redis"].get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" assert result["success"] assert not result["retried"] + assert result["enqueued_job"] is not None assert len(mapped_variants_for_score_set) == 0 assert score_set.mapping_state == MappingState.failed @@ -1381,6 +1395,17 @@ async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping( async def dummy_mapping_job(): return await setup_mapping_output(async_client, session, score_set) + async def dummy_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine # object that sets up test mappingn output. 
@@ -1388,9 +1413,11 @@ async def dummy_mapping_job(): patch.object( _UnixSelectorEventLoop, "run_in_executor", - return_value=dummy_mapping_job(), + side_effect=[dummy_mapping_job(), dummy_submission_job(), dummy_linking_job()], ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), ): await arq_redis.enqueue_job("variant_mapper_manager", uuid4().hex, 1) await arq_worker.async_run() @@ -1425,6 +1452,17 @@ async def failed_mapping_job(): async def dummy_mapping_job(): return await setup_mapping_output(async_client, session, score_set) + async def dummy_submission_job(): + return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] + + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine # object that sets up test mappingn output. @@ -1432,9 +1470,11 @@ async def dummy_mapping_job(): patch.object( _UnixSelectorEventLoop, "run_in_executor", - side_effect=[failed_mapping_job(), dummy_mapping_job()], + side_effect=[failed_mapping_job(), dummy_mapping_job(), dummy_submission_job(), dummy_linking_job()], ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), ): await arq_redis.enqueue_job("variant_mapper_manager", uuid4().hex, 1) await arq_worker.async_run() From 903461e6d2d71e78c62ea203a590943693915e9b Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 28 Apr 2025 21:15:39 -0700 Subject: [PATCH 080/166] Dont backoff clingen submission requests --- src/mavedb/lib/clingen/linked_data_hub.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mavedb/lib/clingen/linked_data_hub.py b/src/mavedb/lib/clingen/linked_data_hub.py index c29eb210..34718d40 100644 --- a/src/mavedb/lib/clingen/linked_data_hub.py +++ b/src/mavedb/lib/clingen/linked_data_hub.py @@ -12,7 +12,7 @@ from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_LINKED_DATA_URL from mavedb.lib.types.clingen import LdhSubmission -from mavedb.lib.utils import batched, request_with_backoff +from mavedb.lib.utils import batched logger = logging.getLogger(__name__) @@ -143,12 +143,13 @@ def dispatch_submissions( logger.info(msg=f"Dispatching {len(submissions)} ldh submissions...", extra=logging_context()) for idx, content in enumerate(submissions): try: - response = request_with_backoff( - method="PUT", + logger.debug(msg=f"Dispatching submission {idx+1}.", extra=logging_context()) + response = requests.put( url=self.url, json=content, headers={"Authorization": f"Bearer {self.authenticate()}", "Content-Type": "application/json"}, ) + response.raise_for_status() submission_successes.append(response.json()) logger.info( msg=f"Successfully dispatched ldh submission ({idx+1} / {len(submissions)}).", From f3c56fd41b60e1bdd0450f0dd089409cf2ec2957 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 29 Apr 2025 08:28:38 -0700 Subject: [PATCH 081/166] Defer linking job in seconds --- src/mavedb/worker/jobs.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index 397e0eda..c647944e 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -874,7 +874,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score correlation_id, score_set.id, 1, - _defer_by=timedelta(minutes=LINKING_BACKOFF_IN_SECONDS), + _defer_by=timedelta(seconds=LINKING_BACKOFF_IN_SECONDS), ) if new_job: From 95beefd9dc8e6dd0813e024bfc826a2b5cc95a17 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 29 Apr 2025 09:05:11 -0700 Subject: [PATCH 082/166] Mock Requests.put in tests rather than outdated request_with_backoff --- tests/lib/clingen/test_linked_data_hub.py | 39 ++++++++++------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/tests/lib/clingen/test_linked_data_hub.py b/tests/lib/clingen/test_linked_data_hub.py index ba8d48b1..43dd80fd 100644 --- a/tests/lib/clingen/test_linked_data_hub.py +++ b/tests/lib/clingen/test_linked_data_hub.py @@ -121,14 +121,12 @@ def test_existing_jwt_not_set(self, mock_getenv, clingen_service): ### Test the dispatch_submissions method - @patch("mavedb.lib.clingen.linked_data_hub.request_with_backoff") + @patch("mavedb.lib.clingen.linked_data_hub.requests.put") @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService.authenticate") @patch("mavedb.lib.clingen.linked_data_hub.batched") - def test_dispatch_submissions_success( - self, mock_batched, mock_authenticate, mock_request_with_backoff, clingen_service - ): + def test_dispatch_submissions_success(self, mock_batched, mock_authenticate, mock_request, clingen_service): mock_authenticate.return_value = "test_jwt_token" - mock_request_with_backoff.return_value.json.return_value = {"success": True} + mock_request.return_value.json.return_value = {"success": True} content_submissions = [{"id": 1}, {"id": 2}, {"id": 3}] mock_batched.return_value = [[{"id": 1}, {"id": 2}], [{"id": 3}]] # Simulate batching @@ -140,18 +138,17 @@ def test_dispatch_submissions_success( assert len(failures) == 0 mock_batched.assert_called_once_with(content_submissions, 2) for submission in batched(content_submissions, batch_size): - mock_request_with_backoff.assert_any_call( - method="PUT", + mock_request.assert_any_call( url=clingen_service.url, json=submission, headers={"Authorization": "Bearer test_jwt_token", "Content-Type": "application/json"}, ) - @patch("mavedb.lib.clingen.linked_data_hub.request_with_backoff") + @patch("mavedb.lib.clingen.linked_data_hub.requests.put") @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService.authenticate") - def test_dispatch_submissions_failure(self, mock_authenticate, mock_request_with_backoff, clingen_service): + def test_dispatch_submissions_failure(self, mock_authenticate, mock_request, clingen_service): mock_authenticate.return_value = "test_jwt_token" - mock_request_with_backoff.side_effect = requests.exceptions.RequestException("Request failed") + mock_request.side_effect = requests.exceptions.RequestException("Request failed") content_submissions = [{"id": 1}, {"id": 2}, {"id": 3}] @@ -160,24 +157,23 @@ def test_dispatch_submissions_failure(self, mock_authenticate, mock_request_with assert len(successes) == 0 assert len(failures) == 3 for submission in content_submissions: - mock_request_with_backoff.assert_any_call( - method="PUT", + mock_request.assert_any_call( url=clingen_service.url, json=submission, headers={"Authorization": "Bearer test_jwt_token", "Content-Type":
"application/json"}, ) - @patch("mavedb.lib.clingen.linked_data_hub.request_with_backoff") + @patch("mavedb.lib.clingen.linked_data_hub.requests.put") @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService.authenticate") - def test_dispatch_submissions_partial_success(self, mock_authenticate, mock_request_with_backoff, clingen_service): + def test_dispatch_submissions_partial_success(self, mock_authenticate, mock_request, clingen_service): mock_authenticate.return_value = "test_jwt_token" - def mock_request_with_backoff_side_effect(*args, **kwargs): + def mock_request_side_effect(*args, **kwargs): if kwargs["json"]["id"] == 2: raise requests.exceptions.RequestException("Request failed") return MagicMock(json=MagicMock(return_value={"success": True})) - mock_request_with_backoff.side_effect = mock_request_with_backoff_side_effect + mock_request.side_effect = mock_request_side_effect content_submissions = [{"id": 1}, {"id": 2}, {"id": 3}] @@ -187,14 +183,12 @@ def mock_request_with_backoff_side_effect(*args, **kwargs): assert len(failures) == 1 assert failures[0]["id"] == 2 - @patch("mavedb.lib.clingen.linked_data_hub.request_with_backoff") + @patch("mavedb.lib.clingen.linked_data_hub.requests.put") @patch("mavedb.lib.clingen.linked_data_hub.ClinGenLdhService.authenticate") @patch("mavedb.lib.clingen.linked_data_hub.batched") - def test_dispatch_submissions_no_batching( - self, mock_batched, mock_authenticate, mock_request_with_backoff, clingen_service - ): + def test_dispatch_submissions_no_batching(self, mock_batched, mock_authenticate, mock_request, clingen_service): mock_authenticate.return_value = "test_jwt_token" - mock_request_with_backoff.return_value.json.return_value = {"success": True} + mock_request.return_value.json.return_value = {"success": True} content_submissions = [{"id": 1}, {"id": 2}, {"id": 3}] mock_batched.return_value = content_submissions # No batching @@ -205,8 +199,7 @@ def test_dispatch_submissions_no_batching( assert len(failures) == 0 mock_batched.assert_not_called() for submission in content_submissions: - mock_request_with_backoff.assert_any_call( - method="PUT", + mock_request.assert_any_call( url=clingen_service.url, json=submission, headers={"Authorization": "Bearer test_jwt_token", "Content-Type": "application/json"}, From 708295868e4502955ba7535a74150474b27fe736 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 29 Apr 2025 09:16:18 -0700 Subject: [PATCH 083/166] Use Named Function for Linkage Job --- src/mavedb/worker/jobs.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index c647944e..5a55ea75 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -974,9 +974,13 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in try: logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=logging_context) + + def all_clingen_variation(variant_urns): + return [(variant_urn, get_clingen_variation(variant_urn)) for variant_urn in variant_urns] + # TODO#372: Non-nullable variant urns. 
blocking = functools.partial( - lambda urns: [(variant_urn, get_clingen_variation(variant_urn)) for variant_urn in urns], + all_clingen_variation, variant_urns, # type: ignore ) loop = asyncio.get_running_loop() From 43ed08bb748614bd1713a2c041cdd74d58014d0c Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 29 Apr 2025 11:53:58 -0700 Subject: [PATCH 084/166] Define clingen fetch at top level of module --- src/mavedb/worker/jobs.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index 5a55ea75..d30064ff 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -900,6 +900,10 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score return {"success": True, "retried": False, "enqueued_job": new_job_id} +def do_clingen_fetch(variant_urns): + return [(variant_urn, get_clingen_variation(variant_urn)) for variant_urn in variant_urns] + + async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: int, attempt: int) -> dict: logging_context = {} score_set = None @@ -975,12 +979,9 @@ async def link_clingen_variants(ctx: dict, correlation_id: str, score_set_id: in try: logger.info(msg="Attempting to link mapped variants to LDH submissions.", extra=logging_context) - def all_clingen_variation(variant_urns): - return [(variant_urn, get_clingen_variation(variant_urn)) for variant_urn in variant_urns] - # TODO#372: Non-nullable variant urns. blocking = functools.partial( - all_clingen_variation, + do_clingen_fetch, variant_urns, # type: ignore ) loop = asyncio.get_running_loop() From a32335b56eb2decc705b5a045dbbadec872bc1fc Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Tue, 6 May 2025 09:26:50 +1000 Subject: [PATCH 085/166] Modify the mapped variants' filter condition so that it works correctly. --- src/mavedb/routers/mapped_variant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/routers/mapped_variant.py b/src/mavedb/routers/mapped_variant.py index 05b9f6ad..31f85ef4 100644 --- a/src/mavedb/routers/mapped_variant.py +++ b/src/mavedb/routers/mapped_variant.py @@ -26,7 +26,7 @@ async def fetch_mapped_variant_by_variant_urn(db, urn: str) -> Optional[MappedVa db.query(MappedVariant) .filter(Variant.urn == urn) .filter(MappedVariant.variant_id == Variant.id) - .filter(MappedVariant.current is True) + .filter(MappedVariant.current == True) .one_or_none() ) except MultipleResultsFound: From 5884925c06147e0f7d53c5b3a9bfdef5619d0588 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Tue, 6 May 2025 09:32:19 +1000 Subject: [PATCH 086/166] Fix poetry run ruff check error. 
--- src/mavedb/routers/mapped_variant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/routers/mapped_variant.py b/src/mavedb/routers/mapped_variant.py index 31f85ef4..1fab6d1b 100644 --- a/src/mavedb/routers/mapped_variant.py +++ b/src/mavedb/routers/mapped_variant.py @@ -26,7 +26,7 @@ async def fetch_mapped_variant_by_variant_urn(db, urn: str) -> Optional[MappedVa db.query(MappedVariant) .filter(Variant.urn == urn) .filter(MappedVariant.variant_id == Variant.id) - .filter(MappedVariant.current == True) + .filter(MappedVariant.current) # filter current is true .one_or_none() ) except MultipleResultsFound: From 5eb9e9f6eb7f2bc015ccaad7527509163fce1f61 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Tue, 6 May 2025 10:02:23 +1000 Subject: [PATCH 087/166] The Ubuntu 20.04 Actions runner image will begin deprecation on 2025-02-01 and will be fully unsupported by 2025-04-15. Workflows using the ubuntu-20.04 image label should be updated to ubuntu-latest, ubuntu-22.04, ubuntu-24.04. --- .github/workflows/run-tests-on-push.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests-on-push.yml b/.github/workflows/run-tests-on-push.yml index 965ddfb3..5cc51011 100644 --- a/.github/workflows/run-tests-on-push.yml +++ b/.github/workflows/run-tests-on-push.yml @@ -7,8 +7,8 @@ env: jobs: run-tests-3_9: - runs-on: ubuntu-20.04 - name: Pytest on Python 3.9 / Ubuntu 20.04 + runs-on: ubuntu-24.04 + name: Pytest on Python 3.9 / Ubuntu 24.04 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 From cd96d071763438da99aca573d82dd65d3cf2222d Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 25 Feb 2025 17:26:28 -0800 Subject: [PATCH 088/166] Fix Docker Casing Warnings --- Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index b76802f3..0acd8c06 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ # python-base # Set up shared environment variables ################################ -FROM python:3.9 as python-base +FROM python:3.9 AS python-base # Poetry # https://python-poetry.org/docs/configuration/#using-environment-variables @@ -69,7 +69,7 @@ RUN samtools faidx GCF_000001405.39_GRCh38.p13_genomic.fna.gz # builder # Builds application dependencies and creates venv ################################ -FROM python-base as builder +FROM python-base AS builder WORKDIR /code @@ -90,7 +90,7 @@ COPY src/mavedb/server_main.py /code/main.py # worker # Worker image ################################ -FROM builder as worker +FROM builder AS worker COPY --from=downloader /data /data # copy pre-built poetry + venv @@ -103,7 +103,7 @@ CMD ["arq", "mavedb.worker.WorkerSettings"] # application # Application image ################################ -FROM builder as application +FROM builder AS application COPY --from=downloader /data /data # copy pre-built poetry + venv From 0ab25585a8b487c018ac19b6736a5600a985e804 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 28 Feb 2025 15:24:15 -0800 Subject: [PATCH 089/166] Refactor Dataframe Validation Logic Refactors dataframe validation logic into 3 component files: column.py, dataframe.py, and variant.py. This simplifies the validation structure and logically separates validation function based on the part of the df they operate on. 
--- src/mavedb/lib/validation/__init__.py | 0 src/mavedb/lib/validation/constants/target.py | 1 + src/mavedb/lib/validation/dataframe.py | 782 ------------------ src/mavedb/lib/validation/dataframe/column.py | 252 ++++++ .../lib/validation/dataframe/dataframe.py | 371 +++++++++ .../lib/validation/dataframe/variant.py | 329 ++++++++ src/mavedb/lib/validation/py.typed | 0 src/mavedb/worker/jobs.py | 2 +- 8 files changed, 954 insertions(+), 783 deletions(-) create mode 100644 src/mavedb/lib/validation/__init__.py delete mode 100644 src/mavedb/lib/validation/dataframe.py create mode 100644 src/mavedb/lib/validation/dataframe/column.py create mode 100644 src/mavedb/lib/validation/dataframe/dataframe.py create mode 100644 src/mavedb/lib/validation/dataframe/variant.py create mode 100644 src/mavedb/lib/validation/py.typed diff --git a/src/mavedb/lib/validation/__init__.py b/src/mavedb/lib/validation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/mavedb/lib/validation/constants/target.py b/src/mavedb/lib/validation/constants/target.py index f64b4bd4..0741be09 100644 --- a/src/mavedb/lib/validation/constants/target.py +++ b/src/mavedb/lib/validation/constants/target.py @@ -1 +1,2 @@ valid_sequence_types = ["infer", "dna", "protein"] +strict_valid_sequence_types = ["dna", "protein"] diff --git a/src/mavedb/lib/validation/dataframe.py b/src/mavedb/lib/validation/dataframe.py deleted file mode 100644 index 2d7bdffc..00000000 --- a/src/mavedb/lib/validation/dataframe.py +++ /dev/null @@ -1,782 +0,0 @@ -from typing import Optional, Tuple, Union - -import hgvs.exceptions -import hgvs.parser -import hgvs.validator -import numpy as np -import pandas as pd -from cdot.hgvs.dataproviders import RESTDataProvider -from fqfa.util.translate import translate_dna -from mavehgvs.exceptions import MaveHgvsParseError -from mavehgvs.variant import Variant - -from mavedb.lib.exceptions import MixedTargetError -from mavedb.lib.validation.constants.general import ( - hgvs_nt_column, - hgvs_pro_column, - hgvs_splice_column, - required_score_column, -) -from mavedb.lib.validation.exceptions import ValidationError -from mavedb.models.target_accession import TargetAccession -from mavedb.models.target_gene import TargetGene -from mavedb.models.target_sequence import TargetSequence - -# handle with pandas all null strings -# provide a csv or a pandas dataframe -# take dataframe, output as csv to temp directory, use standard library - - -STANDARD_COLUMNS = (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, required_score_column) - - -def infer_column_type(col: pd.Series) -> str: - """Infer whether the given column contains string or numeric data. - - The function returns "string" for string columns or "numeric" for numeric columns. - If there is a mixture of types it returns "mixed". - If every value in the column is `None` or NA it returns "empty". - - Parameters - ---------- - col : pandas.Series - The column to inspect - - Returns - ------- - str - One of "string", "numeric", "mixed", or "empty" - """ - if col.isna().all(): - return "empty" - else: - col_numeric = pd.to_numeric(col, errors="coerce") - if col_numeric.isna().all(): # nothing converted to a number - return "string" - elif np.all(col.isna() == col_numeric.isna()): # all non-NA values converted - return "numeric" - else: # some values converted but not all - return "mixed" - - -def sort_dataframe_columns(df: pd.DataFrame) -> pd.DataFrame: - """Sort the columns of the given dataframe according to the expected ordering in MaveDB. 
- - MaveDB expects that dataframes have columns in the following order (note some columns are optional): - * hgvs_nt - * hgvs_splice - * hgvs_pro - * score - * other - - Parameters - ---------- - df : pandas.DataFrame - The dataframe with columns to sort - - Returns - ------- - pandas.DataFrame - The dataframe with the same data but sorted columns - """ - - def column_sort_function(value, columns): - if value.lower() in STANDARD_COLUMNS: - return STANDARD_COLUMNS.index(value.lower()) - else: - return columns.index(value) + len(STANDARD_COLUMNS) - - old_columns = list(df.columns) - new_columns = sorted(old_columns, key=lambda v: column_sort_function(v, old_columns)) - - return df[new_columns] - - -def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame: - """Standardize a dataframe by sorting the columns and changing the standard column names to lowercase. - - The standard column names are: - * hgvs_nt - * hgvs_splice - * hgvs_pro - * score - - Case for other columns is preserved. - - Parameters - ---------- - df : pandas.DataFrame - The dataframe to standardize - - Returns - ------- - pandas.DataFrame - The standardized dataframe - """ - column_mapper = {x: x.lower() for x in df.columns if x.lower() in STANDARD_COLUMNS} - - df.rename(columns=column_mapper, inplace=True) - - return sort_dataframe_columns(df) - - -def validate_and_standardize_dataframe_pair( - scores_df: pd.DataFrame, counts_df: Optional[pd.DataFrame], targets: list[TargetGene], hdp: RESTDataProvider -) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: - """ - Perform validation and standardization on a pair of score and count dataframes. - - Parameters - ---------- - scores_df : pandas.DataFrame - The scores dataframe - counts_df : Optional[pandas.DataFrame] - The counts dataframe, can be None if not present - targets : str - The target genes on which to validate dataframes - hdp : RESTDataProvider - The biocommons.hgvs compatible data provider. Used to fetch sequences for hgvs validation. - - Returns - ------- - Tuple[pd.DataFrame, Optional[pd.DataFrame]] - The standardized score and count dataframes, or score and None if no count dataframe was provided - - Raises - ------ - ValidationError - If one of the validation functions raises an exception - """ - if not targets: - raise ValueError("Can't validate provided file with no targets.") - - validate_dataframe(scores_df, "scores", targets, hdp) - if counts_df is not None: - validate_dataframe(counts_df, "counts", targets, hdp) - validate_variant_columns_match(scores_df, counts_df) - - new_scores_df = standardize_dataframe(scores_df) - new_counts_df = standardize_dataframe(counts_df) if counts_df is not None else None - return new_scores_df, new_counts_df - - -def validate_dataframe(df: pd.DataFrame, kind: str, targets: list["TargetGene"], hdp: RESTDataProvider) -> None: - """ - Validate that a given dataframe passes all checks. 
- - Parameters - ---------- - df : pandas.DataFrame - The dataframe to validate - kind : str - The kind of dataframe "counts" or "scores" - target_seq : str - The target sequence to validate variants against - target_seq_type : str - The kind of target sequence, one of "infer" "dna" or "protein" - - Returns - ------- - None - - Raises - ------ - ValidationError - If one of the validators called raises an exception - """ - # basic checks - validate_column_names(df, kind) - validate_no_null_rows(df) - - column_mapping = {c.lower(): c for c in df.columns} - index_column = choose_dataframe_index_column(df) - - prefixes: dict[str, Optional[str]] = dict() - for c in column_mapping: - if c in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): - is_index = column_mapping[c] == index_column - prefixes[c] = None - - # Ignore validation for null non-index hgvs columns - if df[column_mapping[c]].isna().all() and not is_index: - continue - - score_set_is_accession_based = all(target.target_accession for target in targets) - score_set_is_sequence_based = all(target.target_sequence for target in targets) - - # This is typesafe, despite Pylance's claims otherwise - if score_set_is_accession_based and not score_set_is_sequence_based: - validate_hgvs_genomic_column( - df[column_mapping[c]], - is_index, - [target.target_accession for target in targets], - hdp, # type: ignore - ) - elif score_set_is_sequence_based and not score_set_is_accession_based: - validate_hgvs_transgenic_column( - df[column_mapping[c]], - is_index, - {target.target_sequence.label: target.target_sequence for target in targets}, # type: ignore - ) - else: - raise MixedTargetError("Could not validate dataframe against provided mixed target types.") - - # post validation, handle prefixes. We've already established these columns are non-null - if score_set_is_accession_based or len(targets) > 1: - prefixes[c] = ( - df[column_mapping[c]].dropna()[0].split(" ")[0].split(":")[1][0] - ) # Just take the first prefix, we validate consistency elsewhere - else: - prefixes[c] = df[column_mapping[c]].dropna()[0][0] - - else: - force_numeric = (c == required_score_column) or (kind == "counts") - validate_data_column(df[column_mapping[c]], force_numeric) - - validate_hgvs_prefix_combinations( - hgvs_nt=prefixes[hgvs_nt_column], - hgvs_splice=prefixes[hgvs_splice_column], - hgvs_pro=prefixes[hgvs_pro_column], - transgenic=all(target.target_sequence for target in targets), - ) - - -def validate_column_names(df: pd.DataFrame, kind: str) -> None: - """Validate the column names in a dataframe. - - This function validates the column names in the given dataframe. - It can be run for either a "scores" dataframe or a "counts" dataframe. - A "scores" dataframe must have a column named 'score' and a "counts" dataframe cannot have a column named 'score'. - - The function also checks for a valid combination of columns that define variants. - - Basic checks are performed to make sure that a column name is not empty, null, or whitespace, - as well as making sure there are no duplicate column names. 
- - Parameters - ---------- - df : pandas.DataFrame - The scores or counts dataframe to be validated - - kind : str - Either "counts" or "scores" depending on the kind of dataframe being validated - - Raises - ------ - ValidationError - If the column names are not valid - """ - if any(type(c) is not str for c in df.columns): - raise ValidationError("column names must be strings") - - if any(c.isspace() for c in df.columns) or any(len(c) == 0 for c in df.columns): - raise ValidationError("column names cannot be empty or whitespace") - - columns = [c.lower() for c in df.columns] - - if kind == "scores": - if required_score_column not in columns: - raise ValidationError(f"score dataframe must have a '{required_score_column}' column") - elif kind == "counts": - if required_score_column in columns: - raise ValidationError(f"counts dataframe must not have a '{required_score_column}' column") - else: - raise ValueError("kind only accepts scores and counts") - - if hgvs_splice_column in columns: - if hgvs_nt_column not in columns or hgvs_pro_column not in columns: - raise ValidationError( - f"dataframes with '{hgvs_splice_column}' must also define '{hgvs_nt_column}' and '{hgvs_pro_column}'" - ) - - if len(columns) != len(set(columns)): - raise ValidationError("duplicate column names are not allowed (this check is case insensitive)") - - if set(columns).isdisjoint({hgvs_nt_column, hgvs_splice_column, hgvs_pro_column}): - raise ValidationError("dataframe does not define any variant columns") - - if set(columns).issubset({hgvs_nt_column, hgvs_splice_column, hgvs_pro_column}): - raise ValidationError("dataframe does not define any data columns") - - -def validate_no_null_rows(df: pd.DataFrame) -> None: - """Check that there are no fully null rows in the dataframe. - - Parameters - __________ - df : pandas.DataFrame - The scores or counts dataframe being validated - - Raises - ______ - ValidationError - If there are null rows in the dataframe - """ - if any(df.isnull().all(axis=1)): - raise ValidationError(f"found {len(df[df.isnull().all(axis=1)])} null rows in the data frame") - - -def choose_dataframe_index_column(df: pd.DataFrame) -> str: - """ - Identify the HGVS variant column that should be used as the index column in this dataframe. - - Parameters - ---------- - df : pandas.DataFrame - The dataframe to check - - Returns - ------- - str - The column name of the index column - - Raises - ------ - ValidationError - If no valid HGVS variant column is found - """ - column_mapping = {c.lower(): c for c in df.columns if not df[c].isna().all()} - - if hgvs_nt_column in column_mapping: - return column_mapping[hgvs_nt_column] - elif hgvs_pro_column in column_mapping: - return column_mapping[hgvs_pro_column] - else: - raise ValidationError("failed to find valid HGVS variant column") - - -def validate_hgvs_transgenic_column(column: pd.Series, is_index: bool, targets: dict[str, "TargetSequence"]) -> None: - """ - Validate the variants in an HGVS column from a dataframe. - - Tests whether the column has a correct and consistent prefix. - This function also validates all individual variants in the column and checks for agreement against the target - sequence (for non-splice variants). - - Implementation NOTE: We assume variants will only be presented as fully qualified (accession:variant) - if this column is being validated against multiple targets. 
- - Parameters - ---------- - column : pd.Series - The column from the dataframe to validate - is_index : bool - True if this is the index column for the dataframe and therefore cannot have missing values; else False - targets : dict - Dictionary containing a mapping of target gene names to their sequences. - - Returns - ------- - None - - Raises - ------ - ValueError - If the target sequence does is not dna or protein (or inferred as dna or protein) - ValueError - If the target sequence is not valid for the variants (e.g. protein sequence for nucleotide variants) - ValidationError - If one of the variants fails validation - """ - valid_sequence_types = ("dna", "protein") - validate_variant_column(column, is_index) - prefixes = generate_variant_prefixes(column) - validate_variant_formatting(column, prefixes, list(targets.keys()), len(targets) > 1) - - observed_sequence_types = [target.sequence_type for target in targets.values()] - invalid_sequence_types = set(observed_sequence_types) - set(valid_sequence_types) - if invalid_sequence_types: - raise ValueError( - f"Some targets are invalid sequence types: {invalid_sequence_types}. Sequence types shoud be one of: {valid_sequence_types}" - ) - - # If this is the `hgvs_nt` column, at least one target should be of type `dna`. - if str(column.name).lower() == hgvs_nt_column: - if "dna" not in observed_sequence_types: - raise ValueError( - f"invalid target sequence type(s) for '{column.name}'. At least one target should be of type `dna`. Observed types: {observed_sequence_types}" - ) - - # Make sure this column is either the splice column or protein column. - elif str(column.name).lower() != hgvs_splice_column and str(column.name).lower() != hgvs_pro_column: - raise ValueError(f"unrecognized hgvs column name '{column.name}'") - - # Build dictionary of target sequences based on the column we are validating. - target_seqs: dict[str, Union[str, None]] = {} - for name, target in targets.items(): - if str(column.name).lower() == hgvs_nt_column: - target_seqs[name] = target.sequence - - # don't validate splice columns against provided sequences. - elif str(column.name).lower() == hgvs_splice_column: - target_seqs[name] = None - - # translate the target sequence if needed. - elif str(column.name).lower() == hgvs_pro_column: - if target.sequence_type == "dna" and target.sequence is not None: - target_seqs[name] = translate_dna(target.sequence)[0] - else: - target_seqs[name] = target.sequence - - # get a list of all invalid variants - invalid_variants = list() - for i, s in column.items(): - if not s: - continue - - # variants can exist on the same line separated by a space - for variant in s.split(" "): - # When there are multiple targets, treat provided variants as fully qualified. 
- if len(targets) > 1: - name, variant = str(variant).split(":") - else: - name = list(targets.keys())[0] - if variant is not None: - try: - Variant(variant, targetseq=target_seqs[name]) - except MaveHgvsParseError: - try: - Variant(variant) # note this will get called a second time for splice variants - except MaveHgvsParseError: - invalid_variants.append(f"invalid variant string '{variant}' at row {i} for sequence {name}") - else: - invalid_variants.append( - f"target sequence mismatch for '{variant}' at row {i} for sequence {name}" - ) - - # format and raise an error message that contains all invalid variants - if len(invalid_variants) > 0: - raise ValidationError( - f"encountered {len(invalid_variants)} invalid variant strings.", triggers=invalid_variants - ) - - -def validate_hgvs_genomic_column( - column: pd.Series, is_index: bool, targets: list["TargetAccession"], hdp: RESTDataProvider -) -> None: - """ - Validate the variants in an HGVS column from a dataframe. - - Tests whether the column has a correct and consistent prefix. - This function also validates all individual variants in the column and checks for agreement against the target - sequence (for non-splice variants). - - Parameters - ---------- - column : pd.Series - The column from the dataframe to validate - is_index : bool - True if this is the index column for the dataframe and therefore cannot have missing values; else False - targets : list - Dictionary containing a list of target accessions. - - Returns - ------- - None - - Raises - ------ - ValueError - If the target sequence does is not dna or protein (or inferred as dna or protein) - ValueError - If the target sequence is not valid for the variants (e.g. protein sequence for nucleotide variants) - ValidationError - If one of the variants fails validation - """ - validate_variant_column(column, is_index) - prefixes = generate_variant_prefixes(column) - validate_variant_formatting( - column, prefixes, [target.accession for target in targets if target.accession is not None], True - ) - - # validate the individual variant strings - # prepare the target sequences for validation - target_seqs: dict[str, Union[str, None]] = {} - for target in targets: - assert target.accession is not None - # We shouldn't have to worry about translating protein sequences when we deal with accession based variants - if str(column.name).lower() == hgvs_nt_column or str(column.name).lower() == hgvs_pro_column: - target_seqs[target.accession] = target.accession - - # TODO: no splice col for genomic coordinate variants? - elif str(column.name).lower() == hgvs_splice_column: - target_seqs[target.accession] = None # don't validate splice variants against a target sequence - - else: - raise ValueError(f"unrecognized hgvs column name '{column.name}'") - - hp = hgvs.parser.Parser() - vr = hgvs.validator.Validator(hdp=hdp) - - invalid_variants = list() - for i, s in column.items(): - if s is not None: - for variant in s.split(" "): - try: - # We set strict to `False` to suppress validation warnings about intronic variants. 
- vr.validate(hp.parse(variant), strict=False) - except hgvs.exceptions.HGVSError as e: - invalid_variants.append(f"Failed to parse row {i} with HGVS exception: {e}") - - # format and raise an error message that contains all invalid variants - if len(invalid_variants) > 0: - raise ValidationError( - f"encountered {len(invalid_variants)} invalid variant strings.", triggers=invalid_variants - ) - - -def validate_variant_formatting(column: pd.Series, prefixes: list[str], targets: list[str], fully_qualified: bool): - """ - Validate the formatting of HGVS variants present in the passed column against - lists of prefixes and targets - - Parameters - ---------- - column : pd.Series - A pandas column containing HGVS variants - prefixes : list[str] - A list of prefixes we can expect to occur within the passed column - targets : list[str] - A list of targets we can expect to occur within the passed column - - Returns - ------- - None - - Raises - ------ - ValidationError - If any of the variants in the column are not fully qualified with respect to multiple possible targets - ValidationError - If the column contains multiple prefixes or the wrong prefix for that column name - ValidationError - If the column contains target accessions not present in the list of possible targets - """ - variants = [variant for s in column.dropna() for variant in s.split(" ")] - - # if there is more than one target, we expect variants to be fully qualified - if fully_qualified: - if not all(len(str(v).split(":")) == 2 for v in variants): - raise ValidationError( - f"variant column '{column.name}' needs fully qualified coordinates when validating against multiple targets" - ) - if len(set(str(v).split(":")[1][:2] for v in variants)) > 1: - raise ValidationError(f"variant column '{column.name}' has inconsistent variant prefixes") - if not all(str(v).split(":")[1][:2] in prefixes for v in variants): - raise ValidationError(f"variant column '{column.name}' has invalid variant prefixes") - if not all(str(v).split(":")[0] in targets for v in variants): - raise ValidationError(f"variant column '{column.name}' has invalid accession identifiers") - - else: - if len(set(v[:2] for v in variants)) > 1: - raise ValidationError(f"variant column '{column.name}' has inconsistent variant prefixes") - if not all(v[:2] in prefixes for v in variants): - raise ValidationError(f"variant column '{column.name}' has invalid variant prefixes") - - -def generate_variant_prefixes(column: pd.Series): - """ - Generate variant prefixes for the provided column - - Parameters - ---------- - column : pd.Series - The pandas column from which to generate variant prefixes - - Returns - ------- - None - - Raises - ------ - ValueError - If the provided pandas column has an unrecognized variant column name - """ - if str(column.name).lower() == hgvs_nt_column: - return [f"{a}." for a in "cngmo"] - if str(column.name).lower() == hgvs_splice_column: - return [f"{a}." 
for a in "cn"] - if str(column.name).lower() == hgvs_pro_column: - return ["p."] - - raise ValueError(f"unrecognized hgvs column name '{column.name}'") - - -def validate_variant_column(column: pd.Series, is_index: bool): - """ - Validate critical column properties of an HGVS variant column, with special - attention to certain properties expected on index columns - - Parameters - ---------- - column : pd.Series - The pandas column containing HGVS variant information - id_index : bool - Whether the provided column is the index column - - Returns - ------- - None - - Raises - ------ - ValidationError - If an index column contains missing or non-unique values - ValidationError - If a column contains any numeric data - """ - if infer_column_type(column) not in ("string", "empty"): - raise ValidationError(f"variant column '{column.name}' cannot contain numeric data") - if column.isna().any() and is_index: - raise ValidationError(f"primary variant column '{column.name}' cannot contain null values") - if not column.is_unique and is_index: - raise ValidationError(f"primary variant column '{column.name}' must contain unique values") - - -def validate_hgvs_prefix_combinations( - hgvs_nt: Optional[str], hgvs_splice: Optional[str], hgvs_pro: Optional[str], transgenic: bool -) -> None: - """ - Validate the combination of HGVS variant prefixes. - - This function assumes that other validation, such as checking that all variants in the column have the same prefix, - has already been performed. - - Parameters - ---------- - hgvs_nt : Optional[str] - The first character (prefix) of the HGVS nucleotide variant strings, or None if not used. - hgvs_splice : Optional[str] - The first character (prefix) of the HGVS splice variant strings, or None if not used. - hgvs_pro : Optional[str] - The first character (prefix) of the HGVS protein variant strings, or None if not used. - transgenic : bool - Whether we should validate these prefix combinations as transgenic variants - - Returns - ------- - None - - Raises - ------ - ValueError - If upstream validation failed and an invalid prefix string was passed to this function - ValidationError - If the combination of prefixes is not valid - """ - # ensure that the prefixes are valid - this validation should have been performed before this function was called - if hgvs_nt not in list("cngmo") + [None]: - raise ValueError("invalid nucleotide prefix") - if hgvs_splice not in list("cn") + [None]: - raise ValueError("invalid nucleotide prefix") - if hgvs_pro not in ["p", None]: - raise ValueError("invalid protein prefix") - - # test agreement of prefixes across columns - if hgvs_splice is not None: - if hgvs_nt not in list("gmo"): - raise ValidationError("nucleotide variants must use valid genomic prefix when splice variants are present") - if hgvs_pro is not None: - if hgvs_splice != "c": - raise ValidationError("splice variants' must use 'c.' prefix when protein variants are present") - else: - if hgvs_splice != "n": - raise ValidationError("splice variants must use 'n.' prefix when protein variants are not present") - elif hgvs_pro is not None and hgvs_nt is not None: - if hgvs_nt != "c": - raise ValidationError( - "nucleotide variants must use 'c.' prefix when protein variants are present and splicing variants are" - " not present" - ) - # Only raise if this data will not be validated by biocommons.hgvs - elif hgvs_nt is not None: # just hgvs_nt - if hgvs_nt != "n" and transgenic: - raise ValidationError("nucleotide variants must use 'n.' 
prefix when only nucleotide variants are defined") - - -def validate_variant_consistency(df: pd.DataFrame) -> None: - """ - Ensure that variants defined in a single row describe the same variant. - - Parameters - ---------- - df : pd.DataFrame - - Returns - ------- - None - - """ - # TODO - pass - - -def validate_data_column(column: pd.Series, force_numeric: bool = False) -> None: - """ - Validate the contents of a data column. - - Parameters - ---------- - column : pandas.Series - A data column from a dataframe - force_numeric : bool - Force the data to be numeric, used for score column and count data - - Returns - ------- - None - - Raises - ------ - ValidationError - If the data is all null - ValidationError - If the data is of mixed numeric and string types - ValidationError - If the data is not numeric and force_numeric is True - - """ - column_type = infer_column_type(column) - if column_type == "empty": - raise ValidationError(f"data column '{column.name}' contains no data") - elif column_type == "mixed": - raise ValidationError(f"data column '{column.name}' has mixed string and numeric types") - elif force_numeric and column_type != "numeric": - raise ValidationError(f"data column '{column.name}' must contain only numeric data") - - -def validate_variant_columns_match(df1: pd.DataFrame, df2: pd.DataFrame): - """ - Checks if two dataframes have matching HGVS columns. - - The check performed is order-independent. - This function is used to validate a pair of scores and counts dataframes that were uploaded together. - - Parameters - ---------- - df1 : pandas.DataFrame - Dataframe parsed from an uploaded scores file - df2 : pandas.DataFrame - Dataframe parsed from an uploaded counts file - - Raises - ------ - ValidationError - If both dataframes do not define the same variant columns - ValidationError - If both dataframes do not define the same variants within each column - """ - for c in df1.columns: - if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): - if c not in df2: - raise ValidationError("both score and count dataframes must define matching HGVS columns") - elif df1[c].isnull().all() and df2[c].isnull().all(): - continue - elif np.any(df1[c].sort_values().values != df2[c].sort_values().values): - raise ValidationError( - f"both score and count dataframes must define matching variants, discrepancy found in '{c}'" - ) - for c in df2.columns: - if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): - if c not in df1: - raise ValidationError("both score and count dataframes must define matching HGVS columns") diff --git a/src/mavedb/lib/validation/dataframe/column.py b/src/mavedb/lib/validation/dataframe/column.py new file mode 100644 index 00000000..8505a8cc --- /dev/null +++ b/src/mavedb/lib/validation/dataframe/column.py @@ -0,0 +1,252 @@ +from typing import Optional + +import numpy as np +import pandas as pd +from fqfa.util.translate import translate_dna + +from mavedb.lib.validation.constants.general import ( + hgvs_nt_column, + hgvs_pro_column, + hgvs_splice_column, +) +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.models.target_sequence import TargetSequence + + +def infer_column_type(col: pd.Series) -> str: + """Infer whether the given column contains string or numeric data. + + The function returns "string" for string columns or "numeric" for numeric columns. + If there is a mixture of types it returns "mixed". + If every value in the column is `None` or NA it returns "empty". 
+ + Parameters + ---------- + col : pandas.Series + The column to inspect + + Returns + ------- + str + One of "string", "numeric", "mixed", or "empty" + """ + if col.isna().all(): + return "empty" + else: + col_numeric = pd.to_numeric(col, errors="coerce") + if col_numeric.isna().all(): # nothing converted to a number + return "string" + elif np.all(col.isna() == col_numeric.isna()): # all non-NA values converted + return "numeric" + else: # some values converted but not all + return "mixed" + + +def validate_variant_formatting(column: pd.Series, prefixes: list[str], targets: list[str], fully_qualified: bool): + """ + Validate the formatting of HGVS variants present in the passed column against + lists of prefixes and targets + + Parameters + ---------- + column : pd.Series + A pandas column containing HGVS variants + prefixes : list[str] + A list of prefixes we can expect to occur within the passed column + targets : list[str] + A list of targets we can expect to occur within the passed column + + Returns + ------- + None + + Raises + ------ + ValidationError + If any of the variants in the column are not fully qualified with respect to multiple possible targets + ValidationError + If the column contains multiple prefixes or the wrong prefix for that column name + ValidationError + If the column contains target accessions not present in the list of possible targets + """ + variants = [variant for s in column.dropna() for variant in s.split(" ")] + + # if there is more than one target, we expect variants to be fully qualified + if fully_qualified: + if not all(len(str(v).split(":")) == 2 for v in variants): + raise ValidationError( + f"variants in the provided column '{column.name}' were expected to be fully qualified, but are not described in relation to an accession" + ) + if len(set(str(v).split(":")[1][:2] for v in variants)) > 1: + raise ValidationError(f"variant column '{column.name}' has inconsistent variant prefixes") + if not all(str(v).split(":")[1][:2] in prefixes for v in variants): + raise ValidationError(f"variant column '{column.name}' has invalid variant prefixes") + if not all(str(v).split(":")[0] in targets for v in variants): + raise ValidationError(f"variant column '{column.name}' has invalid accession identifiers") + + else: + if len(set(v[:2] for v in variants)) > 1: + raise ValidationError(f"variant column '{column.name}' has inconsistent variant prefixes") + if not all(v[:2] in prefixes for v in variants): + raise ValidationError(f"variant column '{column.name}' has invalid variant prefixes") + + +def generate_variant_prefixes(column: pd.Series): + """ + Generate variant prefixes for the provided column + + Parameters + ---------- + column : pd.Series + The pandas column from which to generate variant prefixes + + Returns + ------- + None + + Raises + ------ + ValueError + If the provided pandas column has an unrecognized variant column name + """ + if str(column.name).lower() == hgvs_nt_column: + return [f"{a}." for a in "cngmo"] + if str(column.name).lower() == hgvs_splice_column: + return [f"{a}." 
for a in "cn"] + if str(column.name).lower() == hgvs_pro_column: + return ["p."] + + raise ValueError(f"unrecognized hgvs column name '{column.name}'") + + +def validate_variant_column(column: pd.Series, is_index: bool): + """ + Validate critical column properties of an HGVS variant column, with special + attention to certain properties expected on index columns + + Parameters + ---------- + column : pd.Series + The pandas column containing HGVS variant information + id_index : bool + Whether the provided column is the index column + + Returns + ------- + None + + Raises + ------ + ValidationError + If an index column contains missing or non-unique values + ValidationError + If a column contains any numeric data + """ + if infer_column_type(column) not in ("string", "empty"): + raise ValidationError(f"variant column '{column.name}' cannot contain numeric data") + if column.isna().any() and is_index: + raise ValidationError(f"primary variant column '{column.name}' cannot contain null values") + if not column.is_unique and is_index: + raise ValidationError(f"primary variant column '{column.name}' must contain unique values") + + +def validate_data_column(column: pd.Series, force_numeric: bool = False) -> None: + """ + Validate the contents of a data column. + + Parameters + ---------- + column : pandas.Series + A data column from a dataframe + force_numeric : bool + Force the data to be numeric, used for score column and count data + + Returns + ------- + None + + Raises + ------ + ValidationError + If the data is all null + ValidationError + If the data is of mixed numeric and string types + ValidationError + If the data is not numeric and force_numeric is True + + """ + column_type = infer_column_type(column) + if column_type == "empty": + raise ValidationError(f"data column '{column.name}' contains no data") + elif column_type == "mixed": + raise ValidationError(f"data column '{column.name}' has mixed string and numeric types") + elif force_numeric and column_type != "numeric": + raise ValidationError(f"data column '{column.name}' must contain only numeric data") + + +def validate_hgvs_column_properties(column: pd.Series, observed_sequence_types: list[str]) -> None: + """ + Validates the properties of an HGVS column in a DataFrame. + + Parameters + ---------- + column : pd.Series + The column to validate. + observed_sequence_types : list[str] + A list of observed sequence types. + + Returns + ------- + None + + Raises + ------ + ValueError + If the column name is 'hgvs_nt' and 'dna' is not in the observed sequence types. + ValueError + If the column name is not recognized as either 'hgvs_splice' or 'hgvs_pro'. + """ + if str(column.name).lower() == hgvs_nt_column: + if "dna" not in observed_sequence_types: + raise ValueError( + f"invalid target sequence type(s) for '{column.name}'. At least one target should be of type `dna`. Observed types: {observed_sequence_types}" + ) + elif str(column.name).lower() != hgvs_splice_column and str(column.name).lower() != hgvs_pro_column: + raise ValueError(f"unrecognized hgvs column name '{column.name}'") + + +def construct_target_sequence_mappings( + column: pd.Series, targets: dict[str, TargetSequence] +) -> dict[str, Optional[str]]: + """ + Constructs a mapping of target sequences based on the provided column and targets. Translates protein sequences + to DNA sequences if needed for passed protein columns. Don't validate splice columns against provided sequences. 
+ + Parameters + ---------- + column : pd.Series + The pandas Series representing the column to be validated. + targets : dict[str, TargetSequence] + A dictionary where keys are target names and values are TargetSequence objects. + + Returns + ------- + dict[str, Union[str, pd.Series]]: A dictionary where keys are target names and values are either the target sequence, + the translated target sequence, or None depending on the column type. + """ + if str(column.name).lower() not in (hgvs_nt_column, hgvs_pro_column, hgvs_splice_column): + raise ValueError(f"unrecognized hgvs column name '{column.name}'") + + if str(column.name).lower() == hgvs_splice_column: + return {name: None for name in targets.keys()} + + return { + name: translate_dna(target.sequence)[0] + if ( + str(column.name).lower() == hgvs_pro_column + and target.sequence_type == "dna" + and target.sequence is not None + ) + else target.sequence + for name, target in targets.items() + } diff --git a/src/mavedb/lib/validation/dataframe/dataframe.py b/src/mavedb/lib/validation/dataframe/dataframe.py new file mode 100644 index 00000000..a8ab6557 --- /dev/null +++ b/src/mavedb/lib/validation/dataframe/dataframe.py @@ -0,0 +1,371 @@ +from typing import Optional, Tuple, TYPE_CHECKING + +import numpy as np +import pandas as pd + +from mavedb.lib.exceptions import MixedTargetError +from mavedb.lib.validation.constants.general import ( + hgvs_nt_column, + hgvs_pro_column, + hgvs_splice_column, + required_score_column, +) +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.models.target_gene import TargetGene +from mavedb.lib.validation.dataframe.column import validate_data_column +from mavedb.lib.validation.dataframe.variant import ( + validate_hgvs_transgenic_column, + validate_hgvs_genomic_column, + validate_hgvs_prefix_combinations, +) + +if TYPE_CHECKING: + from cdot.hgvs.dataproviders import RESTDataProvider + + +STANDARD_COLUMNS = (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, required_score_column) + + +def validate_and_standardize_dataframe_pair( + scores_df: pd.DataFrame, + counts_df: Optional[pd.DataFrame], + targets: list[TargetGene], + hdp: Optional["RESTDataProvider"], +) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: + """ + Perform validation and standardization on a pair of score and count dataframes. + + Parameters + ---------- + scores_df : pandas.DataFrame + The scores dataframe + counts_df : Optional[pandas.DataFrame] + The counts dataframe, can be None if not present + targets : str + The target genes on which to validate dataframes + hdp : RESTDataProvider + The biocommons.hgvs compatible data provider. Used to fetch sequences for hgvs validation. 
+ + Returns + ------- + Tuple[pd.DataFrame, Optional[pd.DataFrame]] + The standardized score and count dataframes, or score and None if no count dataframe was provided + + Raises + ------ + ValidationError + If one of the validation functions raises an exception + """ + if not targets: + raise ValueError("Can't validate provided file with no targets.") + + validate_dataframe(scores_df, "scores", targets, hdp) + if counts_df is not None: + validate_dataframe(counts_df, "counts", targets, hdp) + validate_variant_columns_match(scores_df, counts_df) + + new_scores_df = standardize_dataframe(scores_df) + new_counts_df = standardize_dataframe(counts_df) if counts_df is not None else None + return new_scores_df, new_counts_df + + +def validate_dataframe( + df: pd.DataFrame, kind: str, targets: list["TargetGene"], hdp: Optional["RESTDataProvider"] +) -> None: + """ + Validate that a given dataframe passes all checks. + + Parameters + ---------- + df : pandas.DataFrame + The dataframe to validate + kind : str + The kind of dataframe "counts" or "scores" + target_seq : str + The target sequence to validate variants against + target_seq_type : str + The kind of target sequence, one of "infer" "dna" or "protein" + + Returns + ------- + None + + Raises + ------ + ValidationError + If one of the validators called raises an exception + """ + # basic checks + validate_column_names(df, kind) + validate_no_null_rows(df) + + column_mapping = {c.lower(): c for c in df.columns} + index_column = choose_dataframe_index_column(df) + + prefixes: dict[str, Optional[str]] = dict() + for c in column_mapping: + if c in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): + is_index = column_mapping[c] == index_column + prefixes[c] = None + + # Ignore validation for null non-index hgvs columns + if df[column_mapping[c]].isna().all() and not is_index: + continue + + score_set_is_accession_based = all(target.target_accession for target in targets) + score_set_is_sequence_based = all(target.target_sequence for target in targets) + + # This is typesafe, despite Pylance's claims otherwise + if score_set_is_accession_based and not score_set_is_sequence_based: + validate_hgvs_genomic_column( + df[column_mapping[c]], + is_index, + [target.target_accession for target in targets], + hdp, # type: ignore + ) + elif score_set_is_sequence_based and not score_set_is_accession_based: + validate_hgvs_transgenic_column( + df[column_mapping[c]], + is_index, + {target.target_sequence.label: target.target_sequence for target in targets}, # type: ignore + ) + else: + raise MixedTargetError("Could not validate dataframe against provided mixed target types.") + + # post validation, handle prefixes. We've already established these columns are non-null + if score_set_is_accession_based or len(targets) > 1: + prefixes[c] = ( + df[column_mapping[c]].dropna()[0].split(" ")[0].split(":")[1][0] + ) # Just take the first prefix, we validate consistency elsewhere + else: + prefixes[c] = df[column_mapping[c]].dropna()[0][0] + + else: + force_numeric = (c == required_score_column) or (kind == "counts") + validate_data_column(df[column_mapping[c]], force_numeric) + + validate_hgvs_prefix_combinations( + hgvs_nt=prefixes[hgvs_nt_column], + hgvs_splice=prefixes[hgvs_splice_column], + hgvs_pro=prefixes[hgvs_pro_column], + transgenic=all(target.target_sequence for target in targets), + ) + + +def standardize_dataframe(df: pd.DataFrame) -> pd.DataFrame: + """Standardize a dataframe by sorting the columns and changing the standard column names to lowercase. 
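A quick sketch of the effect on casing and column order (a toy frame, nothing MaveDB-specific):

```
import pandas as pd

from mavedb.lib.validation.dataframe.dataframe import standardize_dataframe

df = pd.DataFrame({"SCORE": [1.0], "extra": [2.0], "hgvs_pro": ["p.Thr1="]})
standardized = standardize_dataframe(df)

# Standard columns are lower-cased and placed ahead of any extra columns.
assert list(standardized.columns) == ["hgvs_pro", "score", "extra"]
```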
+ + The standard column names are: + * hgvs_nt + * hgvs_splice + * hgvs_pro + * score + + Case for other columns is preserved. + + Parameters + ---------- + df : pandas.DataFrame + The dataframe to standardize + + Returns + ------- + pandas.DataFrame + The standardized dataframe + """ + column_mapper = {x: x.lower() for x in df.columns if x.lower() in STANDARD_COLUMNS} + + df.rename(columns=column_mapper, inplace=True) + + return sort_dataframe_columns(df) + + +def sort_dataframe_columns(df: pd.DataFrame) -> pd.DataFrame: + """Sort the columns of the given dataframe according to the expected ordering in MaveDB. + + MaveDB expects that dataframes have columns in the following order (note some columns are optional): + * hgvs_nt + * hgvs_splice + * hgvs_pro + * score + * other + + Parameters + ---------- + df : pandas.DataFrame + The dataframe with columns to sort + + Returns + ------- + pandas.DataFrame + The dataframe with the same data but sorted columns + """ + + def column_sort_function(value, columns): + if value.lower() in STANDARD_COLUMNS: + return STANDARD_COLUMNS.index(value.lower()) + else: + return columns.index(value) + len(STANDARD_COLUMNS) + + old_columns = list(df.columns) + new_columns = sorted(old_columns, key=lambda v: column_sort_function(v, old_columns)) + + return df[new_columns] + + +def validate_column_names(df: pd.DataFrame, kind: str) -> None: + """Validate the column names in a dataframe. + + This function validates the column names in the given dataframe. + It can be run for either a "scores" dataframe or a "counts" dataframe. + A "scores" dataframe must have a column named 'score' and a "counts" dataframe cannot have a column named 'score'. + + The function also checks for a valid combination of columns that define variants. + + Basic checks are performed to make sure that a column name is not empty, null, or whitespace, + as well as making sure there are no duplicate column names. 
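Illustrative calls (the count column names below are arbitrary; each call raises ValidationError if a rule is violated):

```
import pandas as pd

from mavedb.lib.validation.dataframe.dataframe import validate_column_names

# Valid: a scores dataframe defines 'score' plus at least one hgvs column.
validate_column_names(pd.DataFrame(columns=["hgvs_nt", "score"]), kind="scores")

# Valid: a counts dataframe defines count columns but never a 'score' column.
validate_column_names(pd.DataFrame(columns=["hgvs_nt", "c_0", "c_1"]), kind="counts")
```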
+ + Parameters + ---------- + df : pandas.DataFrame + The scores or counts dataframe to be validated + + kind : str + Either "counts" or "scores" depending on the kind of dataframe being validated + + Raises + ------ + ValidationError + If the column names are not valid + """ + if any(type(c) is not str for c in df.columns): + raise ValidationError("column names must be strings") + + if any(c.isspace() for c in df.columns) or any(len(c) == 0 for c in df.columns): + raise ValidationError("column names cannot be empty or whitespace") + + columns = [c.lower() for c in df.columns] + + if kind == "scores": + if required_score_column not in columns: + raise ValidationError(f"score dataframe must have a '{required_score_column}' column") + elif kind == "counts": + if required_score_column in columns: + raise ValidationError(f"counts dataframe must not have a '{required_score_column}' column") + else: + raise ValueError("kind only accepts scores and counts") + + if hgvs_splice_column in columns: + if hgvs_nt_column not in columns or hgvs_pro_column not in columns: + raise ValidationError( + f"dataframes with '{hgvs_splice_column}' must also define '{hgvs_nt_column}' and '{hgvs_pro_column}'" + ) + + if len(columns) != len(set(columns)): + raise ValidationError("duplicate column names are not allowed (this check is case insensitive)") + + if set(columns).isdisjoint({hgvs_nt_column, hgvs_splice_column, hgvs_pro_column}): + raise ValidationError("dataframe does not define any variant columns") + + if set(columns).issubset({hgvs_nt_column, hgvs_splice_column, hgvs_pro_column}): + raise ValidationError("dataframe does not define any data columns") + + +def validate_no_null_rows(df: pd.DataFrame) -> None: + """Check that there are no fully null rows in the dataframe. + + Parameters + __________ + df : pandas.DataFrame + The scores or counts dataframe being validated + + Raises + ______ + ValidationError + If there are null rows in the dataframe + """ + if any(df.isnull().all(axis=1)): + raise ValidationError(f"found {len(df[df.isnull().all(axis=1)])} null rows in the data frame") + + +def choose_dataframe_index_column(df: pd.DataFrame) -> str: + """ + Identify the HGVS variant column that should be used as the index column in this dataframe. + + Parameters + ---------- + df : pandas.DataFrame + The dataframe to check + + Returns + ------- + str + The column name of the index column + + Raises + ------ + ValidationError + If no valid HGVS variant column is found + """ + column_mapping = {c.lower(): c for c in df.columns if not df[c].isna().all()} + + if hgvs_nt_column in column_mapping: + return column_mapping[hgvs_nt_column] + elif hgvs_pro_column in column_mapping: + return column_mapping[hgvs_pro_column] + else: + raise ValidationError("failed to find valid HGVS variant column") + + +def validate_variant_consistency(df: pd.DataFrame) -> None: + """ + Ensure that variants defined in a single row describe the same variant. + + Parameters + ---------- + df : pd.DataFrame + + Returns + ------- + None + + """ + # TODO + pass + + +def validate_variant_columns_match(df1: pd.DataFrame, df2: pd.DataFrame): + """ + Checks if two dataframes have matching HGVS columns. + + The check performed is order-independent. + This function is used to validate a pair of scores and counts dataframes that were uploaded together. 
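For example, the following pair passes because each variant column is sorted before comparison, so row order may differ between the two uploaded files:

```
import pandas as pd

from mavedb.lib.validation.dataframe.dataframe import validate_variant_columns_match

scores = pd.DataFrame({"hgvs_nt": ["n.1A>G", "n.2C>T"], "score": [1.0, 2.0]})
counts = pd.DataFrame({"hgvs_nt": ["n.2C>T", "n.1A>G"], "c_0": [10, 20]})

validate_variant_columns_match(scores, counts)
```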
+ + Parameters + ---------- + df1 : pandas.DataFrame + Dataframe parsed from an uploaded scores file + df2 : pandas.DataFrame + Dataframe parsed from an uploaded counts file + + Raises + ------ + ValidationError + If both dataframes do not define the same variant columns + ValidationError + If both dataframes do not define the same variants within each column + """ + for c in df1.columns: + if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): + if c not in df2: + raise ValidationError("both score and count dataframes must define matching HGVS columns") + elif df1[c].isnull().all() and df2[c].isnull().all(): + continue + elif np.any(df1[c].sort_values().values != df2[c].sort_values().values): + raise ValidationError( + f"both score and count dataframes must define matching variants, discrepancy found in '{c}'" + ) + for c in df2.columns: + if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): + if c not in df1: + raise ValidationError("both score and count dataframes must define matching HGVS columns") diff --git a/src/mavedb/lib/validation/dataframe/variant.py b/src/mavedb/lib/validation/dataframe/variant.py new file mode 100644 index 00000000..eb81873d --- /dev/null +++ b/src/mavedb/lib/validation/dataframe/variant.py @@ -0,0 +1,329 @@ +import logging +import warnings +from typing import Hashable, Optional, TYPE_CHECKING + +import pandas as pd +from mavehgvs.exceptions import MaveHgvsParseError +from mavehgvs.variant import Variant + +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.lib.validation.dataframe.column import ( + generate_variant_prefixes, + validate_variant_column, + validate_variant_formatting, + validate_hgvs_column_properties, + construct_target_sequence_mappings, +) +from mavedb.lib.validation.constants.target import strict_valid_sequence_types as valid_sequence_types + + +from mavedb.models.target_sequence import TargetSequence +from mavedb.models.target_accession import TargetAccession + +if TYPE_CHECKING: + from cdot.hgvs.dataproviders import RESTDataProvider + from hgvs.parser import Parser + from hgvs.validator import Validator + + +logger = logging.getLogger(__name__) + + +def validate_hgvs_transgenic_column(column: pd.Series, is_index: bool, targets: dict[str, TargetSequence]) -> None: + """ + Validate the variants in an HGVS column from a dataframe. + + Tests whether the column has a correct and consistent prefix. + This function also validates all individual variants in the column and checks for agreement against the target + sequence (for non-splice variants). + + Implementation NOTE: We assume variants will only be presented as fully qualified (accession:variant) + if this column is being validated against multiple targets. + + Parameters + ---------- + column : pd.Series + The column from the dataframe to validate + is_index : bool + True if this is the index column for the dataframe and therefore cannot have missing values; else False + targets : dict + Dictionary containing a mapping of target gene names to their sequences. + + Returns + ------- + None + + Raises + ------ + ValueError + If the target sequence does is not dna or protein (or inferred as dna or protein) + ValueError + If the target sequence is not valid for the variants (e.g. 
protein sequence for nucleotide variants) + ValidationError + If one of the variants fails validation + """ + validate_variant_column(column, is_index) + validate_variant_formatting( + column=column, + prefixes=generate_variant_prefixes(column), + targets=list(targets.keys()), + fully_qualified=len(targets) > 1, + ) + + observed_sequence_types = validate_observed_sequence_types(targets) + validate_hgvs_column_properties(column, observed_sequence_types) + target_seqs = construct_target_sequence_mappings(column, targets) + + parsed_variants = [ + parse_transgenic_variant(idx, variant, target_seqs, len(targets) > 1) for idx, variant in column.items() + ] + + # format and raise an error message that contains all invalid variants + if any(not valid for valid, _ in parsed_variants): + invalid_variants = [variant for valid, variant in parsed_variants if not valid] + raise ValidationError( + f"encountered {len(invalid_variants)} invalid variant strings.", triggers=invalid_variants + ) + + return + + +def validate_hgvs_genomic_column( + column: pd.Series, is_index: bool, targets: list[TargetAccession], hdp: Optional["RESTDataProvider"] +) -> None: + """ + Validate the variants in an HGVS column from a dataframe. + + Tests whether the column has a correct and consistent prefix. + This function also validates all individual variants in the column and checks for agreement against the target + sequence (for non-splice variants). + + Parameters + ---------- + column : pd.Series + The column from the dataframe to validate + is_index : bool + True if this is the index column for the dataframe and therefore cannot have missing values; else False + targets : list + Dictionary containing a list of target accessions. + + Returns + ------- + None + + Raises + ------ + ValueError + If the target sequence does is not dna or protein (or inferred as dna or protein) + ValueError + If the target sequence is not valid for the variants (e.g. protein sequence for nucleotide variants) + ValidationError + If one of the variants fails validation + """ + target_accession_identifiers = [target.accession for target in targets if target.accession is not None] + validate_variant_column(column, is_index) + validate_variant_formatting( + column=column, + prefixes=generate_variant_prefixes(column), + targets=target_accession_identifiers, + fully_qualified=True, + ) + + # Attempt to import dependencies from the hgvs package. + # + # For interoperability with Mavetools, we'd prefer if users were not required to install `hgvs`, which requires postgresql and psycopg2 as + # dependencies. We resolve these dependencies only when necessary, treating them as semi-optional. For the purposes of this package, if the + # hdp parameter is ever omitted it will be inferred so long as the `hgvs` package is installed and available. For the purposes of validator + # packages such as Mavetools, users may omit the hdp parameter and proceed with non-strict validation which will log a warning. To silence + # the warning, users should install `hgvs` and pass a data provider to this function. -capodb 2025-02-26 + try: + import hgvs.parser + import hgvs.validator + + if hdp is None: + import mavedb.deps + + hdp = mavedb.deps.hgvs_data_provider() + + hp = hgvs.parser.Parser() + vr = hgvs.validator.Validator(hdp=hdp) + + except ModuleNotFoundError as err: + if hdp is not None: + logger.error( + f"Failed to import `hgvs` from a context in which it is required. 
A data provider ({hdp.data_version()}) is available to this function, so " + + "it is inferred that strict validation is desired. Strict validation requires the `hgvs` package for parsing and validation of HGVS strings with " + + "accession information. Please ensure the `hgvs` package is installed (https://github.com/biocommons/hgvs/?tab=readme-ov-file#installing-hgvs-locally) " + + "to silence this error." + ) + raise err + + warnings.warn( + "Failed to import `hgvs`, and no data provider is available. Skipping strict validation of HGVS genomic variants. HGVS variant strings " + + "will be validated for format only, and accession information will be ignored and assumed correct. To enable strict validation against provided accessions and " + + "silence this warning, install the `hgvs` package. See: https://github.com/biocommons/hgvs/?tab=readme-ov-file#installing-hgvs-locally." + ) + + hp, vr = None, None + + if hp is not None and vr is not None: + parsed_variants = [parse_genomic_variant(idx, variant, hp, vr) for idx, variant in column.items()] + else: + parsed_variants = [ + parse_transgenic_variant( + idx, + variant, + {target: None for target in target_accession_identifiers}, + len(target_accession_identifiers) > 1, + ) + for idx, variant in column.items() + ] + + # format and raise an error message that contains all invalid variants + if any(not valid for valid, _ in parsed_variants): + invalid_variants = [variant for valid, variant in parsed_variants if not valid] + raise ValidationError( + f"encountered {len(invalid_variants)} invalid variant strings.", triggers=invalid_variants + ) + + return + + +def parse_genomic_variant( + idx: Hashable, variant_string: str, parser: "Parser", validator: "Validator" +) -> tuple[bool, Optional[str]]: + # Not pretty, but if we make it here we're guaranteed to have hgvs installed as a package, and we + # should make use of the built in exception they provide for variant validation. + import hgvs.exceptions + + if not variant_string: + return True, None + + for variant in variant_string.split(" "): + try: + validator.validate(parser.parse(variant), strict=False) + except hgvs.exceptions.HGVSError as e: + return False, f"Failed to parse row {idx} with HGVS exception: {e}" + + return True, None + + +def parse_transgenic_variant( + idx: Hashable, variant_string: str, target_sequences: dict[str, Optional[str]], is_fully_qualified: bool +) -> tuple[bool, Optional[str]]: + if not variant_string: + return True, None + + # variants can exist on the same line separated by a space + for variant in variant_string.split(" "): + if is_fully_qualified: + name, variant = str(variant).split(":") + else: + name = list(target_sequences.keys())[0] + + if variant is not None: + try: + Variant(variant, targetseq=target_sequences[name]) + except MaveHgvsParseError: + try: + Variant(variant) # note this will get called a second time for splice variants + except MaveHgvsParseError: + return False, f"invalid variant string '{variant}' at row {idx} for sequence {name}" + else: + return False, f"target sequence mismatch for '{variant}' at row {idx} for sequence {name}" + + return True, None + + +def validate_observed_sequence_types(targets: dict[str, TargetSequence]) -> list[str]: + """ + Ensures that the sequence types of the given target sequences are an accepted type. + + Parameters + ---------- + targets : (dict[str, TargetSequence]) + A dictionary where the keys are target names and the values are TargetSequence objects. 
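The parsing helpers defined above can also be exercised directly; a small sketch using the toy target sequence "ACGTTT" (an assumption for illustration):

```
from mavedb.lib.validation.dataframe.variant import parse_transgenic_variant

ok, message = parse_transgenic_variant(0, "n.1A>G", {"TEST1": "ACGTTT"}, is_fully_qualified=False)
assert ok and message is None

# A reference mismatch is reported rather than raised, so callers can collect
# every failure into a single ValidationError.
ok, message = parse_transgenic_variant(1, "n.1C>G", {"TEST1": "ACGTTT"}, is_fully_qualified=False)
assert not ok and "mismatch" in message
```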
+
+    Returns
+    -------
+    list[str]: A list of observed sequence types from the target sequences.
+
+    Raises
+    ------
+    ValueError
+        If no targets are provided.
+    ValueError
+        If any of the target sequences have an invalid sequence type.
+    """
+    if not targets:
+        raise ValueError("No targets were provided; cannot validate observed sequence types with none observed.")
+
+    observed_sequence_types = [target.sequence_type for target in targets.values()]
+    invalid_sequence_types = set(observed_sequence_types) - set(valid_sequence_types)
+    if invalid_sequence_types:
+        raise ValueError(
+            f"Some targets are invalid sequence types: {invalid_sequence_types}. Sequence types should be one of: {valid_sequence_types}"
+        )
+
+    return observed_sequence_types
+
+
+def validate_hgvs_prefix_combinations(
+    hgvs_nt: Optional[str], hgvs_splice: Optional[str], hgvs_pro: Optional[str], transgenic: bool
+) -> None:
+    """
+    Validate the combination of HGVS variant prefixes.
+
+    This function assumes that other validation, such as checking that all variants in the column have the same prefix,
+    has already been performed.
+
+    Parameters
+    ----------
+    hgvs_nt : Optional[str]
+        The first character (prefix) of the HGVS nucleotide variant strings, or None if not used.
+    hgvs_splice : Optional[str]
+        The first character (prefix) of the HGVS splice variant strings, or None if not used.
+    hgvs_pro : Optional[str]
+        The first character (prefix) of the HGVS protein variant strings, or None if not used.
+    transgenic : bool
+        Whether we should validate these prefix combinations as transgenic variants
+
+    Returns
+    -------
+    None
+
+    Raises
+    ------
+    ValueError
+        If upstream validation failed and an invalid prefix string was passed to this function
+    ValidationError
+        If the combination of prefixes is not valid
+    """
+    # ensure that the prefixes are valid - this validation should have been performed before this function was called
+    if hgvs_nt not in list("cngmo") + [None]:
+        raise ValueError("invalid nucleotide prefix")
+    if hgvs_splice not in list("cn") + [None]:
+        raise ValueError("invalid splice prefix")
+    if hgvs_pro not in ["p", None]:
+        raise ValueError("invalid protein prefix")
+
+    # test agreement of prefixes across columns
+    if hgvs_splice is not None:
+        if hgvs_nt not in list("gmo"):
+            raise ValidationError("nucleotide variants must use valid genomic prefix when splice variants are present")
+        if hgvs_pro is not None:
+            if hgvs_splice != "c":
+                raise ValidationError("splice variants must use 'c.' prefix when protein variants are present")
+        else:
+            if hgvs_splice != "n":
+                raise ValidationError("splice variants must use 'n.' prefix when protein variants are not present")
+    elif hgvs_pro is not None and hgvs_nt is not None:
+        if hgvs_nt != "c":
+            raise ValidationError(
+                "nucleotide variants must use 'c.' prefix when protein variants are present and splicing variants are"
+                " not present"
+            )
+    # Only raise if this data will not be validated by biocommons.hgvs
+    elif hgvs_nt is not None:  # just hgvs_nt
+        if hgvs_nt != "n" and transgenic:
+            raise ValidationError("nucleotide variants must use 'n.'
prefix when only nucleotide variants are defined") diff --git a/src/mavedb/lib/validation/py.typed b/src/mavedb/lib/validation/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index d30064ff..2b694268 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -40,7 +40,7 @@ create_variants_data, ) from mavedb.lib.slack import send_slack_error, send_slack_message -from mavedb.lib.validation.dataframe import ( +from mavedb.lib.validation.dataframe.dataframe import ( validate_and_standardize_dataframe_pair, ) from mavedb.lib.validation.exceptions import ValidationError From 5c7e82359adb4bc2f15d8e5c9ba8b8242f3d4256 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 28 Feb 2025 15:26:56 -0800 Subject: [PATCH 090/166] Refactor Tests to Better Identify Dependency Separation Issues Refactors most of the test suite to better identify dependency separation problems. Validation tests may now be run with only core (and dev) dependencies installed, and fixtures which operate on server dependencies are conditionally loaded based on the installed modules. With this change, it will be much more straightforward to identify dependency 'leaks', or server dependencies which mistakenly are leaked into validation type code. --- .github/workflows/run-tests-on-push.yml | 42 + tests/conftest.py | 301 +--- tests/conftest_optional.py | 301 ++++ tests/helpers/constants.py | 15 +- tests/helpers/util/access_key.py | 47 + tests/helpers/util/collection.py | 21 + tests/helpers/util/common.py | 16 + tests/helpers/util/contributor.py | 22 + tests/helpers/util/exceptions.py | 2 + tests/helpers/util/experiment.py | 22 + tests/helpers/util/license.py | 16 + tests/helpers/util/score_set.py | 175 +++ tests/helpers/util/user.py | 30 + tests/helpers/util/variant.py | 99 ++ tests/lib/test_authentication.py | 66 +- tests/lib/test_score_set.py | 84 +- tests/routers/conftest.py | 38 +- tests/routers/test_access_keys.py | 11 +- tests/routers/test_collections.py | 118 +- tests/routers/test_experiments.py | 319 +++-- tests/routers/test_hgvs.py | 12 +- tests/routers/test_licenses.py | 6 + tests/routers/test_permissions.py | 40 +- tests/routers/test_score_set.py | 1249 ++++++++--------- tests/routers/test_statistics.py | 109 +- tests/routers/test_target_gene.py | 89 +- tests/routers/test_users.py | 8 +- tests/validation/dataframe/conftest.py | 43 + tests/validation/dataframe/test_column.py | 272 ++++ tests/validation/dataframe/test_dataframe.py | 414 ++++++ tests/validation/dataframe/test_variant.py | 893 ++++++++++++ tests/validation/test_dataframe.py | 1121 --------------- tests/view_models/test_experiment.py | 59 +- .../test_external_gene_identifiers.py | 26 +- .../test_publication_identifier.py | 14 +- tests/view_models/test_score_set.py | 115 +- tests/view_models/test_user.py | 7 +- tests/view_models/test_wild_type_sequence.py | 6 +- tests/worker/conftest.py | 11 +- tests/worker/test_jobs.py | 34 +- 40 files changed, 3666 insertions(+), 2607 deletions(-) create mode 100644 tests/conftest_optional.py create mode 100644 tests/helpers/util/access_key.py create mode 100644 tests/helpers/util/collection.py create mode 100644 tests/helpers/util/common.py create mode 100644 tests/helpers/util/contributor.py create mode 100644 tests/helpers/util/exceptions.py create mode 100644 tests/helpers/util/experiment.py create mode 100644 tests/helpers/util/license.py create mode 100644 tests/helpers/util/score_set.py create mode 100644 
tests/helpers/util/user.py create mode 100644 tests/helpers/util/variant.py create mode 100644 tests/validation/dataframe/conftest.py create mode 100644 tests/validation/dataframe/test_column.py create mode 100644 tests/validation/dataframe/test_dataframe.py create mode 100644 tests/validation/dataframe/test_variant.py delete mode 100644 tests/validation/test_dataframe.py diff --git a/.github/workflows/run-tests-on-push.yml b/.github/workflows/run-tests-on-push.yml index ad6359cd..1dccd615 100644 --- a/.github/workflows/run-tests-on-push.yml +++ b/.github/workflows/run-tests-on-push.yml @@ -6,6 +6,20 @@ env: LOG_CONFIG: test jobs: + run-tests-3_9-core-dependencies: + runs-on: ubuntu-20.04 + name: Pytest on Python 3.9 / Ubuntu 20.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.9" + cache: 'pip' + - run: pip install --upgrade pip + - run: pip install poetry + - run: poetry install --with dev + - run: poetry run pytest tests/ + run-tests-3_9: runs-on: ubuntu-latest name: Pytest on Python 3.9 @@ -20,6 +34,20 @@ jobs: - run: poetry install --with dev --extras server - run: poetry run pytest tests/ --show-capture=stdout --cov=src + run-tests-3_10-core-dependencies: + runs-on: ubuntu-latest + name: Pytest on Python 3.10 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: 'pip' + - run: pip install --upgrade pip + - run: pip install poetry + - run: poetry install --with dev + - run: poetry run pytest tests/ + run-tests-3_10: runs-on: ubuntu-latest name: Pytest on Python 3.10 @@ -34,6 +62,20 @@ jobs: - run: poetry install --with dev --extras server - run: poetry run pytest tests/ --show-capture=stdout --cov=src + run-tests-3_11-core-dependencies: + runs-on: ubuntu-latest + name: Pytest on Python 3.11 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: 'pip' + - run: pip install --upgrade pip + - run: pip install poetry + - run: poetry install --with dev + - run: poetry run pytest tests/ + run-tests-3_11: runs-on: ubuntu-latest name: Pytest on Python 3.11 diff --git a/tests/conftest.py b/tests/conftest.py index c16ef610..c5a2ef9e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,44 +1,24 @@ -import os import logging # noqa: F401 import sys -from concurrent import futures -from inspect import getsourcefile -from os.path import abspath -from unittest.mock import patch -import cdot.hgvs.dataproviders import email_validator import pytest -import pytest_asyncio import pytest_postgresql -from arq import ArqRedis -from arq.worker import Worker -from fakeredis import FakeServer -from fakeredis.aioredis import FakeConnection -from fastapi.testclient import TestClient -from httpx import AsyncClient -from redis.asyncio.connection import ConnectionPool from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker from sqlalchemy.pool import NullPool from mavedb.db.base import Base -from mavedb.deps import get_db, get_worker, hgvs_data_provider -from mavedb.lib.authentication import UserData, get_current_user -from mavedb.lib.authorization import require_current_user -from mavedb.models.user import User -from mavedb.server_main import app -from mavedb.worker.jobs import ( - create_variants_for_score_set, - map_variants_for_score_set, - variant_mapper_manager, - submit_score_set_mappings_to_ldh, - link_clingen_variants, -) sys.path.append(".") -from tests.helpers.constants import ADMIN_USER, EXTRA_USER, 
TEST_USER +# Attempt to import optional top level fixtures. If the modules they depend on are not installed, +# we won't have access to our full fixture suite and only a limited subset of tests can be run. +try: + from tests.conftest_optional import * # noqa: F401, F403 + +except ModuleNotFoundError: + pass # needs the pytest_postgresql plugin installed assert pytest_postgresql.factories @@ -67,270 +47,3 @@ def session(postgresql): finally: session.close() Base.metadata.drop_all(bind=engine) - - -@pytest.fixture -def data_provider(): - """ - To provide the transcript for the FASTA file without a network request, use: - - ``` - from helpers.utils.constants import TEST_CDOT_TRANSCRIPT - from unittest.mock import patch - import cdot.hgvs.dataproviders - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): - ... - ``` - """ - - this_file_dir = os.path.dirname(abspath(getsourcefile(lambda: 0))) - test_fasta_file = os.path.join(this_file_dir, "helpers/data/refseq.NM_001637.3.fasta") - - data_provider = cdot.hgvs.dataproviders.RESTDataProvider( - seqfetcher=cdot.hgvs.dataproviders.ChainedSeqFetcher( - cdot.hgvs.dataproviders.FastaSeqFetcher(test_fasta_file), - # Include normal seqfetcher to fall back on mocked requests (or expose test shortcomings via socket connection attempts). - cdot.hgvs.dataproviders.SeqFetcher(), - ) - ) - - yield data_provider - - -@pytest_asyncio.fixture -async def arq_redis(): - """ - If the `enqueue_job` method of the ArqRedis object is not mocked and you need to run worker - processes from within a test client, it can only be run within the `httpx.AsyncClient` object. - The `fastapi.testclient.TestClient` object does not provide sufficient support for invocations - of asynchronous events. Note that any tests using the worker directly should be marked as async: - - ``` - @pytest.mark.asyncio - async def some_test_with_worker(async_client, arq_redis): - ... - ``` - - You can mock the `enqueue_job` method with: - - ``` - from unittest.mock import patch - def some_test(client, arq_redis): - with patch.object(ArqRedis, "enqueue_job", return_value=None) as worker_queue: - - # Enqueue a job directly - worker_queue.enqueue_job(some_job) - - # Hit an endpoint which enqueues a job - client.post("/some/endpoint/that/invokes/the/worker") - - # Ensure at least one job was queued - worker_queue.assert_called() - ``` - """ - redis_ = ArqRedis( - connection_pool=ConnectionPool( - server=FakeServer(), - connection_class=FakeConnection, - ) - ) - await redis_.flushall() - try: - yield redis_ - finally: - await redis_.aclose(close_connection_pool=True) - - -@pytest_asyncio.fixture() -async def arq_worker(data_provider, session, arq_redis): - """ - Run worker tasks in the test environment by including it as a fixture in a test, - enqueueing a job on the ArqRedis object, and then running the worker. See the arq_redis - fixture for limitations about running worker jobs from within a TestClient object. 
- - ``` - async def worker_test(arq_redis, arq_worker): - await arq_redis.enqueue_job('some_job') - await arq_worker.async_run() - await arq_worker.run_check() - ``` - """ - - async def on_startup(ctx): - pass - - async def on_job(ctx): - ctx["db"] = session - ctx["hdp"] = data_provider - ctx["state"] = {} - ctx["pool"] = futures.ProcessPoolExecutor() - - worker_ = Worker( - functions=[ - create_variants_for_score_set, - map_variants_for_score_set, - variant_mapper_manager, - submit_score_set_mappings_to_ldh, - link_clingen_variants, - ], - redis_pool=arq_redis, - burst=True, - poll_delay=0, - on_startup=on_startup, - on_job_start=on_job, - ) - # `fakeredis` does not support `INFO` - with patch("arq.worker.log_redis_info"): - try: - yield worker_ - finally: - await worker_.close() - - -@pytest.fixture -def standalone_worker_context(session, data_provider, arq_redis): - yield { - "db": session, - "hdp": data_provider, - "state": {}, - "job_id": "test_job", - "redis": arq_redis, - "pool": futures.ProcessPoolExecutor(), - } - - -@pytest.fixture() -def app_(session, data_provider, arq_redis): - def override_get_db(): - try: - yield session - finally: - session.close() - - async def override_get_worker(): - yield arq_redis - - def override_current_user(): - default_user = session.query(User).filter(User.username == TEST_USER["username"]).one_or_none() - yield UserData(default_user, default_user.roles) - - def override_require_user(): - default_user = session.query(User).filter(User.username == TEST_USER["username"]).one_or_none() - yield UserData(default_user, default_user.roles) - - def override_hgvs_data_provider(): - yield data_provider - - app.dependency_overrides[get_db] = override_get_db - app.dependency_overrides[get_worker] = override_get_worker - app.dependency_overrides[get_current_user] = override_current_user - app.dependency_overrides[require_current_user] = override_require_user - app.dependency_overrides[hgvs_data_provider] = override_hgvs_data_provider - - yield app - - -@pytest.fixture() -def anonymous_app_overrides(session, data_provider, arq_redis): - def override_get_db(): - try: - yield session - finally: - session.close() - - async def override_get_worker(): - yield arq_redis - - def override_current_user(): - yield None - - def override_hgvs_data_provider(): - yield data_provider - - anonymous_overrides = { - get_db: override_get_db, - get_worker: override_get_worker, - get_current_user: override_current_user, - require_current_user: require_current_user, - hgvs_data_provider: override_hgvs_data_provider, - } - - yield anonymous_overrides - - -@pytest.fixture() -def extra_user_app_overrides(session, data_provider, arq_redis): - def override_get_db(): - try: - yield session - finally: - session.close() - - async def override_get_worker(): - yield arq_redis - - def override_current_user(): - default_user = session.query(User).filter(User.username == EXTRA_USER["username"]).one_or_none() - yield UserData(default_user, default_user.roles) - - def override_require_user(): - default_user = session.query(User).filter(User.username == EXTRA_USER["username"]).one_or_none() - yield UserData(default_user, default_user.roles) - - def override_hgvs_data_provider(): - yield data_provider - - anonymous_overrides = { - get_db: override_get_db, - get_worker: override_get_worker, - get_current_user: override_current_user, - require_current_user: require_current_user, - hgvs_data_provider: override_hgvs_data_provider, - } - - yield anonymous_overrides - - -@pytest.fixture() -def 
admin_app_overrides(session, data_provider, arq_redis): - def override_get_db(): - try: - yield session - finally: - session.close() - - async def override_get_worker(): - yield arq_redis - - def override_current_user(): - admin_user = session.query(User).filter(User.username == ADMIN_USER["username"]).one_or_none() - yield UserData(admin_user, admin_user.roles) - - def override_require_user(): - admin_user = session.query(User).filter(User.username == ADMIN_USER["username"]).one_or_none() - yield UserData(admin_user, admin_user.roles) - - def override_hgvs_data_provider(): - yield data_provider - - admin_overrides = { - get_db: override_get_db, - get_worker: override_get_worker, - get_current_user: override_current_user, - require_current_user: override_require_user, - hgvs_data_provider: override_hgvs_data_provider, - } - - yield admin_overrides - - -@pytest.fixture -def client(app_): - with TestClient(app=app_, base_url="http://testserver") as tc: - yield tc - - -@pytest_asyncio.fixture -async def async_client(app_): - async with AsyncClient(app=app_, base_url="http://testserver") as ac: - yield ac diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py new file mode 100644 index 00000000..e0f35fc4 --- /dev/null +++ b/tests/conftest_optional.py @@ -0,0 +1,301 @@ +import os +from concurrent import futures +from inspect import getsourcefile +from posixpath import abspath + +import cdot.hgvs.dataproviders +import pytest +import pytest_asyncio +from fastapi.testclient import TestClient +from httpx import AsyncClient +from unittest.mock import patch + +from mavedb.lib.authentication import UserData, get_current_user +from mavedb.lib.authorization import require_current_user +from mavedb.models.user import User +from mavedb.server_main import app +from mavedb.deps import get_db, get_worker, hgvs_data_provider +from arq.worker import Worker +from mavedb.worker.jobs import create_variants_for_score_set, map_variants_for_score_set, variant_mapper_manager + +from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_USER + +#################################################################################################### +# REDIS +#################################################################################################### + + +# Defer imports of redis and arq to support cases where validation tests are called with only core dependencies installed. +@pytest_asyncio.fixture +async def arq_redis(): + """ + If the `enqueue_job` method of the ArqRedis object is not mocked and you need to run worker + processes from within a test client, it can only be run within the `httpx.AsyncClient` object. + The `fastapi.testclient.TestClient` object does not provide sufficient support for invocations + of asynchronous events. Note that any tests using the worker directly should be marked as async: + + ``` + @pytest.mark.asyncio + async def some_test_with_worker(async_client, arq_redis): + ... 
+ ``` + + You can mock the `enqueue_job` method with: + + ``` + from unittest.mock import patch + def some_test(client, arq_redis): + with patch.object(ArqRedis, "enqueue_job", return_value=None) as worker_queue: + + # Enqueue a job directly + worker_queue.enqueue_job(some_job) + + # Hit an endpoint which enqueues a job + client.post("/some/endpoint/that/invokes/the/worker") + + # Ensure at least one job was queued + worker_queue.assert_called() + ``` + """ + from arq import ArqRedis + from fakeredis import FakeServer + from fakeredis.aioredis import FakeConnection + from redis.asyncio.connection import ConnectionPool + + redis_ = ArqRedis( + connection_pool=ConnectionPool( + server=FakeServer(), + connection_class=FakeConnection, + ) + ) + await redis_.flushall() + try: + yield redis_ + finally: + await redis_.aclose(close_connection_pool=True) + + +@pytest_asyncio.fixture() +async def arq_worker(data_provider, session, arq_redis): + """ + Run worker tasks in the test environment by including it as a fixture in a test, + enqueueing a job on the ArqRedis object, and then running the worker. See the arq_redis + fixture for limitations about running worker jobs from within a TestClient object. + + ``` + async def worker_test(arq_redis, arq_worker): + await arq_redis.enqueue_job('some_job') + await arq_worker.async_run() + await arq_worker.run_check() + ``` + """ + + async def on_startup(ctx): + pass + + async def on_job(ctx): + ctx["db"] = session + ctx["hdp"] = data_provider + ctx["state"] = {} + ctx["pool"] = futures.ProcessPoolExecutor() + + worker_ = Worker( + functions=[create_variants_for_score_set, map_variants_for_score_set, variant_mapper_manager], + redis_pool=arq_redis, + burst=True, + poll_delay=0, + on_startup=on_startup, + on_job_start=on_job, + ) + # `fakeredis` does not support `INFO` + with patch("arq.worker.log_redis_info"): + try: + yield worker_ + finally: + await worker_.close() + + +@pytest.fixture +def standalone_worker_context(session, data_provider, arq_redis): + yield { + "db": session, + "hdp": data_provider, + "state": {}, + "job_id": "test_job", + "redis": arq_redis, + "pool": futures.ProcessPoolExecutor(), + } + + +#################################################################################################### +# FASTA DATA PROVIDER +#################################################################################################### + + +@pytest.fixture +def data_provider(): + """ + To provide the transcript for the FASTA file without a network request, use: + + ``` + from helpers.utils.constants import TEST_CDOT_TRANSCRIPT + from unittest.mock import patch + import cdot.hgvs.dataproviders + with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + ... + ``` + """ + this_file_dir = os.path.dirname(abspath(getsourcefile(lambda: 0))) + test_fasta_file = os.path.join(this_file_dir, "helpers/data/refseq.NM_001637.3.fasta") + + data_provider = cdot.hgvs.dataproviders.RESTDataProvider( + seqfetcher=cdot.hgvs.dataproviders.ChainedSeqFetcher( + cdot.hgvs.dataproviders.FastaSeqFetcher(test_fasta_file), + # Include normal seqfetcher to fall back on mocked requests (or expose test shortcomings via socket connection attempts). 
+ cdot.hgvs.dataproviders.SeqFetcher(), + ) + ) + + yield data_provider + + +#################################################################################################### +# FASTAPI CLIENT +#################################################################################################### + + +@pytest.fixture() +def app_(session, data_provider, arq_redis): + def override_get_db(): + try: + yield session + finally: + session.close() + + async def override_get_worker(): + yield arq_redis + + def override_current_user(): + default_user = session.query(User).filter(User.username == TEST_USER["username"]).one_or_none() + yield UserData(default_user, default_user.roles) + + def override_require_user(): + default_user = session.query(User).filter(User.username == TEST_USER["username"]).one_or_none() + yield UserData(default_user, default_user.roles) + + def override_hgvs_data_provider(): + yield data_provider + + app.dependency_overrides[get_db] = override_get_db + app.dependency_overrides[get_worker] = override_get_worker + app.dependency_overrides[get_current_user] = override_current_user + app.dependency_overrides[require_current_user] = override_require_user + app.dependency_overrides[hgvs_data_provider] = override_hgvs_data_provider + + yield app + + +@pytest.fixture() +def anonymous_app_overrides(session, data_provider, arq_redis): + def override_get_db(): + try: + yield session + finally: + session.close() + + async def override_get_worker(): + yield arq_redis + + def override_current_user(): + yield None + + def override_hgvs_data_provider(): + yield data_provider + + anonymous_overrides = { + get_db: override_get_db, + get_worker: override_get_worker, + get_current_user: override_current_user, + require_current_user: require_current_user, + hgvs_data_provider: override_hgvs_data_provider, + } + + yield anonymous_overrides + + +@pytest.fixture() +def extra_user_app_overrides(session, data_provider, arq_redis): + def override_get_db(): + try: + yield session + finally: + session.close() + + async def override_get_worker(): + yield arq_redis + + def override_current_user(): + default_user = session.query(User).filter(User.username == EXTRA_USER["username"]).one_or_none() + yield UserData(default_user, default_user.roles) + + def override_require_user(): + default_user = session.query(User).filter(User.username == EXTRA_USER["username"]).one_or_none() + yield UserData(default_user, default_user.roles) + + def override_hgvs_data_provider(): + yield data_provider + + anonymous_overrides = { + get_db: override_get_db, + get_worker: override_get_worker, + get_current_user: override_current_user, + require_current_user: override_require_user, + hgvs_data_provider: override_hgvs_data_provider, + } + + yield anonymous_overrides + + +@pytest.fixture() +def admin_app_overrides(session, data_provider, arq_redis): + def override_get_db(): + try: + yield session + finally: + session.close() + + async def override_get_worker(): + yield arq_redis + + def override_current_user(): + admin_user = session.query(User).filter(User.username == ADMIN_USER["username"]).one_or_none() + yield UserData(admin_user, admin_user.roles) + + def override_require_user(): + admin_user = session.query(User).filter(User.username == ADMIN_USER["username"]).one_or_none() + yield UserData(admin_user, admin_user.roles) + + def override_hgvs_data_provider(): + yield data_provider + + admin_overrides = { + get_db: override_get_db, + get_worker: override_get_worker, + get_current_user: override_current_user, + 
require_current_user: override_require_user, + hgvs_data_provider: override_hgvs_data_provider, + } + + yield admin_overrides + + +@pytest.fixture +def client(app_): + with TestClient(app=app_, base_url="http://testserver") as tc: + yield tc + + +@pytest_asyncio.fixture +async def async_client(app_): + async with AsyncClient(app=app_, base_url="http://testserver") as ac: + yield ac diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index a134a468..a497d1d6 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -545,16 +545,16 @@ { "name": "TEST1", "category": "protein_coding", - "external_identifiers": [], "target_sequence": { "sequence_type": "dna", "sequence": "ACGTTT", - "reference": { - "id": 1, - "short_name": "Name", - "organism_name": "Organism", - "creation_date": date.today().isoformat(), - "modification_date": date.today().isoformat(), + "taxonomy": { + "tax_id": TEST_TAXONOMY["tax_id"], + "organism_name": TEST_TAXONOMY["organism_name"], + "common_name": TEST_TAXONOMY["common_name"], + "rank": TEST_TAXONOMY["rank"], + "id": TEST_TAXONOMY["id"], + "url": TEST_TAXONOMY["url"], }, }, } @@ -682,7 +682,6 @@ { "name": "TEST2", "category": "protein_coding", - "external_identifiers": [], "target_accession": {"accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE}, } ], diff --git a/tests/helpers/util/access_key.py b/tests/helpers/util/access_key.py new file mode 100644 index 00000000..3058a24c --- /dev/null +++ b/tests/helpers/util/access_key.py @@ -0,0 +1,47 @@ +import secrets + +from sqlalchemy import select +from sqlalchemy.orm import Session +from fastapi.testclient import TestClient + +from mavedb.models.access_key import AccessKey +from mavedb.models.user import User +from mavedb.models.enums.user_role import UserRole + +from mavedb.routers.access_keys import generate_key_pair + + +def create_api_key_for_user(db: Session, username: str) -> str: + user = db.scalars(select(User).where(User.username == username)).one() + private_key, public_key = generate_key_pair() + + item = AccessKey(user=user, key_id=secrets.token_urlsafe(32), public_key=public_key) + db.add(item) + db.commit() + db.refresh(item) + + return item.key_id + + +def create_admin_key_for_user(db: Session, username: str) -> str: + user = db.scalars(select(User).where(User.username == username)).one() + private_key, public_key = generate_key_pair() + + item = AccessKey(user=user, key_id=secrets.token_urlsafe(32), public_key=public_key, role=UserRole.admin) + db.add(item) + db.commit() + db.refresh(item) + + return item.public_key + + +def create_api_key_for_current_user(client: TestClient) -> str: + response = client.post("api/v1/users/me/access-keys") + assert response.status_code == 200 + return response.json()["keyId"] + + +def create_admin_key_for_current_user(client: TestClient) -> str: + response = client.post("api/v1/users/me/access-keys/admin") + assert response.status_code == 200 + return response.json()["keyId"] diff --git a/tests/helpers/util/collection.py b/tests/helpers/util/collection.py new file mode 100644 index 00000000..e2cec1c1 --- /dev/null +++ b/tests/helpers/util/collection.py @@ -0,0 +1,21 @@ +import jsonschema +from copy import deepcopy +from typing import Any, Dict, Optional + +from mavedb.view_models.collection import Collection + +from tests.helpers.constants import TEST_COLLECTION +from fastapi.testclient import TestClient + + +def create_collection(client: TestClient, update: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + 
collection_payload = deepcopy(TEST_COLLECTION) + if update is not None: + collection_payload.update(update) + + response = client.post("/api/v1/collections/", json=collection_payload) + assert response.status_code == 200, "Could not create collection." + + response_data = response.json() + jsonschema.validate(instance=response_data, schema=Collection.schema()) + return response_data diff --git a/tests/helpers/util/common.py b/tests/helpers/util/common.py new file mode 100644 index 00000000..d88f4a38 --- /dev/null +++ b/tests/helpers/util/common.py @@ -0,0 +1,16 @@ +from typing import Dict, Any + + +def update_expected_response_for_created_resources( + expected_response: Dict[str, Any], created_experiment: Dict[str, Any], created_score_set: Dict[str, Any] +) -> Dict[str, Any]: + expected_response.update({"urn": created_score_set["urn"]}) + expected_response["experiment"].update( + { + "urn": created_experiment["urn"], + "experimentSetUrn": created_experiment["experimentSetUrn"], + "scoreSetUrns": [created_score_set["urn"]], + } + ) + + return expected_response diff --git a/tests/helpers/util/contributor.py b/tests/helpers/util/contributor.py new file mode 100644 index 00000000..7ca05598 --- /dev/null +++ b/tests/helpers/util/contributor.py @@ -0,0 +1,22 @@ +from sqlalchemy.orm.exc import NoResultFound +from sqlalchemy import select +from sqlalchemy.orm import Session +from typing import Any + +from mavedb.models.contributor import Contributor + + +def add_contributor(db: Session, urn: str, model: Any, orcid_id: str, given_name: str, family_name: str) -> None: + """Without making an API call, add a new contributor to the record (experiment or score set) with given urn and model.""" + item = db.query(model).filter(model.urn == urn).one_or_none() + assert item is not None + + try: + contributor = db.execute(select(Contributor).where(Contributor.orcid_id == orcid_id)).one() + except NoResultFound: + contributor = Contributor(orcid_id=orcid_id, given_name=given_name, family_name=family_name) + db.add(contributor) + + item.contributors = [contributor] + db.add(item) + db.commit() diff --git a/tests/helpers/util/exceptions.py b/tests/helpers/util/exceptions.py new file mode 100644 index 00000000..bb5a906c --- /dev/null +++ b/tests/helpers/util/exceptions.py @@ -0,0 +1,2 @@ +async def awaitable_exception() -> Exception: + return Exception() diff --git a/tests/helpers/util/experiment.py b/tests/helpers/util/experiment.py new file mode 100644 index 00000000..c130c076 --- /dev/null +++ b/tests/helpers/util/experiment.py @@ -0,0 +1,22 @@ +import jsonschema +from copy import deepcopy +from typing import Any, Dict, Optional + +from mavedb.view_models.experiment import Experiment, ExperimentCreate + +from tests.helpers.constants import TEST_MINIMAL_EXPERIMENT +from fastapi.testclient import TestClient + + +def create_experiment(client: TestClient, update: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + experiment_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) + if update is not None: + experiment_payload.update(update) + jsonschema.validate(instance=experiment_payload, schema=ExperimentCreate.schema()) + + response = client.post("/api/v1/experiments/", json=experiment_payload) + assert response.status_code == 200, "Could not create experiment." 
+ + response_data = response.json() + jsonschema.validate(instance=response_data, schema=Experiment.schema()) + return response_data diff --git a/tests/helpers/util/license.py b/tests/helpers/util/license.py new file mode 100644 index 00000000..895b8a99 --- /dev/null +++ b/tests/helpers/util/license.py @@ -0,0 +1,16 @@ +from sqlalchemy.orm import Session +from mavedb.models.license import License +from mavedb.models.score_set import ScoreSet + + +def change_to_inactive_license(db: Session, urn: str) -> None: + """Change the license of the score set with given urn to an inactive license.""" + item = db.query(ScoreSet).filter(ScoreSet.urn == urn).one_or_none() + assert item is not None + + license = db.query(License).filter(License.active.is_(False)).first() + assert license is not None + + item.license_id = license.id + db.add(item) + db.commit() diff --git a/tests/helpers/util/score_set.py b/tests/helpers/util/score_set.py new file mode 100644 index 00000000..1da70620 --- /dev/null +++ b/tests/helpers/util/score_set.py @@ -0,0 +1,175 @@ +from datetime import date +from copy import deepcopy +from unittest.mock import patch +from typing import Any, Dict, Optional + +import cdot.hgvs.dataproviders +import jsonschema +from sqlalchemy import select + +from mavedb.models.clinical_control import ClinicalControl as ClinicalControlDbModel +from mavedb.models.mapped_variant import MappedVariant as MappedVariantDbModel +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel +from mavedb.models.variant import Variant as VariantDbModel +from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate + +from tests.helpers.constants import ( + TEST_CDOT_TRANSCRIPT, + TEST_MINIMAL_ACC_SCORESET, + TEST_MINIMAL_SEQ_SCORESET, + TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, + TEST_VALID_POST_MAPPED_VRS_CIS_PHASED_BLOCK, + TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, + TEST_VALID_PRE_MAPPED_VRS_CIS_PHASED_BLOCK, +) +from tests.helpers.util.variant import mock_worker_variant_insertion +from fastapi.testclient import TestClient + + +def create_seq_score_set( + client: TestClient, experiment_urn: Optional[str], update: Optional[Dict[str, Any]] = None +) -> Dict[str, Any]: + score_set_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + if experiment_urn is not None: + score_set_payload["experimentUrn"] = experiment_urn + if update is not None: + score_set_payload.update(update) + jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.schema()) + + response = client.post("/api/v1/score-sets/", json=score_set_payload) + assert response.status_code == 200, "Could not create sequence based score set" + + response_data = response.json() + jsonschema.validate(instance=response_data, schema=ScoreSet.schema()) + return response_data + + +def create_acc_score_set( + client: TestClient, experiment_urn: Optional[str], update: Optional[Dict[str, Any]] = None +) -> Dict[str, Any]: + score_set_payload = deepcopy(TEST_MINIMAL_ACC_SCORESET) + if experiment_urn is not None: + score_set_payload["experimentUrn"] = experiment_urn + if update is not None: + score_set_payload.update(update) + + jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.schema()) + + with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + response = client.post("/api/v1/score-sets/", json=score_set_payload) + + assert response.status_code == 200, "Could not create accession based score set" + + response_data = response.json() + 
+    jsonschema.validate(instance=response_data, schema=ScoreSet.schema())
+    return response_data
+
+
+def create_seq_score_set_with_mapped_variants(
+    client, db, data_provider, experiment_urn, scores_csv_path, update=None, counts_csv_path=None
+):
+    score_set = create_seq_score_set_with_variants(
+        client, db, data_provider, experiment_urn, scores_csv_path, update, counts_csv_path
+    )
+    score_set = mock_worker_vrs_mapping(client, db, score_set)
+
+    jsonschema.validate(instance=score_set, schema=ScoreSet.schema())
+    return score_set
+
+
+def create_acc_score_set_with_mapped_variants(
+    client, db, data_provider, experiment_urn, scores_csv_path, update=None, counts_csv_path=None
+):
+    score_set = create_acc_score_set_with_variants(
+        client, db, data_provider, experiment_urn, scores_csv_path, update, counts_csv_path
+    )
+    score_set = mock_worker_vrs_mapping(client, db, score_set)
+
+    jsonschema.validate(instance=score_set, schema=ScoreSet.schema())
+    return score_set
+
+
+def create_seq_score_set_with_variants(
+    client, db, data_provider, experiment_urn, scores_csv_path, update=None, counts_csv_path=None
+):
+    score_set = create_seq_score_set(client, experiment_urn, update)
+    score_set = mock_worker_variant_insertion(client, db, data_provider, score_set, scores_csv_path, counts_csv_path)
+
+    assert (
+        score_set["numVariants"] == 3
+    ), f"Could not create sequence based score set with variants within experiment {experiment_urn}"
+
+    jsonschema.validate(instance=score_set, schema=ScoreSet.schema())
+    return score_set
+
+
+def create_acc_score_set_with_variants(
+    client, db, data_provider, experiment_urn, scores_csv_path, update=None, counts_csv_path=None
+):
+    score_set = create_acc_score_set(client, experiment_urn, update)
+    score_set = mock_worker_variant_insertion(client, db, data_provider, score_set, scores_csv_path, counts_csv_path)
+
+    assert (
+        score_set["numVariants"] == 3
+    ), f"Could not create accession based score set with variants within experiment {experiment_urn}"
+
+    jsonschema.validate(instance=score_set, schema=ScoreSet.schema())
+    return score_set
+
+
+def link_clinical_controls_to_mapped_variants(db, score_set):
+    mapped_variants = db.scalars(
+        select(MappedVariantDbModel)
+        .join(VariantDbModel)
+        .join(ScoreSetDbModel)
+        .where(ScoreSetDbModel.urn == score_set["urn"])
+    ).all()
+
+    # The first mapped variant gets the clinvar control, the second gets the generic control.
+    mapped_variants[0].clinical_controls.append(
+        db.scalar(select(ClinicalControlDbModel).where(ClinicalControlDbModel.id == 1))
+    )
+    mapped_variants[1].clinical_controls.append(
+        db.scalar(select(ClinicalControlDbModel).where(ClinicalControlDbModel.id == 2))
+    )
+
+    db.add(mapped_variants[0])
+    db.add(mapped_variants[1])
+    db.commit()
+
+
+def mock_worker_vrs_mapping(client, db, score_set, alleles=True):
+    # The mapping job is tested elsewhere, so insert mapped variants manually.
+    variants = db.scalars(
+        select(VariantDbModel).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set["urn"])
+    ).all()
+
+    # It's unimportant what the contents of each mapped VRS object are, so use the same constant for each variant.
+    for variant in variants:
+        mapped_variant = MappedVariantDbModel(
+            pre_mapped=TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X
+            if alleles
+            else TEST_VALID_PRE_MAPPED_VRS_CIS_PHASED_BLOCK,
+            post_mapped=TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X
+            if alleles
+            else TEST_VALID_POST_MAPPED_VRS_CIS_PHASED_BLOCK,
+            variant=variant,
+            vrs_version="2.0",
+            modification_date=date.today(),
+            mapped_date=date.today(),
+            mapping_api_version="pytest.0.0",
+            current=True,
+        )
+        db.add(mapped_variant)
+
+    db.commit()
+
+    return client.get(f"/api/v1/score-sets/{score_set['urn']}").json()
+
+
+def publish_score_set(client: TestClient, score_set_urn: str) -> Dict[str, Any]:
+    response = client.post(f"/api/v1/score-sets/{score_set_urn}/publish")
+    assert response.status_code == 200, f"Could not publish score set {score_set_urn}"
+
+    response_data = response.json()
+    return response_data
diff --git a/tests/helpers/util/user.py b/tests/helpers/util/user.py
new file mode 100644
index 00000000..b0ffab54
--- /dev/null
+++ b/tests/helpers/util/user.py
@@ -0,0 +1,30 @@
+from typing import Any
+
+from sqlalchemy.orm import Session
+
+from mavedb.models.user import User
+
+from tests.helpers.constants import EXTRA_USER
+
+
+def mark_user_inactive(session: Session, username: str) -> User:
+    user = session.query(User).where(User.username == username).one()
+    user.is_active = False
+
+    session.add(user)
+    session.commit()
+    session.refresh(user)
+
+    return user
+
+
+def change_ownership(db: Session, urn: str, model: Any) -> None:
+    """Change the ownership of the record with given urn and model to the extra user."""
+    item = db.query(model).filter(model.urn == urn).one_or_none()
+    assert item is not None
+    extra_user = db.query(User).filter(User.username == EXTRA_USER["username"]).one_or_none()
+    assert extra_user is not None
+    item.created_by_id = extra_user.id
+    item.modified_by_id = extra_user.id
+    db.add(item)
+    db.commit()
diff --git a/tests/helpers/util/variant.py b/tests/helpers/util/variant.py
new file mode 100644
index 00000000..57bcd436
--- /dev/null
+++ b/tests/helpers/util/variant.py
@@ -0,0 +1,99 @@
+from typing import Optional
+
+from arq import ArqRedis
+from cdot.hgvs.dataproviders import RESTDataProvider
+from fastapi.testclient import TestClient
+from sqlalchemy.orm import Session
+from sqlalchemy import select
+from unittest.mock import patch
+
+from mavedb.lib.score_sets import create_variants, create_variants_data, csv_data_to_df
+from mavedb.lib.validation.dataframe.dataframe import validate_and_standardize_dataframe_pair
+from mavedb.models.enums.processing_state import ProcessingState
+from mavedb.models.enums.mapping_state import MappingState
+from mavedb.models.mapped_variant import MappedVariant
+from mavedb.models.score_set import ScoreSet
+from mavedb.models.target_gene import TargetGene
+from mavedb.models.variant import Variant
+
+from tests.helpers.constants import (
+    TEST_MINIMAL_MAPPED_VARIANT,
+    TEST_MINIMAL_PRE_MAPPED_METADATA,
+    TEST_MINIMAL_POST_MAPPED_METADATA,
+)
+
+
+def mock_worker_variant_insertion(
+    client: TestClient,
+    db: Session,
+    data_provider: RESTDataProvider,
+    score_set: dict,
+    scores_csv_path: str,
+    counts_csv_path: Optional[str] = None,
+) -> dict:
+    with (
+        open(scores_csv_path, "rb") as score_file,
+        patch.object(ArqRedis, "enqueue_job", return_value=None) as worker_queue,
+    ):
+        files = {"scores_file": (scores_csv_path.name, score_file, "rb")}
+
+        if counts_csv_path is not None:
+            counts_file = open(counts_csv_path, "rb")
+            files["counts_file"] = (counts_csv_path.name, counts_file, "rb")
+        else:
+            counts_file = None
+
+        response = client.post(f"/api/v1/score-sets/{score_set['urn']}/variants/data", files=files)
+
+        # Assert we have mocked a job being added to the queue, and that the request succeeded. The
+        # response value here isn't important- we will add variants to the score set manually.
+        worker_queue.assert_called_once()
+        assert response.status_code == 200
+
+    if counts_file is not None:
+        counts_file.close()
+
+    # Reopen files since their buffers are consumed while mocking the variant data post request.
+    with open(scores_csv_path, "rb") as score_file:
+        score_df = csv_data_to_df(score_file)
+
+    if counts_csv_path is not None:
+        with open(counts_csv_path, "rb") as counts_file:
+            counts_df = csv_data_to_df(counts_file)
+    else:
+        counts_df = None
+
+    # Insert variant manually, worker jobs are tested elsewhere separately.
+    item = db.scalars(select(ScoreSet).where(ScoreSet.urn == score_set["urn"])).one_or_none()
+    assert item is not None
+
+    scores, counts = validate_and_standardize_dataframe_pair(score_df, counts_df, item.target_genes, data_provider)
+    variants = create_variants_data(scores, counts, None)
+    num_variants = create_variants(db, item, variants)
+    assert num_variants == 3
+
+    item.processing_state = ProcessingState.success
+
+    db.add(item)
+    db.commit()
+
+    return client.get(f"/api/v1/score-sets/{score_set['urn']}").json()
+
+
+def create_mapped_variants_for_score_set(db, score_set_urn):
+    score_set = db.scalar(select(ScoreSet).where(ScoreSet.urn == score_set_urn))
+    targets = db.scalars(select(TargetGene).where(TargetGene.score_set_id == score_set.id))
+    variants = db.scalars(select(Variant).where(Variant.score_set_id == score_set.id)).all()
+
+    for variant in variants:
+        mv = MappedVariant(**TEST_MINIMAL_MAPPED_VARIANT, variant_id=variant.id)
+        db.add(mv)
+
+    for target in targets:
+        target.pre_mapped_metadata = TEST_MINIMAL_PRE_MAPPED_METADATA
+        target.post_mapped_metadata = TEST_MINIMAL_POST_MAPPED_METADATA
+        db.add(target)
+
+    score_set.mapping_state = MappingState.complete
+    db.commit()
+    return
diff --git a/tests/lib/test_authentication.py b/tests/lib/test_authentication.py
index d0c1aa0d..53427193 100644
--- a/tests/lib/test_authentication.py
+++ b/tests/lib/test_authentication.py
@@ -1,74 +1,62 @@
-from unittest.mock import patch
+# ruff: noqa: E402

 import pytest
-from fastapi import HTTPException
+from unittest.mock import patch
+
+arq = pytest.importorskip("arq")
+cdot = pytest.importorskip("cdot")
+fastapi = pytest.importorskip("fastapi")

 from mavedb.lib.authentication import get_current_user, get_current_user_data_from_api_key
 from mavedb.models.enums.user_role import UserRole
 from mavedb.models.user import User

 from tests.helpers.constants import ADMIN_USER, ADMIN_USER_DECODED_JWT, TEST_USER, TEST_USER_DECODED_JWT
-from tests.helpers.util import create_api_key_for_current_user, mark_user_inactive
+
+from tests.helpers.util.access_key import create_api_key_for_user
+from tests.helpers.util.user import mark_user_inactive


 @pytest.mark.asyncio
-async def test_get_current_user_data_from_key_valid_token(session, setup_lib_db, client):
-    access_key = create_api_key_for_current_user(client)
+async def test_get_current_user_data_from_key_valid_token(session, setup_lib_db):
+    access_key = create_api_key_for_user(session, TEST_USER["username"])
     user_data = await get_current_user_data_from_api_key(session, access_key)

     assert user_data.user.username == TEST_USER["username"]

-    # Some lingering db transaction holds
this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio -async def test_get_current_user_data_from_key_invalid_token(session, setup_lib_db, client): - access_key = create_api_key_for_current_user(client) +async def test_get_current_user_data_from_key_invalid_token(session, setup_lib_db): + access_key = create_api_key_for_user(session, TEST_USER["username"]) user_data = await get_current_user_data_from_api_key(session, f"invalid_{access_key}") assert user_data is None - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio -async def test_get_current_user_data_from_key_nonetype_token(session, setup_lib_db, client): - create_api_key_for_current_user(client) +async def test_get_current_user_data_from_key_nonetype_token(session, setup_lib_db): + create_api_key_for_user(session, TEST_USER["username"]) user_data = await get_current_user_data_from_api_key(session, None) assert user_data is None - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio -async def test_get_current_user_via_api_key(session, setup_lib_db, client): - access_key = create_api_key_for_current_user(client) +async def test_get_current_user_via_api_key(session, setup_lib_db): + access_key = create_api_key_for_user(session, TEST_USER["username"]) user_data = await get_current_user_data_from_api_key(session, access_key) user_data = await get_current_user(user_data, None, session, None) assert user_data.user.username == TEST_USER["username"] - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio async def test_get_current_user_via_token_payload(session, setup_lib_db): user_data = await get_current_user(None, TEST_USER_DECODED_JWT, session, None) assert user_data.user.username == TEST_USER["username"] - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio async def test_get_current_user_no_api_no_jwt(session, setup_lib_db): user_data = await get_current_user(None, None, session, None) assert user_data is None - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio async def test_get_current_user_no_username(session, setup_lib_db): @@ -79,9 +67,6 @@ async def test_get_current_user_no_username(session, setup_lib_db): user_data = await get_current_user(None, jwt_without_sub, session, None) assert user_data is None - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio @pytest.mark.parametrize("with_email", [True, False]) @@ -106,9 +91,6 @@ async def test_get_current_user_nonexistent_user(session, setup_lib_db, with_ema # Ensure one user record is in the database session.query(User).filter(User.username == new_user_jwt["sub"]).one() - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio async def test_get_current_user_user_is_inactive(session, setup_lib_db): @@ -117,9 +99,6 @@ async def test_get_current_user_user_is_inactive(session, setup_lib_db): assert user_data is None - # Some lingering db transaction holds this test open unless it is explicitly closed. 
- session.commit() - @pytest.mark.asyncio async def test_get_current_user_set_active_roles(session, setup_lib_db): @@ -128,19 +107,13 @@ async def test_get_current_user_set_active_roles(session, setup_lib_db): assert user_data.user.username == ADMIN_USER["username"] assert UserRole.admin in user_data.active_roles - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio async def test_get_current_user_user_with_invalid_role_membership(session, setup_lib_db): - with pytest.raises(HTTPException) as exc_info: + with pytest.raises(Exception) as exc_info: await get_current_user(None, TEST_USER_DECODED_JWT, session, "admin") assert "This user is not a member of the requested acting role." in str(exc_info.value.detail) - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() - @pytest.mark.asyncio async def test_get_current_user_user_extraneous_roles(session, setup_lib_db): @@ -148,6 +121,3 @@ async def test_get_current_user_user_extraneous_roles(session, setup_lib_db): assert user_data.user.username == TEST_USER["username"] assert user_data.active_roles == [] - - # Some lingering db transaction holds this test open unless it is explicitly closed. - session.commit() diff --git a/tests/lib/test_score_set.py b/tests/lib/test_score_set.py index 3179b921..4957c392 100644 --- a/tests/lib/test_score_set.py +++ b/tests/lib/test_score_set.py @@ -1,3 +1,5 @@ +# ruff: noqa: E402 + import io import numpy as np @@ -5,6 +7,10 @@ import pytest from sqlalchemy import select +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.lib.score_sets import ( HGVSColumns, columns_for_dataset, @@ -19,10 +25,17 @@ null_values_list, required_score_column, ) +from mavedb.models.experiment import Experiment +from mavedb.models.license import License from mavedb.models.score_set import ScoreSet +from mavedb.models.target_accession import TargetAccession +from mavedb.models.target_gene import TargetGene +from mavedb.models.target_sequence import TargetSequence +from mavedb.models.taxonomy import Taxonomy from mavedb.models.variant import Variant -from tests.helpers.constants import TEST_SAVED_SCORE_SET_RANGE -from tests.helpers.util import create_acc_score_set, create_experiment, create_seq_score_set +from tests.helpers.constants import TEST_EXPERIMENT, TEST_ACC_SCORESET, TEST_SAVED_SCORE_SET_RANGE, TEST_SEQ_SCORESET +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.score_set import create_seq_score_set def test_columns_for_dataset_no_dataset(): @@ -264,12 +277,37 @@ def test_create_variants_data_scores_and_counts_mismatched_lengths(): create_variants_data(scores_df, counts_df) -def test_create_variants_seq_score_set(setup_lib_db, client, session): - experiment = create_experiment(client) - score_set = create_seq_score_set(client, experiment["urn"]) - score_set = session.scalars(select(ScoreSet)).first() - variant_data = create_variants_data(BASE_VARIANTS_SCORE_DF) +def test_create_variants_seq_score_set(setup_lib_db, session): + experiment = Experiment(**TEST_EXPERIMENT, extra_metadata={}) + session.add(experiment) + session.commit() + session.refresh(experiment) + + target_sequences = [ + TargetSequence(**{**seq["target_sequence"], **{"taxonomy": session.scalars(select(Taxonomy)).first()}}) + for seq in TEST_SEQ_SCORESET["target_genes"] + ] + target_genes = [ + TargetGene(**{**gene, 
**{"target_sequence": target_sequences[idx]}}) + for idx, gene in enumerate(TEST_SEQ_SCORESET["target_genes"]) + ] + + score_set = ScoreSet( + **{ + **TEST_SEQ_SCORESET, + **{ + "experiment_id": experiment.id, + "target_genes": target_genes, + "extra_metadata": {}, + "license": session.scalars(select(License)).first(), + }, + } + ) + session.add(score_set) + session.commit() + session.refresh(score_set) + variant_data = create_variants_data(BASE_VARIANTS_SCORE_DF) num_variants = create_variants( session, score_set, @@ -287,12 +325,34 @@ def test_create_variants_seq_score_set(setup_lib_db, client, session): session.commit() -def test_create_variants_acc_score_set(setup_lib_db, client, session): - experiment = create_experiment(client) - score_set = create_acc_score_set(client, experiment["urn"]) - score_set = session.scalars(select(ScoreSet)).first() - variant_data = create_variants_data(BASE_VARIANTS_SCORE_DF) +def test_create_variants_acc_score_set(setup_lib_db, session): + experiment = Experiment(**TEST_EXPERIMENT, extra_metadata={}) + session.add(experiment) + session.commit() + session.refresh(experiment) + + target_accessions = [TargetAccession(**seq["target_accession"]) for seq in TEST_ACC_SCORESET["target_genes"]] + target_genes = [ + TargetGene(**{**gene, **{"target_accession": target_accessions[idx]}}) + for idx, gene in enumerate(TEST_ACC_SCORESET["target_genes"]) + ] + score_set = ScoreSet( + **{ + **TEST_ACC_SCORESET, + **{ + "experiment_id": experiment.id, + "target_genes": target_genes, + "extra_metadata": {}, + "license": session.scalars(select(License)).first(), + }, + } + ) + session.add(score_set) + session.commit() + session.refresh(score_set) + + variant_data = create_variants_data(BASE_VARIANTS_SCORE_DF) num_variants = create_variants( session, score_set, diff --git a/tests/routers/conftest.py b/tests/routers/conftest.py index e634f614..8e05a56b 100644 --- a/tests/routers/conftest.py +++ b/tests/routers/conftest.py @@ -1,26 +1,23 @@ from pathlib import Path from shutil import copytree -from unittest.mock import patch -import cdot.hgvs.dataproviders import pytest from mavedb.models.clinical_control import ClinicalControl from mavedb.models.controlled_keyword import ControlledKeyword from mavedb.models.contributor import Contributor from mavedb.models.enums.user_role import UserRole -from mavedb.models.published_variant import PublishedVariantsMV from mavedb.models.license import License from mavedb.models.role import Role from mavedb.models.taxonomy import Taxonomy from mavedb.models.user import User + from tests.helpers.constants import ( ADMIN_USER, TEST_CLINVAR_CONTROL, TEST_GENERIC_CLINICAL_CONTROL, EXTRA_USER, EXTRA_CONTRIBUTOR, - TEST_CDOT_TRANSCRIPT, TEST_DB_KEYWORDS, TEST_LICENSE, TEST_INACTIVE_LICENSE, @@ -28,13 +25,6 @@ TEST_TAXONOMY, TEST_USER, ) -from tests.helpers.util import ( - create_acc_score_set_with_variants, - create_experiment, - create_seq_score_set_with_variants, - create_mapped_variants_for_score_set, - publish_score_set, -) @pytest.fixture @@ -65,32 +55,6 @@ def data_files(tmp_path): return tmp_path / "data" -# Fixtures for setting up score sets on which to calculate statistics. -# Adds an experiment and score set to the database, then publishes the score set. 
-@pytest.fixture -def setup_acc_scoreset(setup_router_db, session, data_provider, client, data_files): - experiment = create_experiment(client) - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): - score_set = create_acc_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores_acc.csv" - ) - publish_score_set(client, score_set["urn"]) - - -@pytest.fixture -def setup_seq_scoreset(setup_router_db, session, data_provider, client, data_files): - experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) - create_mapped_variants_for_score_set(session, score_set["urn"]) - publish_score_set(client, score_set["urn"]) - - # Note that we have not created indexes for this view when it is generated via metadata. This differs - # from the database created via alembic, which does create indexes. - PublishedVariantsMV.refresh(session, False) - - @pytest.fixture def mock_publication_fetch(request, requests_mock): """ diff --git a/tests/routers/test_access_keys.py b/tests/routers/test_access_keys.py index 4e266a0f..836dad6d 100644 --- a/tests/routers/test_access_keys.py +++ b/tests/routers/test_access_keys.py @@ -1,9 +1,18 @@ +# ruff: noqa: E402 + +import pytest + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.models.access_key import AccessKey from mavedb.models.enums.user_role import UserRole from mavedb.models.user import User + from tests.helpers.constants import EXTRA_USER from tests.helpers.dependency_overrider import DependencyOverrider -from tests.helpers.util import create_admin_key_for_current_user, create_api_key_for_current_user +from tests.helpers.util.access_key import create_admin_key_for_current_user, create_api_key_for_current_user def test_create_user_access_key(client, setup_router_db, session): diff --git a/tests/routers/test_collections.py b/tests/routers/test_collections.py index 3fae0d91..ce6a1ef4 100644 --- a/tests/routers/test_collections.py +++ b/tests/routers/test_collections.py @@ -1,12 +1,20 @@ +# ruff: noqa: E402 + import re from copy import deepcopy +from unittest.mock import patch import jsonschema import pytest +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.lib.validation.urn_re import MAVEDB_COLLECTION_URN_RE from mavedb.models.enums.contribution_role import ContributionRole from mavedb.view_models.collection import Collection + from tests.helpers.constants import ( EXTRA_USER, TEST_USER, @@ -14,12 +22,10 @@ TEST_COLLECTION_RESPONSE, ) from tests.helpers.dependency_overrider import DependencyOverrider -from tests.helpers.util import ( - create_collection, - create_experiment, - create_seq_score_set_with_variants, - publish_score_set, -) +from tests.helpers.util.collection import create_collection +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.score_set import create_seq_score_set, publish_score_set +from tests.helpers.util.variant import mock_worker_variant_insertion def test_create_private_collection(client, setup_router_db): @@ -224,10 +230,14 @@ def test_admin_can_add_experiment_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = 
create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) client.post(f"/api/v1/collections/{collection['urn']}/admins", json={"orcid_id": EXTRA_USER["username"]}) @@ -278,10 +288,14 @@ def test_editor_can_add_experiment_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) client.post(f"/api/v1/collections/{collection['urn']}/editors", json={"orcid_id": EXTRA_USER["username"]}) @@ -326,10 +340,14 @@ def test_viewer_cannot_add_experiment_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) client.post(f"/api/v1/collections/{collection['urn']}/viewers", json={"orcid_id": EXTRA_USER["username"]}) @@ -349,10 +367,14 @@ def test_unauthorized_user_cannot_add_experiment_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) @@ 
-370,10 +392,14 @@ def test_anonymous_cannot_add_experiment_to_collection( session, client, data_provider, data_files, setup_router_db, anonymous_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) @@ -391,10 +417,14 @@ def test_admin_can_add_score_set_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) client.post(f"/api/v1/collections/{collection['urn']}/admins", json={"orcid_id": EXTRA_USER["username"]}) @@ -444,10 +474,14 @@ def test_editor_can_add_score_set_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) client.post(f"/api/v1/collections/{collection['urn']}/editors", json={"orcid_id": EXTRA_USER["username"]}) @@ -491,10 +525,14 @@ def test_viewer_cannot_add_score_set_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, 
unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) client.post(f"/api/v1/collections/{collection['urn']}/viewers", json={"orcid_id": EXTRA_USER["username"]}) @@ -513,10 +551,14 @@ def test_unauthorized_user_cannot_add_score_set_to_collection( session, client, data_provider, data_files, setup_router_db, extra_user_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) @@ -533,10 +575,14 @@ def test_anonymous_cannot_add_score_set_to_collection( session, client, data_provider, data_files, setup_router_db, anonymous_app_overrides ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() collection = create_collection(client) diff --git a/tests/routers/test_experiments.py b/tests/routers/test_experiments.py index 199cd2b7..6908a0ab 100644 --- a/tests/routers/test_experiments.py +++ b/tests/routers/test_experiments.py @@ -1,3 +1,5 @@ +# ruff: noqa: E402 + import re from copy import deepcopy from datetime import date @@ -8,12 +10,17 @@ import requests import requests_mock +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.lib.validation.urn_re import MAVEDB_TMP_URN_RE from mavedb.models.experiment import Experiment as ExperimentDbModel from mavedb.models.experiment_set import ExperimentSet as ExperimentSetDbModel from mavedb.models.score_set import ScoreSet as ScoreSetDbModel from mavedb.view_models.experiment import Experiment, ExperimentCreate from mavedb.view_models.orcid import OrcidUser + from tests.helpers.constants import ( EXTRA_USER, TEST_BIORXIV_IDENTIFIER, @@ -31,14 +38,11 @@ TEST_USER, ) from tests.helpers.dependency_overrider import DependencyOverrider -from tests.helpers.util import ( - add_contributor, - change_ownership, - create_experiment, - create_seq_score_set, - create_seq_score_set_with_variants, - publish_score_set, -) +from tests.helpers.util.contributor import add_contributor +from tests.helpers.util.user import change_ownership +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.score_set import create_seq_score_set, publish_score_set +from tests.helpers.util.variant import mock_worker_variant_insertion def test_test_minimal_experiment_is_valid(): @@ -500,11 +504,15 @@ 
def test_admin_can_update_other_users_private_experiment_set(session, client, ad def test_can_update_own_public_experiment_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + response_data = create_experiment( client, {"experimentSetUrn": published_score_set["experiment"]["experimentSetUrn"], "title": "Second Experiment"}, @@ -515,10 +523,15 @@ def test_can_update_own_public_experiment_set(session, data_provider, client, se def test_cannot_update_other_users_public_experiment_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + published_experiment_set_urn = published_score_set["experiment"]["experimentSetUrn"] change_ownership(session, published_experiment_set_urn, ExperimentSetDbModel) experiment_post_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) @@ -533,10 +546,15 @@ def test_anonymous_cannot_update_others_user_public_experiment_set( session, data_provider, client, anonymous_app_overrides, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + published_experiment_set_urn = published_score_set["experiment"]["experimentSetUrn"] experiment_post_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) experiment_post_payload.update({"experimentSetUrn": published_experiment_set_urn, "title": "Second Experiment"}) @@ -553,10 +571,14 @@ def test_admin_can_update_other_users_public_experiment_set( session, data_provider, client, admin_app_overrides, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = 
create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() with DependencyOverrider(admin_app_overrides): response_data = create_experiment( @@ -1007,47 +1029,61 @@ def test_search_my_experiments(session, client, setup_router_db): def test_search_meta_analysis_experiment(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, score_set["urn"]) - meta_score_set = create_seq_score_set_with_variants( + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set["urn"]]}, ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" + ) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called_once() - meta_score_set = publish_score_set(client, meta_score_set["urn"]) score_set_refresh = (client.get(f"/api/v1/score-sets/{score_set['urn']}")).json() search_payload = {"metaAnalysis": True} response = client.post("/api/v1/me/experiments/search", json=search_payload) assert response.status_code == 200 response_data = response.json() - assert any(item["urn"] == meta_score_set["experiment"]["urn"] for item in response_data) + assert any(item["urn"] == published_meta_score_set["experiment"]["urn"] for item in response_data) assert all(item["urn"] != score_set_refresh["experiment"]["urn"] for item in response_data) def test_search_exclude_meta_analysis_experiment(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - score_set = publish_score_set(client, score_set["urn"]) - meta_score_set = create_seq_score_set_with_variants( + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", update={"title": "Test Meta Analysis", 
"metaAnalyzesScoreSetUrns": [score_set["urn"]]}, ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" + ) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called_once() - meta_score_set = publish_score_set(client, meta_score_set["urn"]) score_set_refresh = (client.get(f"/api/v1/score-sets/{score_set['urn']}")).json() search_payload = {"metaAnalysis": False} response = client.post("/api/v1/me/experiments/search", json=search_payload) @@ -1059,14 +1095,17 @@ def test_search_exclude_meta_analysis_experiment(session, data_provider, client, def test_search_score_sets_for_experiments(session, client, setup_router_db, data_files, data_provider): experiment = create_experiment(client) - score_set_pub = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + # make the unpublished score set owned by some other user. This shouldn't appear in the results. score_set_unpub = create_seq_score_set(client, experiment["urn"], update={"title": "Unpublished Score Set"}) - published_score_set = publish_score_set(client, score_set_pub["urn"]) change_ownership(session, score_set_unpub["urn"], ScoreSetDbModel) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + # On score set publication, the experiment will get a new urn experiment_urn = published_score_set["experiment"]["urn"] response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets") @@ -1080,10 +1119,15 @@ def test_owner_searches_score_sets_with_unpublished_superseding_score_sets_for_e session, client, setup_router_db, data_files, data_provider ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] @@ -1103,10 +1147,15 @@ def test_non_owner_searches_score_sets_with_unpublished_superseding_score_sets_f session, client, setup_router_db, data_files, data_provider ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / 
"scores.csv" ) - published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] @@ -1127,22 +1176,28 @@ def test_owner_searches_published_superseding_score_sets_for_experiments( session, client, setup_router_db, data_files, data_provider ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, unpublished_score_set["urn"]) - superseding_score_set = create_seq_score_set_with_variants( - client, - session, - data_provider, - published_score_set["experiment"]["urn"], - data_files / "scores.csv", - update={"supersededScoreSetUrn": published_score_set["urn"]}, - ) - published_superseding_score_set = publish_score_set(client, superseding_score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + # On score set publication, the experiment will get a new urn experiment_urn = published_score_set["experiment"]["urn"] + superseding_score_set = create_seq_score_set( + client, experiment_urn, update={"supersededScoreSetUrn": published_score_set["urn"]} + ) + superseding_score_set = mock_worker_variant_insertion( + client, session, data_provider, superseding_score_set, data_files / "scores.csv" + ) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_superseding_score_set = publish_score_set(client, superseding_score_set["urn"]) + worker_queue.assert_called_once() + response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets") assert response.status_code == 200 assert len(response.json()) == 1 @@ -1153,24 +1208,31 @@ def test_non_owner_searches_published_superseding_score_sets_for_experiments( session, client, setup_router_db, data_files, data_provider ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, unpublished_score_set["urn"]) - superseding_score_set = create_seq_score_set_with_variants( - client, - session, - data_provider, - published_score_set["experiment"]["urn"], - data_files / "scores.csv", - update={"supersededScoreSetUrn": published_score_set["urn"]}, + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + # On score set 
publication, the experiment will get a new urn + experiment_urn = published_score_set["experiment"]["urn"] + superseding_score_set = create_seq_score_set( + client, experiment_urn, update={"supersededScoreSetUrn": published_score_set["urn"]} + ) + superseding_score_set = mock_worker_variant_insertion( + client, session, data_provider, superseding_score_set, data_files / "scores.csv" ) - published_superseding_score_set = publish_score_set(client, superseding_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_superseding_score_set = publish_score_set(client, superseding_score_set["urn"]) + worker_queue.assert_called_once() + change_ownership(session, published_score_set["urn"], ScoreSetDbModel) change_ownership(session, published_superseding_score_set["urn"], ScoreSetDbModel) - # On score set publication, the experiment will get a new urn - experiment_urn = published_score_set["experiment"]["urn"] + response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets") assert response.status_code == 200 assert len(response.json()) == 1 @@ -1179,12 +1241,11 @@ def test_non_owner_searches_published_superseding_score_sets_for_experiments( def test_search_score_sets_for_contributor_experiments(session, client, setup_router_db, data_files, data_provider): experiment = create_experiment(client) - score_set_pub = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + # make the unpublished score set owned by some other user. This shouldn't appear in the results. score_set_unpub = create_seq_score_set(client, experiment["urn"], update={"title": "Unpublished Score Set"}) - published_score_set = publish_score_set(client, score_set_pub["urn"]) change_ownership(session, score_set_unpub["urn"], ScoreSetDbModel) add_contributor( session, @@ -1195,6 +1256,10 @@ def test_search_score_sets_for_contributor_experiments(session, client, setup_ro TEST_USER["last_name"], ) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + # On score set publication, the experiment will get a new urn experiment_urn = published_score_set["experiment"]["urn"] response = client.get(f"/api/v1/experiments/{experiment_urn}/score-sets") @@ -1207,12 +1272,14 @@ def test_search_score_sets_for_contributor_experiments(session, client, setup_ro def test_search_score_sets_for_my_experiments(session, client, setup_router_db, data_files, data_provider): experiment = create_experiment(client) - score_set_pub = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + # The unpublished score set is for the current user, so it should show up in results. 
score_set_unpub = create_seq_score_set(client, experiment["urn"], update={"title": "Unpublished Score Set"}) - published_score_set = publish_score_set(client, score_set_pub["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() # On score set publication, the experiment will get a new urn experiment_urn = published_score_set["experiment"]["urn"] @@ -1278,13 +1345,18 @@ def test_anonymous_cannot_delete_other_users_published_experiment( session, data_provider, client, setup_router_db, data_files, anonymous_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - publish_score_set(client, score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + experiment_urn = score_set["experiment"]["urn"] with DependencyOverrider(anonymous_app_overrides): - del_response = client.delete(f"/api/v1/experiments/{experiment['urn']}") + del_response = client.delete(f"/api/v1/experiments/{experiment_urn}") assert del_response.status_code == 401 del_response_data = del_response.json() @@ -1300,11 +1372,16 @@ def test_can_delete_own_private_experiment(session, client, setup_router_db): def test_cannot_delete_own_published_experiment(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) - experiment_urn = published_score_set["experiment"]["urn"] + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + experiment_urn = score_set["experiment"]["urn"] del_response = client.delete(f"/api/v1/experiments/{experiment_urn}") assert del_response.status_code == 403 @@ -1340,21 +1417,25 @@ def test_contributor_cannot_delete_other_users_published_experiment( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) - experiment = published_score_set["experiment"] - change_ownership(session, experiment["urn"], ExperimentDbModel) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, 
unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + experiment_urn = score_set["experiment"]["urn"] + change_ownership(session, experiment_urn, ExperimentDbModel) add_contributor( session, - experiment["urn"], + experiment_urn, ExperimentDbModel, TEST_USER["username"], TEST_USER["first_name"], TEST_USER["last_name"], ) - del_response = client.delete(f"/api/v1/experiments/{experiment['urn']}") + del_response = client.delete(f"/api/v1/experiments/{experiment_urn}") assert del_response.status_code == 403 @@ -1363,13 +1444,18 @@ def test_admin_can_delete_other_users_published_experiment( session, data_provider, client, setup_router_db, data_files, admin_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) - experiment = published_score_set["experiment"] + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + experiment_urn = score_set["experiment"]["urn"] with DependencyOverrider(admin_app_overrides): - del_response = client.delete(f"/api/v1/experiments/{experiment['urn']}") + del_response = client.delete(f"/api/v1/experiments/{experiment_urn}") assert del_response.status_code == 200 @@ -1384,10 +1470,15 @@ def test_can_add_experiment_to_own_private_experiment_set(session, client, setup def test_can_add_experiment_to_own_public_experiment_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + test_experiment = deepcopy(TEST_MINIMAL_EXPERIMENT) test_experiment.update({"experimentSetUrn": published_score_set["experiment"]["experimentSetUrn"]}) response = client.post("/api/v1/experiments/", json=test_experiment) @@ -1416,10 +1507,15 @@ def test_contributor_can_add_experiment_to_others_public_experiment_set( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + 
worker_queue.assert_called_once() + change_ownership(session, published_score_set["urn"], ScoreSetDbModel) change_ownership(session, published_score_set["experiment"]["urn"], ExperimentDbModel) change_ownership(session, published_score_set["experiment"]["experimentSetUrn"], ExperimentSetDbModel) @@ -1454,10 +1550,15 @@ def test_cannot_add_experiment_to_others_public_experiment_set( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + experiment_set_urn = published_score_set["experiment"]["experimentSetUrn"] change_ownership(session, published_score_set["urn"], ScoreSetDbModel) change_ownership(session, published_score_set["experiment"]["urn"], ExperimentDbModel) diff --git a/tests/routers/test_hgvs.py b/tests/routers/test_hgvs.py index f59e5c27..9a19f709 100644 --- a/tests/routers/test_hgvs.py +++ b/tests/routers/test_hgvs.py @@ -1,8 +1,14 @@ +# ruff: noqa: E402 + from unittest.mock import patch -import cdot.hgvs.dataproviders +import pytest import requests_mock -from hgvs.exceptions import HGVSDataNotAvailableError + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") +hgvs = pytest.importorskip("hgvs") from tests.helpers.constants import TEST_CDOT_TRANSCRIPT, VALID_ACCESSION, VALID_GENE @@ -29,7 +35,7 @@ def test_hgvs_fetch_valid(client, setup_router_db): def test_hgvs_fetch_invalid(client, setup_router_db): with patch.object( - cdot.hgvs.dataproviders.ChainedSeqFetcher, "fetch_seq", side_effect=HGVSDataNotAvailableError() + cdot.hgvs.dataproviders.ChainedSeqFetcher, "fetch_seq", side_effect=hgvs.exceptions.HGVSDataNotAvailableError() ) as p: response = client.get(f"/api/v1/hgvs/fetch/{SMALL_ACCESSION}") p.assert_called_once() diff --git a/tests/routers/test_licenses.py b/tests/routers/test_licenses.py index 97c487a3..4d09a11d 100644 --- a/tests/routers/test_licenses.py +++ b/tests/routers/test_licenses.py @@ -1,5 +1,11 @@ +# ruff: noqa: E402 + import pytest +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from tests.helpers.constants import TEST_LICENSE from tests.helpers.dependency_overrider import DependencyOverrider diff --git a/tests/routers/test_permissions.py b/tests/routers/test_permissions.py index ef8bebb1..6b79b81d 100644 --- a/tests/routers/test_permissions.py +++ b/tests/routers/test_permissions.py @@ -1,15 +1,22 @@ +# ruff: noqa: E402 + +from unittest.mock import patch +import pytest + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.models.experiment import Experiment as ExperimentDbModel from mavedb.models.experiment_set import ExperimentSet as ExperimentSetDbModel from mavedb.models.score_set import ScoreSet as ScoreSetDbModel + from tests.helpers.constants import TEST_USER -from tests.helpers.util import ( - 
add_contributor, - change_ownership, - create_experiment, - create_seq_score_set, - create_seq_score_set_with_variants, - publish_score_set, -) +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.contributor import add_contributor +from tests.helpers.util.user import change_ownership +from tests.helpers.util.score_set import create_seq_score_set, publish_score_set +from tests.helpers.util.variant import mock_worker_variant_insertion # Test check_authorization function @@ -171,13 +178,18 @@ def test_get_true_permission_from_others_public_experiment_add_score_set_check( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - published_score_set = publish_score_set(client, score_set_1["urn"]) - pub_experiment_urn = published_score_set["experiment"]["urn"] - change_ownership(session, pub_experiment_urn, ExperimentDbModel) - response = client.get(f"/api/v1/permissions/user-is-permitted/experiment/{pub_experiment_urn}/add_score_set") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + published_experiment_urn = published_score_set["experiment"]["urn"] + change_ownership(session, published_experiment_urn, ExperimentDbModel) + response = client.get(f"/api/v1/permissions/user-is-permitted/experiment/{published_experiment_urn}/add_score_set") assert response.status_code == 200 assert response.json() diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 1ce59e1d..a1a66b1e 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -1,3 +1,5 @@ +# ruff: noqa: E402 + import re from copy import deepcopy from datetime import date @@ -5,10 +7,13 @@ import jsonschema import pytest -from arq import ArqRedis from humps import camelize from sqlalchemy import select, delete +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.lib.validation.urn_re import MAVEDB_TMP_URN_RE, MAVEDB_SCORE_SET_URN_RE, MAVEDB_EXPERIMENT_URN_RE from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.clinical_control import ClinicalControl @@ -17,6 +22,7 @@ from mavedb.models.variant import Variant as VariantDbModel from mavedb.view_models.orcid import OrcidUser from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate + from tests.helpers.constants import ( EXTRA_USER, EXTRA_LICENSE, @@ -41,17 +47,18 @@ TEST_SAVED_GENERIC_CLINICAL_CONTROL, ) from tests.helpers.dependency_overrider import DependencyOverrider -from tests.helpers.util import ( - add_contributor, - change_ownership, - change_to_inactive_license, - create_experiment, +from tests.helpers.util.common import update_expected_response_for_created_resources +from tests.helpers.util.contributor import add_contributor +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.license import change_to_inactive_license +from tests.helpers.util.score_set import ( create_seq_score_set, - 
create_seq_score_set_with_variants, - update_expected_response_for_created_resources, create_seq_score_set_with_mapped_variants, link_clinical_controls_to_mapped_variants, + publish_score_set, ) +from tests.helpers.util.user import change_ownership +from tests.helpers.util.variant import mock_worker_variant_insertion ######################################################################################################################## @@ -309,15 +316,12 @@ def test_can_update_score_set_supporting_data_after_publication( data_files, ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publication_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert publication_response.status_code == 200 - queue.assert_called_once() - published_score_set = publication_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() published_urn = published_score_set["urn"] response = client.get(f"/api/v1/score-sets/{published_urn}") @@ -374,15 +378,12 @@ def test_cannot_update_score_set_target_data_after_publication( client, setup_router_db, attribute, expected_response_data, updated_data, session, data_provider, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publication_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert publication_response.status_code == 200 - queue.assert_called_once() - published_score_set = publication_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() published_urn = published_score_set["urn"] response = client.get(f"/api/v1/score-sets/{published_urn}") @@ -540,7 +541,7 @@ def test_add_score_set_variants_scores_only_endpoint(client, setup_router_db, da scores_csv_path = data_files / "scores.csv" with ( open(scores_csv_path, "rb") as scores_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -566,7 +567,7 @@ def test_add_score_set_variants_scores_and_counts_endpoint(session, client, setu with ( open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -593,7 +594,7 @@ def test_add_score_set_variants_scores_only_endpoint_utf8_encoded(client, setup_ 
scores_csv_path = data_files / "scores_utf8_encoded.csv" with ( open(scores_csv_path, "rb") as scores_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -619,7 +620,7 @@ def test_add_score_set_variants_scores_and_counts_endpoint_utf8_encoded(session, with ( open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -711,7 +712,7 @@ def test_contributor_can_add_scores_to_other_user_score_set(session, client, set with ( open(scores_csv_path, "rb") as scores_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -767,7 +768,7 @@ def test_contributor_can_add_scores_and_counts_to_other_user_score_set(session, with ( open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -818,7 +819,7 @@ def test_admin_can_add_scores_to_other_user_score_set( with ( open(scores_csv_path, "rb") as scores_file, DependencyOverrider(admin_app_overrides), - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -844,7 +845,7 @@ def test_admin_can_add_scores_and_counts_to_other_user_score_set(session, client with ( open(scores_csv_path, "rb") as scores_file, open(counts_csv_path, "rb") as counts_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post( f"/api/v1/score-sets/{score_set['urn']}/variants/data", @@ -872,26 +873,23 @@ def test_admin_can_add_scores_and_counts_to_other_user_score_set(session, client def test_publish_score_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publication_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert publication_response.status_code == 200 - queue.assert_called_once() - response_data = publication_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(response_data["urn"]), re.Match) - assert 
isinstance(MAVEDB_EXPERIMENT_URN_RE.fullmatch(response_data["experiment"]["urn"]), re.Match) + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_score_set["urn"]), re.Match) + assert isinstance(MAVEDB_EXPERIMENT_URN_RE.fullmatch(published_score_set["experiment"]["urn"]), re.Match) expected_response = update_expected_response_for_created_resources( - deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), response_data["experiment"], response_data + deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), published_score_set["experiment"], published_score_set ) expected_response["experiment"].update({"publishedDate": date.today().isoformat()}) expected_response.update( { - "urn": response_data["urn"], + "urn": published_score_set["urn"], "publishedDate": date.today().isoformat(), "numVariants": 3, "private": False, @@ -899,10 +897,10 @@ def test_publish_score_set(session, data_provider, client, setup_router_db, data "processingState": ProcessingState.success.name, } ) - assert sorted(expected_response.keys()) == sorted(response_data.keys()) + assert sorted(expected_response.keys()) == sorted(published_score_set.keys()) # refresh score set to post worker state - score_set = (client.get(f"/api/v1/score-sets/{response_data['urn']}")).json() + score_set = (client.get(f"/api/v1/score-sets/{published_score_set['urn']}")).json() for key in expected_response: assert (key, expected_response[key]) == (key, score_set[key]) @@ -914,27 +912,18 @@ def test_publish_score_set(session, data_provider, client, setup_router_db, data def test_publish_multiple_score_sets(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 1"} - ) - score_set_2 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 2"} - ) - score_set_3 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 3"} - ) + score_set_1 = create_seq_score_set(client, experiment["urn"]) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment["urn"]) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") + score_set_3 = create_seq_score_set(client, experiment["urn"]) + score_set_3 = mock_worker_variant_insertion(client, session, data_provider, score_set_3, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - pub_score_set_1_response = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") - assert pub_score_set_1_response.status_code == 200 - pub_score_set_2_response = client.post(f"/api/v1/score-sets/{score_set_2['urn']}/publish") - assert pub_score_set_2_response.status_code == 200 - pub_score_set_3_response = client.post(f"/api/v1/score-sets/{score_set_3['urn']}/publish") - assert pub_score_set_3_response.status_code == 200 - queue.assert_called() - pub_score_set_1_data = pub_score_set_1_response.json() - pub_score_set_2_data = pub_score_set_2_response.json() - pub_score_set_3_data = pub_score_set_3_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + pub_score_set_1_data = 
publish_score_set(client, score_set_1["urn"]) + pub_score_set_2_data = publish_score_set(client, score_set_2["urn"]) + pub_score_set_3_data = publish_score_set(client, score_set_3["urn"]) + worker_queue.assert_called() assert pub_score_set_1_data["urn"] == "urn:mavedb:00000001-a-1" assert pub_score_set_1_data["title"] == score_set_1["title"] @@ -964,10 +953,10 @@ def test_cannot_publish_score_set_without_variants(client, setup_router_db): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") assert response.status_code == 422 - queue.assert_not_called() + worker_queue.assert_not_called() response_data = response.json() assert "cannot publish score set without variant scores" in response_data["detail"] @@ -975,15 +964,15 @@ def test_cannot_publish_score_set_without_variants(client, setup_router_db): def test_cannot_publish_other_user_private_score_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + change_ownership(session, score_set["urn"], ScoreSetDbModel) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") assert response.status_code == 404 - queue.assert_not_called() + worker_queue.assert_not_called() response_data = response.json() assert f"score set with URN '{score_set['urn']}' not found" in response_data["detail"] @@ -993,13 +982,12 @@ def test_anonymous_cannot_publish_user_private_score_set( session, data_provider, client, setup_router_db, data_files, anonymous_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") with ( DependencyOverrider(anonymous_app_overrides), - patch.object(ArqRedis, "enqueue_job", return_value=None) as queue, + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, ): response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") assert response.status_code == 401 @@ -1011,9 +999,8 @@ def test_anonymous_cannot_publish_user_private_score_set( def test_contributor_can_publish_other_users_score_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") change_ownership(session, score_set["urn"], ScoreSetDbModel) add_contributor( session, @@ -1024,22 +1011,20 @@ def 
test_contributor_can_publish_other_users_score_set(session, data_provider, c TEST_USER["last_name"], ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - response_data = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - assert response_data["urn"] == "urn:mavedb:00000001-a-1" - assert response_data["experiment"]["urn"] == "urn:mavedb:00000001-a" + assert published_score_set["urn"] == "urn:mavedb:00000001-a-1" + assert published_score_set["experiment"]["urn"] == "urn:mavedb:00000001-a" expected_response = update_expected_response_for_created_resources( - deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), response_data["experiment"], response_data + deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), published_score_set["experiment"], published_score_set ) expected_response["experiment"].update({"publishedDate": date.today().isoformat()}) expected_response.update( { - "urn": response_data["urn"], + "urn": published_score_set["urn"], "publishedDate": date.today().isoformat(), "numVariants": 3, "private": False, @@ -1067,10 +1052,10 @@ def test_contributor_can_publish_other_users_score_set(session, data_provider, c "firstName": EXTRA_USER["first_name"], "lastName": EXTRA_USER["last_name"], } - assert sorted(expected_response.keys()) == sorted(response_data.keys()) + assert sorted(expected_response.keys()) == sorted(published_score_set.keys()) # refresh score set to post worker state - score_set = (client.get(f"/api/v1/score-sets/{response_data['urn']}")).json() + score_set = (client.get(f"/api/v1/score-sets/{published_score_set['urn']}")).json() for key in expected_response: assert (key, expected_response[key]) == (key, score_set[key]) @@ -1084,11 +1069,13 @@ def test_admin_cannot_publish_other_user_private_score_set( session, data_provider, client, admin_app_overrides, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with DependencyOverrider(admin_app_overrides), patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: + with ( + DependencyOverrider(admin_app_overrides), + patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as queue, + ): response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") assert response.status_code == 404 queue.assert_not_called() @@ -1104,57 +1091,49 @@ def test_admin_cannot_publish_other_user_private_score_set( def test_create_single_score_set_meta_analysis(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = 
client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - score_set = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - meta_score_set = create_seq_score_set_with_variants( + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set["urn"]]}, + update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [published_score_set["urn"]]}, + ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" ) - score_set_refresh = (client.get(f"/api/v1/score-sets/{score_set['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == [score_set["urn"]] - assert score_set_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] + published_score_set_refresh = (client.get(f"/api/v1/score-sets/{published_score_set['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == [published_score_set_refresh["urn"]] + assert published_score_set_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] assert isinstance(MAVEDB_TMP_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) def test_publish_single_score_set_meta_analysis(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - score_set = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - meta_score_set = create_seq_score_set_with_variants( + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set["urn"]]}, ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" + ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_response = client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish") - assert meta_response.status_code == 200 - queue.assert_called_once() - meta_score_set = meta_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called_once() assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) assert meta_score_set["urn"] == "urn:mavedb:00000001-0-1" @@ -1164,42 +1143,38 @@ def test_multiple_score_set_meta_analysis_single_experiment( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set_1 = 
create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 1"} - ) - score_set_2 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 2"} - ) + score_set_1 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response_1 = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") - assert response_1.status_code == 200 - response_2 = client.post(f"/api/v1/score-sets/{score_set_2['urn']}/publish") - assert response_2.status_code == 200 - queue.assert_called() - score_set_1 = response_1.json() - score_set_2 = response_2.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + published_score_set_2 = publish_score_set(client, score_set_2["urn"]) + worker_queue.assert_called() - meta_score_set = create_seq_score_set_with_variants( + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set_1["urn"], score_set_2["urn"]]}, + update={ + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_1["urn"], published_score_set_2["urn"]], + }, + ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" ) - score_set_1_refresh = (client.get(f"/api/v1/score-sets/{score_set_1['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([score_set_1["urn"], score_set_2["urn"]]) - assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_response = client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish") - assert meta_response.status_code == 200 - queue.assert_called_once() - meta_score_set = meta_response.json() + published_score_set_1_refresh = (client.get(f"/api/v1/score-sets/{published_score_set_1['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([published_score_set_1["urn"], published_score_set_2["urn"]]) + assert published_score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) - assert meta_score_set["urn"] == "urn:mavedb:00000001-0-1" + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called_once() + + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_meta_score_set["urn"]), re.Match) + assert published_meta_score_set["urn"] == "urn:mavedb:00000001-0-1" def test_multiple_score_set_meta_analysis_multiple_experiment_sets( @@ -1207,42 +1182,39 @@ def test_multiple_score_set_meta_analysis_multiple_experiment_sets( ): experiment_1 = 
create_experiment(client, {"title": "Experiment 1"}) experiment_2 = create_experiment(client, {"title": "Experiment 2"}) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv", update={"title": "Score Set 1"} - ) - score_set_2 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_2["urn"], data_files / "scores.csv", update={"title": "Score Set 2"} - ) + score_set_1 = create_seq_score_set(client, experiment_1["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment_2["urn"], update={"title": "Score Set 2"}) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response_1 = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") - assert response_1.status_code == 200 - response_2 = client.post(f"/api/v1/score-sets/{score_set_2['urn']}/publish") - assert response_2.status_code == 200 - queue.assert_called() - score_set_1 = response_1.json() - score_set_2 = response_2.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + published_score_set_2 = publish_score_set(client, score_set_2["urn"]) + worker_queue.assert_called() - meta_score_set = create_seq_score_set_with_variants( + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set_1["urn"], score_set_2["urn"]]}, + update={ + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_1["urn"], published_score_set_2["urn"]], + }, ) - score_set_1_refresh = (client.get(f"/api/v1/score-sets/{score_set_1['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([score_set_1["urn"], score_set_2["urn"]]) - assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" + ) + published_score_set_1_refresh = (client.get(f"/api/v1/score-sets/{published_score_set_1['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted( + [published_score_set_1["urn"], published_score_set_2["urn"]] + ) + assert published_score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_response = client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish") - assert meta_response.status_code == 200 - queue.assert_called_once() - meta_score_set = meta_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called_once() - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) - assert meta_score_set["urn"] == "urn:mavedb:00000003-0-1" + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_meta_score_set["urn"]), re.Match) + assert published_meta_score_set["urn"] == "urn:mavedb:00000003-0-1" def 
test_multiple_score_set_meta_analysis_multiple_experiments( @@ -1252,42 +1224,39 @@ def test_multiple_score_set_meta_analysis_multiple_experiments( experiment_2 = create_experiment( client, {"title": "Experiment 2", "experimentSetUrn": experiment_1["experimentSetUrn"]} ) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv", update={"title": "Score Set 1"} - ) - score_set_2 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_2["urn"], data_files / "scores.csv", update={"title": "Score Set 2"} - ) + score_set_1 = create_seq_score_set(client, experiment_1["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment_2["urn"], update={"title": "Score Set 2"}) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response_1 = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") - assert response_1.status_code == 200 - response_2 = client.post(f"/api/v1/score-sets/{score_set_2['urn']}/publish") - assert response_2.status_code == 200 - queue.assert_called() - score_set_1 = response_1.json() - score_set_2 = response_2.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + published_score_set_2 = publish_score_set(client, score_set_2["urn"]) + worker_queue.assert_called() - meta_score_set = create_seq_score_set_with_variants( + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set_1["urn"], score_set_2["urn"]]}, + update={ + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_1["urn"], published_score_set_2["urn"]], + }, + ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" ) - score_set_1_refresh = (client.get(f"/api/v1/score-sets/{score_set_1['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([score_set_1["urn"], score_set_2["urn"]]) - assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] + published_score_set_1_refresh = (client.get(f"/api/v1/score-sets/{published_score_set_1['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted( + [published_score_set_1["urn"], published_score_set_2["urn"]] + ) + assert published_score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_response = client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish") - assert meta_response.status_code == 200 - queue.assert_called_once() - meta_score_set = meta_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called_once() - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) - assert meta_score_set["urn"] == "urn:mavedb:00000001-0-1" + assert 
isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_meta_score_set["urn"]), re.Match) + assert published_meta_score_set["urn"] == "urn:mavedb:00000001-0-1" def test_multiple_score_set_meta_analysis_multiple_experiment_sets_different_score_sets( @@ -1295,133 +1264,117 @@ def test_multiple_score_set_meta_analysis_multiple_experiment_sets_different_sco ): experiment_1 = create_experiment(client, {"title": "Experiment 1"}) experiment_2 = create_experiment(client, {"title": "Experiment 2"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Exp 1 Score Set 1"}, + + score_set_1_1 = create_seq_score_set(client, experiment_1["urn"], update={"title": "Score Set 1 exp 1"}) + score_set_1_1 = mock_worker_variant_insertion( + client, session, data_provider, score_set_1_1, data_files / "scores.csv" ) - score_set_1_2 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Exp 1 Score Set 2"}, + score_set_2_1 = create_seq_score_set(client, experiment_1["urn"], update={"title": "Score Set 2 exp 1"}) + score_set_2_1 = mock_worker_variant_insertion( + client, session, data_provider, score_set_2_1, data_files / "scores.csv" ) - score_set_2_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_2["urn"], - data_files / "scores.csv", - update={"title": "Exp 2 Score Set 1"}, + score_set_1_2 = create_seq_score_set(client, experiment_2["urn"], update={"title": "Score Set 1 exp 2 "}) + score_set_1_2 = mock_worker_variant_insertion( + client, session, data_provider, score_set_1_2, data_files / "scores.csv" ) - score_set_2_2 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_2["urn"], - data_files / "scores.csv", - update={"title": "Exp 2 Score Set 2"}, - ) - - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response_1_1 = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert response_1_1.status_code == 200 - response_1_2 = client.post(f"/api/v1/score-sets/{score_set_1_2['urn']}/publish") - assert response_1_2.status_code == 200 - response_2_1 = client.post(f"/api/v1/score-sets/{score_set_2_1['urn']}/publish") - assert response_2_1.status_code == 200 - response_2_2 = client.post(f"/api/v1/score-sets/{score_set_2_2['urn']}/publish") - assert response_2_2.status_code == 200 - queue.assert_called() - score_set_1_1 = response_1_1.json() - score_set_1_2 = response_1_2.json() - score_set_2_1 = response_2_1.json() - score_set_2_2 = response_2_2.json() - - meta_score_set_1 = create_seq_score_set_with_variants( + score_set_2_2 = create_seq_score_set(client, experiment_2["urn"], update={"title": "Score Set 2 exp 2"}) + score_set_2_2 = mock_worker_variant_insertion( + client, session, data_provider, score_set_2_2, data_files / "scores.csv" + ) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1_1 = publish_score_set(client, score_set_1_1["urn"]) + published_score_set_1_2 = publish_score_set(client, score_set_1_2["urn"]) + published_score_set_2_1 = publish_score_set(client, score_set_2_1["urn"]) + published_score_set_2_2 = publish_score_set(client, score_set_2_2["urn"]) + worker_queue.assert_called() + + meta_score_set_1 = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", update={ - "title": "Test Meta 
Analysis 1-1 2-1", - "metaAnalyzesScoreSetUrns": [score_set_1_1["urn"], score_set_2_1["urn"]], + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_1_1["urn"], published_score_set_1_2["urn"]], }, ) - score_set_1_1_refresh = (client.get(f"/api/v1/score-sets/{score_set_1_1['urn']}")).json() - assert meta_score_set_1["metaAnalyzesScoreSetUrns"] == sorted([score_set_1_1["urn"], score_set_2_1["urn"]]) - assert score_set_1_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set_1["urn"]] - meta_score_set_2 = create_seq_score_set_with_variants( + meta_score_set_1 = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set_1, data_files / "scores.csv" + ) + + published_score_set_1_1_refresh = (client.get(f"/api/v1/score-sets/{published_score_set_1_1['urn']}")).json() + assert meta_score_set_1["metaAnalyzesScoreSetUrns"] == sorted( + [published_score_set_1_1["urn"], published_score_set_1_2["urn"]] + ) + assert published_score_set_1_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set_1["urn"]] + + meta_score_set_2 = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", update={ - "title": "Test Meta Analysis 1-2 2-2", - "metaAnalyzesScoreSetUrns": [score_set_1_2["urn"], score_set_2_2["urn"]], + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_2_1["urn"], published_score_set_2_2["urn"]], }, ) + meta_score_set_2 = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set_2, data_files / "scores.csv" + ) + published_score_set_2_1_refresh = (client.get(f"/api/v1/score-sets/{published_score_set_2_1['urn']}")).json() + assert meta_score_set_2["metaAnalyzesScoreSetUrns"] == sorted( + [published_score_set_2_1["urn"], published_score_set_2_2["urn"]] + ) + assert published_score_set_2_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set_2["urn"]] - meta_score_set_3 = create_seq_score_set_with_variants( + meta_score_set_3 = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", update={ - "title": "Test Meta Analysis 1-1 2-2", - "metaAnalyzesScoreSetUrns": [score_set_1_1["urn"], score_set_2_2["urn"]], + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_1_1["urn"], published_score_set_2_2["urn"]], }, ) + meta_score_set_3 = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set_3, data_files / "scores.csv" + ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_score_set_1 = (client.post(f"/api/v1/score-sets/{meta_score_set_1['urn']}/publish")).json() - assert meta_score_set_1["urn"] == "urn:mavedb:00000003-0-1" - meta_score_set_2 = (client.post(f"/api/v1/score-sets/{meta_score_set_2['urn']}/publish")).json() - assert meta_score_set_2["urn"] == "urn:mavedb:00000003-0-2" - meta_score_set_3 = (client.post(f"/api/v1/score-sets/{meta_score_set_3['urn']}/publish")).json() - assert meta_score_set_3["urn"] == "urn:mavedb:00000003-0-3" - queue.assert_called() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_meta_score_set_1 = publish_score_set(client, meta_score_set_1["urn"]) + published_meta_score_set_2 = publish_score_set(client, meta_score_set_2["urn"]) + published_meta_score_set_3 = publish_score_set(client, meta_score_set_3["urn"]) + worker_queue.assert_called() - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set_1["urn"]), re.Match) - assert 
isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set_2["urn"]), re.Match) - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set_3["urn"]), re.Match) + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_meta_score_set_1["urn"]), re.Match) + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_meta_score_set_2["urn"]), re.Match) + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_meta_score_set_3["urn"]), re.Match) + assert published_meta_score_set_1["urn"] == "urn:mavedb:00000003-0-1" + assert published_meta_score_set_2["urn"] == "urn:mavedb:00000003-0-2" + assert published_meta_score_set_3["urn"] == "urn:mavedb:00000003-0-3" def test_cannot_add_score_set_to_meta_analysis_experiment(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set_1 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - score_set_1 = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + worker_queue.assert_called() - meta_score_set_1 = create_seq_score_set_with_variants( + meta_score_set_1 = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set_1["urn"]]}, + update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [published_score_set_1["urn"]]}, + ) + meta_score_set_1 = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set_1, data_files / "scores.csv" ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_score_set_1 = (client.post(f"/api/v1/score-sets/{meta_score_set_1['urn']}/publish")).json() - assert meta_score_set_1["urn"] == "urn:mavedb:00000001-0-1" - queue.assert_called() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + meta_score_set_1 = publish_score_set(client, meta_score_set_1["urn"]) + worker_queue.assert_called() assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set_1["urn"]), re.Match) + assert meta_score_set_1["urn"] == "urn:mavedb:00000001-0-1" + score_set_2 = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_2["experimentUrn"] = meta_score_set_1["experiment"]["urn"] jsonschema.validate(instance=score_set_2, schema=ScoreSetCreate.schema()) @@ -1436,29 +1389,27 @@ def test_create_single_score_set_meta_analysis_to_others_score_set( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = 
client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - score_set = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called() - change_ownership(session, score_set["urn"], ScoreSetDbModel) - meta_score_set = create_seq_score_set_with_variants( + change_ownership(session, published_score_set["urn"], ScoreSetDbModel) + + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set["urn"]]}, + update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [published_score_set["urn"]]}, + ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" ) - score_set_refresh = (client.get(f"/api/v1/score-sets/{score_set['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == [score_set["urn"]] - assert score_set_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] + published_score_set_refresh = (client.get(f"/api/v1/score-sets/{published_score_set['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == [published_score_set["urn"]] + assert published_score_set_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] assert isinstance(MAVEDB_TMP_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) @@ -1466,40 +1417,38 @@ def test_multiple_score_set_meta_analysis_single_experiment_with_different_creat session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 1"} - ) - score_set_2 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", update={"title": "Score Set 2"} - ) + score_set_1 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response_1 = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") - assert response_1.status_code == 200 - response_2 = client.post(f"/api/v1/score-sets/{score_set_2['urn']}/publish") - assert response_2.status_code == 200 - queue.assert_called() - score_set_1 = response_1.json() - score_set_2 = response_2.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + published_score_set_2 = publish_score_set(client, score_set_2["urn"]) + worker_queue.assert_called() - change_ownership(session, score_set_2["urn"], ScoreSetDbModel) - meta_score_set = create_seq_score_set_with_variants( + change_ownership(session, published_score_set_2["urn"], ScoreSetDbModel) + meta_score_set = create_seq_score_set( client, - session, - data_provider, None, - data_files / "scores.csv", - update={"title": "Test Meta 
Analysis", "metaAnalyzesScoreSetUrns": [score_set_1["urn"], score_set_2["urn"]]}, + update={ + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_1["urn"], published_score_set_2["urn"]], + }, + ) + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" ) - score_set_1_refresh = (client.get(f"/api/v1/score-sets/{score_set_1['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([score_set_1["urn"], score_set_2["urn"]]) - assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_response = client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish") - assert meta_response.status_code == 200 - queue.assert_called_once() - meta_score_set = meta_response.json() + published_score_set_1_refresh = (client.get(f"/api/v1/score-sets/{published_score_set_1['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted( + [published_score_set_1["urn"], published_score_set_2["urn"]] + ) + assert published_score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called() assert meta_score_set["urn"] == "urn:mavedb:00000001-0-1" assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) @@ -1510,39 +1459,41 @@ def test_multiple_score_set_meta_analysis_multiple_experiment_sets_with_differen ): experiment_1 = create_experiment(client, {"title": "Experiment 1"}) experiment_2 = create_experiment(client, {"title": "Experiment 2"}) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv", update={"title": "Score Set 1"} + score_set_1 = create_seq_score_set(client, experiment_1["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment_2["urn"], update={"title": "Score Set 2"}) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + published_score_set_2 = publish_score_set(client, score_set_2["urn"]) + worker_queue.assert_called() + + change_ownership(session, published_score_set_2["urn"], ScoreSetDbModel) + meta_score_set = create_seq_score_set( + client, + None, + update={ + "title": "Test Meta Analysis", + "metaAnalyzesScoreSetUrns": [published_score_set_1["urn"], published_score_set_2["urn"]], + }, ) - score_set_2 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_2["urn"], data_files / "scores.csv", update={"title": "Score Set 2"} + meta_score_set = mock_worker_variant_insertion( + client, session, data_provider, meta_score_set, data_files / "scores.csv" ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_1 = (client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish")).json() - score_set_2 = (client.post(f"/api/v1/score-sets/{score_set_2['urn']}/publish")).json() - queue.assert_called() - - change_ownership(session, 
score_set_2["urn"], ScoreSetDbModel) - meta_score_set = create_seq_score_set_with_variants( - client, - session, - data_provider, - None, - data_files / "scores.csv", - update={"title": "Test Meta Analysis", "metaAnalyzesScoreSetUrns": [score_set_1["urn"], score_set_2["urn"]]}, + published_score_set_1_refresh = (client.get(f"/api/v1/score-sets/{published_score_set_1['urn']}")).json() + assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted( + [published_score_set_1["urn"], published_score_set_2["urn"]] ) - score_set_1_refresh = (client.get(f"/api/v1/score-sets/{score_set_1['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([score_set_1["urn"], score_set_2["urn"]]) - assert score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] + assert published_score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - meta_response = client.post(f"/api/v1/score-sets/{meta_score_set['urn']}/publish") - assert meta_response.status_code == 200 - queue.assert_called_once() - meta_score_set = meta_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_meta_score_set = publish_score_set(client, meta_score_set["urn"]) + worker_queue.assert_called() - assert meta_score_set["urn"] == "urn:mavedb:00000003-0-1" - assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(meta_score_set["urn"]), re.Match) + assert published_meta_score_set["urn"] == "urn:mavedb:00000003-0-1" + assert isinstance(MAVEDB_SCORE_SET_URN_RE.fullmatch(published_meta_score_set["urn"]), re.Match) ######################################################################################################################## @@ -1551,15 +1502,9 @@ def test_multiple_score_set_meta_analysis_multiple_experiment_sets_with_differen def test_search_private_score_sets_no_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") search_payload = {"text": "fnord"} response = client.post("/api/v1/me/score-sets/search", json=search_payload) @@ -1568,61 +1513,49 @@ def test_search_private_score_sets_no_match(session, data_provider, client, setu def test_search_private_score_sets_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Fnord Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Test Fnord Score Set"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") search_payload = {"text": "fnord"} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert 
len(response.json()) == 1 - assert response.json()[0]["title"] == score_set_1_1["title"] + assert response.json()[0]["title"] == score_set["title"] def test_search_private_score_sets_urn_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - search_payload = {"urn": score_set_1_1["urn"]} + search_payload = {"urn": score_set["urn"]} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["urn"] == score_set_1_1["urn"] + assert response.json()[0]["urn"] == score_set["urn"] # There is space in the end of test urn. The search result returned nothing before. def test_search_private_score_sets_urn_with_space_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) - urn_with_space = score_set_1_1["urn"] + " " + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + urn_with_space = score_set["urn"] + " " search_payload = {"urn": urn_with_space} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["urn"] == score_set_1_1["urn"] + assert response.json()[0]["urn"] == score_set["urn"] def test_search_others_private_score_sets_no_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) - change_ownership(session, score_set_1_1["urn"], ScoreSetDbModel) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + change_ownership(session, score_set["urn"], ScoreSetDbModel) + search_payload = {"text": "fnord"} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 @@ -1630,16 +1563,11 @@ def test_search_others_private_score_sets_no_match(session, data_provider, clien def test_search_others_private_score_sets_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Fnord Score Set"}, - ) - change_ownership(session, score_set_1_1["urn"], ScoreSetDbModel) + experiment = 
create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + change_ownership(session, score_set["urn"], ScoreSetDbModel) search_payload = {"text": "fnord"} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 @@ -1647,12 +1575,12 @@ def test_search_others_private_score_sets_match(session, data_provider, client, def test_search_others_private_score_sets_urn_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) - change_ownership(session, score_set_1_1["urn"], ScoreSetDbModel) - search_payload = {"urn": score_set_1_1["urn"]} + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + change_ownership(session, score_set["urn"], ScoreSetDbModel) + + search_payload = {"urn": score_set["urn"]} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 0 @@ -1662,12 +1590,12 @@ def test_search_others_private_score_sets_urn_match(session, data_provider, clie def test_search_others_private_score_sets_urn_with_space_match( session, data_provider, client, setup_router_db, data_files ): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) - change_ownership(session, score_set_1_1["urn"], ScoreSetDbModel) - urn_with_space = score_set_1_1["urn"] + " " + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + change_ownership(session, score_set["urn"], ScoreSetDbModel) + + urn_with_space = score_set["urn"] + " " search_payload = {"urn": urn_with_space} response = client.post("/api/v1/me/score-sets/search", json=search_payload) assert response.status_code == 200 @@ -1675,20 +1603,13 @@ def test_search_others_private_score_sets_urn_with_space_match( def test_search_public_score_sets_no_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", 
return_value=None) as worker_queue: + publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() search_payload = {"text": "fnord"} response = client.post("/api/v1/score-sets/search", json=search_payload) @@ -1697,38 +1618,29 @@ def test_search_public_score_sets_no_match(session, data_provider, client, setup def test_search_public_score_sets_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Fnord Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Test Fnord Score Set"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() search_payload = {"text": "fnord"} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["title"] == score_set_1_1["title"] + assert response.json()[0]["title"] == score_set["title"] def test_search_public_score_sets_urn_with_space_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - published_score_set = score_set_response.json() - assert score_set_response.status_code == 200 - queue.assert_called_once() + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() urn_with_space = published_score_set["urn"] + " " search_payload = {"urn": urn_with_space} @@ -1739,23 +1651,16 @@ def test_search_public_score_sets_urn_with_space_match(session, data_provider, c def test_search_others_public_score_sets_no_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, 
data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + change_ownership(session, published_score_set["urn"], ScoreSetDbModel) - publish_score_set = score_set_response.json() - change_ownership(session, publish_score_set["urn"], ScoreSetDbModel) search_payload = {"text": "fnord"} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 @@ -1763,65 +1668,52 @@ def test_search_others_public_score_sets_no_match(session, data_provider, client def test_search_others_public_score_sets_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set_1_1 = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Fnord Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Test Fnord Score Set"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + change_ownership(session, published_score_set["urn"], ScoreSetDbModel) + assert session.query(ScoreSetDbModel).filter_by(urn=published_score_set["urn"]).one() - publish_score_set = score_set_response.json() - change_ownership(session, publish_score_set["urn"], ScoreSetDbModel) - assert session.query(ScoreSetDbModel).filter_by(urn=publish_score_set["urn"]).one() search_payload = {"text": "fnord"} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["title"] == publish_score_set["title"] + assert response.json()[0]["title"] == published_score_set["title"] def test_search_others_public_score_sets_urn_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as 
worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - publish_score_set = score_set_response.json() - change_ownership(session, publish_score_set["urn"], ScoreSetDbModel) - search_payload = {"urn": score_set_1_1["urn"]} + change_ownership(session, published_score_set["urn"], ScoreSetDbModel) + search_payload = {"urn": score_set["urn"]} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["urn"] == publish_score_set["urn"] + assert response.json()[0]["urn"] == published_score_set["urn"] def test_search_others_public_score_sets_urn_with_space_match( session, data_provider, client, setup_router_db, data_files ): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - published_score_set = score_set_response.json() change_ownership(session, published_score_set["urn"], ScoreSetDbModel) urn_with_space = published_score_set["urn"] + " " search_payload = {"urn": urn_with_space} @@ -1834,46 +1726,41 @@ def test_search_others_public_score_sets_urn_with_space_match( def test_search_private_score_sets_not_showing_public_score_set( session, data_provider, client, setup_router_db, data_files ): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) - score_set_1_2 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set_1 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, score_set_1["urn"]) + worker_queue.assert_called_once() search_payload = {"published": False} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["urn"] 
== score_set_1_2["urn"] + assert response.json()[0]["urn"] == score_set_2["urn"] def test_search_public_score_sets_not_showing_private_score_set( session, data_provider, client, setup_router_db, data_files ): - experiment_1 = create_experiment(client) - score_set_1_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment_1["urn"], data_files / "scores.csv" - ) - create_seq_score_set_with_variants(client, session, data_provider, experiment_1["urn"], data_files / "scores.csv") + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set_1 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 1"}) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") + score_set_2 = create_seq_score_set(client, experiment["urn"], update={"title": "Score Set 2"}) + score_set_2 = mock_worker_variant_insertion(client, session, data_provider, score_set_2, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - score_set_response = client.post(f"/api/v1/score-sets/{score_set_1_1['urn']}/publish") - assert score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + worker_queue.assert_called_once() - published_score_set = score_set_response.json() search_payload = {"published": True} response = client.post("/api/v1/score-sets/search", json=search_payload) assert response.status_code == 200 assert len(response.json()) == 1 - assert response.json()[0]["urn"] == published_score_set["urn"] + assert response.json()[0]["urn"] == published_score_set_1["urn"] ######################################################################################################################## @@ -1885,9 +1772,8 @@ def test_anonymous_cannot_delete_other_users_private_scoreset( session, data_provider, client, setup_router_db, data_files, anonymous_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") with DependencyOverrider(anonymous_app_overrides): response = client.delete(f"/api/v1/score-sets/{score_set['urn']}") @@ -1900,18 +1786,15 @@ def test_anonymous_cannot_delete_other_users_published_scoreset( session, data_provider, client, setup_router_db, data_files, anonymous_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - response_data = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() with 
DependencyOverrider(anonymous_app_overrides): - del_response = client.delete(f"/api/v1/score-sets/{response_data['urn']}") + del_response = client.delete(f"/api/v1/score-sets/{published_score_set['urn']}") assert del_response.status_code == 401 del_response_data = del_response.json() @@ -1920,9 +1803,8 @@ def test_anonymous_cannot_delete_other_users_published_scoreset( def test_can_delete_own_private_scoreset(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") response = client.delete(f"/api/v1/score-sets/{score_set['urn']}") @@ -1931,30 +1813,26 @@ def test_can_delete_own_private_scoreset(session, data_provider, client, setup_r def test_cannot_delete_own_published_scoreset(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - response_data = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - del_response = client.delete(f"/api/v1/score-sets/{response_data['urn']}") + del_response = client.delete(f"/api/v1/score-sets/{published_score_set['urn']}") assert del_response.status_code == 403 del_response_data = del_response.json() - assert f"insufficient permissions for URN '{response_data['urn']}'" in del_response_data["detail"] + assert f"insufficient permissions for URN '{published_score_set['urn']}'" in del_response_data["detail"] def test_contributor_can_delete_other_users_private_scoreset( session, data_provider, client, setup_router_db, data_files, admin_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") change_ownership(session, score_set["urn"], ScoreSetDbModel) add_contributor( session, @@ -1974,9 +1852,8 @@ def test_admin_can_delete_other_users_private_scoreset( session, data_provider, client, setup_router_db, data_files, admin_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") with DependencyOverrider(admin_app_overrides): response = client.delete(f"/api/v1/score-sets/{score_set['urn']}") @@ -1988,20 
+1865,16 @@ def test_admin_can_delete_other_users_published_scoreset( session, data_provider, client, setup_router_db, data_files, admin_app_overrides ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert response.status_code == 200 - queue.assert_called_once() - response_data = response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() with DependencyOverrider(admin_app_overrides): - del_response = client.delete(f"/api/v1/score-sets/{response_data['urn']}") - - assert del_response.status_code == 200 + del_response = client.delete(f"/api/v1/score-sets/{published_score_set['urn']}") + assert del_response.status_code == 200 ######################################################################################################################## @@ -2031,33 +1904,32 @@ def test_cannot_add_score_set_to_others_private_experiment(session, client, setu def test_can_add_score_set_to_own_public_experiment(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set_1 = create_seq_score_set(client, experiment["urn"]) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - pub_score_set_1 = (client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish")).json() - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set_1 = publish_score_set(client, score_set_1["urn"]) + worker_queue.assert_called_once() score_set_2 = deepcopy(TEST_MINIMAL_SEQ_SCORESET) - score_set_2["experimentUrn"] = pub_score_set_1["experiment"]["urn"] + score_set_2["experimentUrn"] = published_score_set_1["experiment"]["urn"] response = client.post("/api/v1/score-sets/", json=score_set_2) assert response.status_code == 200 def test_can_add_score_set_to_others_public_experiment(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set_1 = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set_1 = create_seq_score_set(client, experiment["urn"]) + score_set_1 = mock_worker_variant_insertion(client, session, data_provider, score_set_1, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - pub_score_set_1 = (client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish")).json() - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set_1["urn"]) + worker_queue.assert_called_once() - change_ownership(session, pub_score_set_1["experiment"]["urn"], 
ExperimentDbModel) + published_experiment_urn = published_score_set["experiment"]["urn"] + change_ownership(session, published_experiment_urn, ExperimentDbModel) score_set_2 = deepcopy(TEST_MINIMAL_SEQ_SCORESET) - score_set_2["experimentUrn"] = pub_score_set_1["experiment"]["urn"] + score_set_2["experimentUrn"] = published_experiment_urn response = client.post("/api/v1/score-sets/", json=score_set_2) assert response.status_code == 200 @@ -2083,25 +1955,25 @@ def test_contributor_can_add_score_set_to_others_public_experiment( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - published_score_set = (client.post(f"/api/v1/score-sets/{score_set['urn']}/publish")).json() - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - change_ownership(session, published_score_set["experiment"]["urn"], ExperimentDbModel) + published_experiment_urn = published_score_set["experiment"]["urn"] + change_ownership(session, published_experiment_urn, ExperimentDbModel) add_contributor( session, - published_score_set["experiment"]["urn"], + published_experiment_urn, ExperimentDbModel, TEST_USER["username"], TEST_USER["first_name"], TEST_USER["last_name"], ) score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) - score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] + score_set_post_payload["experimentUrn"] = published_experiment_urn response = client.post("/api/v1/score-sets/", json=score_set_post_payload) assert response.status_code == 200 @@ -2143,15 +2015,13 @@ def test_can_modify_metadata_for_score_set_with_inactive_license(session, client def test_create_superseding_score_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publish_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() - published_score_set = publish_score_set_response.json() score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] @@ -2161,15 +2031,15 @@ def test_create_superseding_score_set(session, data_provider, client, setup_rout def test_can_view_unpublished_superseding_score_set(session, data_provider, client, 
setup_router_db, data_files): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish") - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() - published_score_set = publish_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] @@ -2187,15 +2057,14 @@ def test_cannot_view_others_unpublished_superseding_score_set( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish") - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() - published_score_set = publish_score_set_response.json() score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] @@ -2213,30 +2082,24 @@ def test_cannot_view_others_unpublished_superseding_score_set( def test_can_view_others_published_superseding_score_set(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish") - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() - published_score_set = publish_score_set_response.json() - superseding_score_set = create_seq_score_set_with_variants( - client, - session, - data_provider, - published_score_set["experiment"]["urn"], - data_files / 
"scores.csv", - update={"supersededScoreSetUrn": published_score_set["urn"]}, - ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - published_superseding_score_set_response = client.post( - f"/api/v1/score-sets/{superseding_score_set['urn']}/publish" - ) - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() - published_superseding_score_set = published_superseding_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + superseding_score_set = create_seq_score_set( + client, published_score_set["experiment"]["urn"], update={"supersededScoreSetUrn": published_score_set["urn"]} + ) + superseding_score_set = mock_worker_variant_insertion( + client, session, data_provider, superseding_score_set, data_files / "scores.csv" + ) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_superseding_score_set = publish_score_set(client, superseding_score_set["urn"]) + worker_queue.assert_called_once() change_ownership(session, published_superseding_score_set["urn"], ScoreSetDbModel) @@ -2253,14 +2116,14 @@ def test_show_correct_score_set_version_with_superseded_score_set_to_its_owner( session, data_provider, client, setup_router_db, data_files ): experiment = create_experiment(client) - unpublished_score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish") - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() - published_score_set = publish_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] @@ -2273,6 +2136,11 @@ def test_show_correct_score_set_version_with_superseded_score_set_to_its_owner( assert score_set["urn"] == superseding_score_set["urn"] +######################################################################################################################## +# Score Calibrations +######################################################################################################################## + + def test_anonymous_user_cannot_add_score_calibrations_to_score_set(client, setup_router_db, anonymous_app_overrides): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) @@ -2372,18 +2240,15 @@ def test_upload_a_non_utf8_file(session, client, setup_router_db, data_files): # Test file doesn't have hgvs_splice so its values are all NA. 
def test_download_scores_file(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publish_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() - publish_score_set = publish_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() download_scores_csv_response = client.get( - f"/api/v1/score-sets/{publish_score_set['urn']}/scores?drop_na_columns=true" + f"/api/v1/score-sets/{published_score_set['urn']}/scores?drop_na_columns=true" ) assert download_scores_csv_response.status_code == 200 download_scores_csv = download_scores_csv_response.text @@ -2396,22 +2261,16 @@ def test_download_scores_file(session, data_provider, client, setup_router_db, d def test_download_counts_file(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment["urn"], - scores_csv_path=data_files / "scores.csv", - counts_csv_path=data_files / "counts.csv", + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion( + client, session, data_provider, score_set, data_files / "scores.csv", data_files / "counts.csv" ) - with patch.object(ArqRedis, "enqueue_job", return_value=None) as queue: - publish_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert publish_score_set_response.status_code == 200 - queue.assert_called_once() - publish_score_set = publish_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + published_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() download_counts_csv_response = client.get( - f"/api/v1/score-sets/{publish_score_set['urn']}/counts?drop_na_columns=true" + f"/api/v1/score-sets/{published_score_set['urn']}/counts?drop_na_columns=true" ) assert download_counts_csv_response.status_code == 200 download_counts_csv = download_counts_csv_response.text diff --git a/tests/routers/test_statistics.py b/tests/routers/test_statistics.py index 279147e6..a26f349e 100644 --- a/tests/routers/test_statistics.py +++ b/tests/routers/test_statistics.py @@ -1,8 +1,14 @@ -from unittest.mock import patch +# ruff: noqa: E402 -import cdot.hgvs.dataproviders import pytest from humps import camelize +from unittest.mock import patch + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + +from mavedb.models.published_variant import PublishedVariantsMV from tests.helpers.constants import ( TEST_BIORXIV_IDENTIFIER, @@ -14,12 +20,9 @@ TEST_PUBMED_IDENTIFIER, VALID_GENE, ) -from tests.helpers.util import ( - create_acc_score_set_with_variants, - create_experiment, - create_seq_score_set_with_variants, - publish_score_set, -) +from 
tests.helpers.util.score_set import publish_score_set, create_acc_score_set, create_seq_score_set +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.variant import mock_worker_variant_insertion, create_mapped_variants_for_score_set TARGET_ACCESSION_FIELDS = ["accession", "assembly", "gene"] TARGET_SEQUENCE_FIELDS = ["sequence", "sequence-type"] @@ -36,6 +39,41 @@ } +# Fixtures for setting up score sets on which to calculate statistics. +# Adds an experiment and score set to the database, then publishes the score set. +@pytest.fixture +def setup_acc_scoreset(setup_router_db, session, data_provider, client, data_files): + experiment = create_experiment(client) + with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + score_set = create_acc_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion( + client, session, data_provider, score_set, data_files / "scores_acc.csv" + ) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + + +@pytest.fixture +def setup_seq_scoreset(setup_router_db, session, data_provider, client, data_files): + experiment = create_experiment(client) + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" + ) + create_mapped_variants_for_score_set(session, unpublished_score_set["urn"]) + + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + # Note that we have not created indexes for this view when it is generated via metadata. This differs + # from the database created via alembic, which does create indexes. 
+ PublishedVariantsMV.refresh(session, False) + session.commit() + + def assert_statistic(desired_field_value, response): """ Each statistic test must check that the response code was 200, @@ -206,21 +244,20 @@ def test_target_gene_identifier_statistiscs( with patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT ): - score_set = create_acc_score_set_with_variants( - client, - session, - data_provider, - experiment["urn"], - data_files / "scores_acc.csv", - {"targetGenes": [target]}, + unpublished_score_set = create_acc_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores_acc.csv" ) elif "targetSequence" in target: - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", {"targetGenes": [target]} + unpublished_score_set = create_seq_score_set(client, experiment["urn"]) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - publish_score_set(client, score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() response = client.get(f"/api/v1/statistics/target/gene/{field_value}") desired_field_value = EXTERNAL_IDENTIFIERS[field_value]["identifier"]["identifier"] @@ -278,11 +315,14 @@ def test_record_publication_identifier_statistics( # updates. Folding these more complex setup steps into a fixture is more trouble than it's worth. record_update = {"primaryPublicationIdentifiers": [mocked_publication]} experiment = create_experiment(client, record_update) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", record_update + unpublished_score_set = create_seq_score_set(client, experiment["urn"], record_update) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - publish_score_set(client, score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() response = client.get(f"/api/v1/statistics/record/{model_value}/publication-identifiers") @@ -308,11 +348,14 @@ def test_record_keyword_statistics(session, data_provider, client, setup_router_ # Create experiment and score set resources. The fixtures are more useful for the simple cases that don't need scoreset / experiment # updates. Folding these more complex setup steps into a fixture is more trouble than it's worth. 
experiment = create_experiment(client, record_update) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" + unpublished_score_set = create_seq_score_set(client, experiment["urn"], record_update) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - publish_score_set(client, score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() response = client.get("/api/v1/statistics/record/experiment/keywords") desired_field_values = ["SaCas9", "Endogenous locus library method", "Base editor", "Other"] @@ -330,11 +373,14 @@ def test_record_doi_identifier_statistics(session, data_provider, client, setup_ # Create experiment and score set resources. The fixtures are more useful for the simple cases that don't need scoreset / experiment # updates. Folding these more complex setup steps into a fixture is more trouble than it's worth. experiment = create_experiment(client, record_update) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", record_update + unpublished_score_set = create_seq_score_set(client, experiment["urn"], record_update) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - publish_score_set(client, score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() response = client.get(f"/api/v1/statistics/record/{model_value}/doi-identifiers") desired_field_value = record_update["doiIdentifiers"][0]["identifier"] @@ -353,11 +399,14 @@ def test_record_raw_read_identifier_statistics( # Create experiment and score set resources. The fixtures are more useful for the simple cases that don't need scoreset / experiment # updates. Folding these more complex setup steps into a fixture is more trouble than it's worth. 
experiment = create_experiment(client, record_update) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv", record_update + unpublished_score_set = create_seq_score_set(client, experiment["urn"], record_update) + unpublished_score_set = mock_worker_variant_insertion( + client, session, data_provider, unpublished_score_set, data_files / "scores.csv" ) - publish_score_set(client, score_set["urn"]) + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() response = client.get(f"/api/v1/statistics/record/{model_value}/raw-read-identifiers") desired_field_value = record_update["rawReadIdentifiers"][0]["identifier"] diff --git a/tests/routers/test_target_gene.py b/tests/routers/test_target_gene.py index 4a607101..281c5265 100644 --- a/tests/routers/test_target_gene.py +++ b/tests/routers/test_target_gene.py @@ -1,21 +1,22 @@ +# ruff: noqa: E402 +import pytest + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from tests.helpers.util import ( - change_ownership, - create_experiment, - create_seq_score_set_with_variants, -) + +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.user import change_ownership +from tests.helpers.util.score_set import create_seq_score_set +from tests.helpers.util.variant import mock_worker_variant_insertion def test_search_my_target_genes_no_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") search_payload = {"text": "NONEXISTENT"} response = client.post("/api/v1/me/target-genes/search", json=search_payload) @@ -24,15 +25,9 @@ def test_search_my_target_genes_no_match(session, data_provider, client, setup_r def test_search_my_target_genes_no_match_on_other_user(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") change_ownership(session, score_set["urn"], ScoreSetDbModel) search_payload = {"text": "TEST1"} @@ -42,15 +37,9 @@ def test_search_my_target_genes_no_match_on_other_user(session, data_provider, c def test_search_my_target_genes_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score 
Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") search_payload = {"text": "TEST1"} response = client.post("/api/v1/me/target-genes/search", json=search_payload) @@ -60,15 +49,9 @@ def test_search_my_target_genes_match(session, data_provider, client, setup_rout def test_search_target_genes_no_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") search_payload = {"text": "NONEXISTENT"} response = client.post("/api/v1/target-genes/search", json=search_payload) @@ -77,15 +60,9 @@ def test_search_target_genes_no_match(session, data_provider, client, setup_rout def test_search_target_genes_match_on_other_user(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - score_set = create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") change_ownership(session, score_set["urn"], ScoreSetDbModel) search_payload = {"text": "TEST1"} @@ -96,15 +73,9 @@ def test_search_target_genes_match_on_other_user(session, data_provider, client, def test_search_target_genes_match(session, data_provider, client, setup_router_db, data_files): - experiment_1 = create_experiment(client, {"title": "Experiment 1"}) - create_seq_score_set_with_variants( - client, - session, - data_provider, - experiment_1["urn"], - data_files / "scores.csv", - update={"title": "Test Score Set"}, - ) + experiment = create_experiment(client, {"title": "Experiment 1"}) + score_set = create_seq_score_set(client, experiment["urn"]) + score_set = mock_worker_variant_insertion(client, session, data_provider, score_set, data_files / "scores.csv") search_payload = {"text": "TEST1"} response = client.post("/api/v1/target-genes/search", json=search_payload) diff --git a/tests/routers/test_users.py b/tests/routers/test_users.py index 8110ed72..bae66fbc 100644 --- a/tests/routers/test_users.py +++ b/tests/routers/test_users.py @@ -1,10 +1,16 @@ -from unittest import mock +# ruff: noqa: E402 import pytest +from unittest import mock + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") from mavedb.lib.authentication import get_current_user from mavedb.lib.authorization import require_current_user from mavedb.models.enums.user_role import UserRole + from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_USER, camelize from tests.helpers.dependency_overrider import DependencyOverrider diff --git a/tests/validation/dataframe/conftest.py 
b/tests/validation/dataframe/conftest.py new file mode 100644 index 00000000..8e4596db --- /dev/null +++ b/tests/validation/dataframe/conftest.py @@ -0,0 +1,43 @@ +import pandas as pd +import pytest +from unittest import mock, TestCase + +from mavedb.lib.validation.constants.general import ( + hgvs_nt_column, + hgvs_pro_column, + hgvs_splice_column, + required_score_column, +) +from tests.helpers.constants import TEST_CDOT_TRANSCRIPT + + +@pytest.fixture +def mocked_data_provider_class_attr(request): + """ + Sets the `human_data_provider` attribute on the class from the requesting + test context to the `data_provider` fixture. This allows fixture use across + the `unittest.TestCase` class. + """ + data_provider = mock.Mock() + data_provider._get_transcript.return_value = TEST_CDOT_TRANSCRIPT + request.cls.mocked_human_data_provider = data_provider + + +# Special DF Test Case that contains dummy data for tests below +@pytest.mark.usefixtures("mocked_data_provider_class_attr") +class DfTestCase(TestCase): + def setUp(self): + self.dataframe = pd.DataFrame( + { + hgvs_nt_column: ["g.1A>G", "g.1A>T"], + hgvs_splice_column: ["c.1A>G", "c.1A>T"], + hgvs_pro_column: ["p.Met1Val", "p.Met1Leu"], + required_score_column: [1.0, 2.0], + "extra": [12.0, 3.0], + "count1": [3.0, 5.0], + "count2": [9, 10], + "extra2": ["pathogenic", "benign"], + "mixed_types": ["test", 1.0], + "null_col": [None, None], + } + ) diff --git a/tests/validation/dataframe/test_column.py b/tests/validation/dataframe/test_column.py new file mode 100644 index 00000000..a11da0bb --- /dev/null +++ b/tests/validation/dataframe/test_column.py @@ -0,0 +1,272 @@ +from unittest import TestCase +from unittest.mock import Mock +import pandas as pd + +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.lib.validation.constants.general import ( + hgvs_nt_column, + hgvs_pro_column, + hgvs_splice_column, + required_score_column, +) +from mavedb.lib.validation.dataframe.column import ( + construct_target_sequence_mappings, + infer_column_type, + generate_variant_prefixes, + validate_data_column, + validate_hgvs_column_properties, + validate_variant_formatting, + validate_variant_column, +) + +from tests.validation.dataframe.conftest import DfTestCase + + +class TestInferColumnType(TestCase): + def test_floats(self): + test_data = pd.Series([12.0, 1.0, -0.012, 5.75]) + self.assertEqual(infer_column_type(test_data), "numeric") + + def test_ints(self): + test_data = pd.Series([12, 1, 0, -5]) + self.assertEqual(infer_column_type(test_data), "numeric") + + def test_floats_with_na(self): + test_data = pd.Series([12.0, 1.0, None, -0.012, 5.75]) + self.assertEqual(infer_column_type(test_data), "numeric") + + def test_ints_with_na(self): + test_data = pd.Series([12, 1, None, 0, -5]) + self.assertEqual(infer_column_type(test_data), "numeric") + + def test_convertable_strings(self): + test_data = pd.Series(["12.5", 1.25, "0", "-5"]) + self.assertEqual(infer_column_type(test_data), "numeric") + + def test_strings(self): + test_data = pd.Series(["hello", "test", "suite", "123abc"]) + self.assertEqual(infer_column_type(test_data), "string") + + def test_strings_with_na(self): + test_data = pd.Series(["hello", "test", None, "suite", "123abc"]) + self.assertEqual(infer_column_type(test_data), "string") + + def test_mixed(self): + test_data = pd.Series(["hello", 12.123, -75, "123abc"]) + self.assertEqual(infer_column_type(test_data), "mixed") + + def test_mixed_with_na(self): + test_data = pd.Series(["hello", None, 12.123, -75, "123abc"]) 
+ self.assertEqual(infer_column_type(test_data), "mixed") + + def test_all_na(self): + test_data = pd.Series([None] * 5) + self.assertEqual(infer_column_type(test_data), "empty") + + +class TestValidateVariantFormatting(TestCase): + def setUp(self) -> None: + super().setUp() + + self.valid = pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column) + self.inconsistent = pd.Series(["g.1A>G", "c.1A>T"], name=hgvs_nt_column) + self.valid_prefixes = ["g."] + self.invalid_prefixes = ["c."] + self.valid_target = ["single_target"] + + self.valid_multi = pd.Series(["test1:g.1A>G", "test2:g.1A>T"], name=hgvs_nt_column) + self.invalid_multi = pd.Series(["test3:g.1A>G", "test3:g.1A>T"], name=hgvs_nt_column) + self.inconsistent_multi = pd.Series(["test1:g.1A>G", "test2:c.1A>T"], name=hgvs_nt_column) + self.valid_targets = ["test1", "test2"] + + def test_single_target_valid_variants(self): + validate_variant_formatting(self.valid, self.valid_prefixes, self.valid_target, False) + + def test_single_target_inconsistent_variants(self): + with self.assertRaises(ValidationError): + validate_variant_formatting(self.inconsistent, self.valid_prefixes, self.valid_target, False) + + def test_single_target_invalid_prefixes(self): + with self.assertRaises(ValidationError): + validate_variant_formatting(self.valid, self.invalid_prefixes, self.valid_target, False) + + def test_multi_target_valid_variants(self): + validate_variant_formatting(self.valid_multi, self.valid_prefixes, self.valid_targets, True) + + def test_multi_target_inconsistent_variants(self): + with self.assertRaises(ValidationError): + validate_variant_formatting(self.inconsistent_multi, self.valid_prefixes, self.valid_targets, True) + + def test_multi_target_invalid_prefixes(self): + with self.assertRaises(ValidationError): + validate_variant_formatting(self.valid_multi, self.invalid_prefixes, self.valid_targets, True) + + def test_multi_target_lacking_full_coords(self): + with self.assertRaises(ValidationError): + validate_variant_formatting(self.valid, self.valid_prefixes, self.valid_targets, True) + + def test_multi_target_invalid_accessions(self): + with self.assertRaises(ValidationError): + validate_variant_formatting(self.invalid_multi, self.valid_prefixes, self.valid_targets, True) + + +class TestValidateVariantColumn(DfTestCase): + def setUp(self): + super().setUp() + + def test_invalid_column_type_index(self): + with self.assertRaises(ValidationError): + validate_variant_column(self.dataframe[required_score_column], True) + + def test_invalid_column_type(self): + with self.assertRaises(ValidationError): + validate_variant_column(self.dataframe[required_score_column], False) + + def test_null_values_type_index(self): + self.dataframe.iloc[1, self.dataframe.columns.get_loc(hgvs_nt_column)] = pd.NA + with self.assertRaises(ValidationError): + validate_variant_column(self.dataframe.iloc[0, :], True) + + def test_null_values_type(self): + self.dataframe.iloc[1, self.dataframe.columns.get_loc(hgvs_nt_column)] = pd.NA + validate_variant_column(self.dataframe[hgvs_nt_column], False) + + def test_nonunique_values_index(self): + self.dataframe["dup_col"] = ["p.Met1Leu", "p.Met1Leu"] + with self.assertRaises(ValidationError): + validate_variant_column(self.dataframe["dup_col"], True) + + def test_nonunique_values(self): + self.dataframe["dup_col"] = ["p.Met1Leu", "p.Met1Leu"] + validate_variant_column(self.dataframe["dup_col"], False) + + def test_variant_column_is_valid(self): + validate_variant_column(self.dataframe[hgvs_nt_column], True) + + +class 
TestGenerateVariantPrefixes(DfTestCase): + def setUp(self): + super().setUp() + + self.nt_prefixes = ["c.", "n.", "g.", "m.", "o."] + self.splice_prefixes = ["c.", "n."] + self.pro_prefixes = ["p."] + + def test_nt_prefixes(self): + prefixes = generate_variant_prefixes(self.dataframe[hgvs_nt_column]) + assert prefixes == self.nt_prefixes + + def test_pro_prefixes(self): + prefixes = generate_variant_prefixes(self.dataframe[hgvs_pro_column]) + assert prefixes == self.pro_prefixes + + def test_splice_prefixes(self): + prefixes = generate_variant_prefixes(self.dataframe[hgvs_splice_column]) + assert prefixes == self.splice_prefixes + + def test_unrecognized_column_prefixes(self): + with self.assertRaises(ValueError): + generate_variant_prefixes(self.dataframe["extra"]) + + +class TestValidateDataColumn(DfTestCase): + def test_valid(self): + validate_data_column(self.dataframe[required_score_column]) + + def test_null_column(self): + self.dataframe[required_score_column] = None + with self.assertRaises(ValidationError): + validate_data_column(self.dataframe[required_score_column]) + + def test_missing_data(self): + self.dataframe.loc[0, "extra"] = None + validate_data_column(self.dataframe["extra"]) + + def test_force_numeric(self): + with self.assertRaises(ValidationError): + validate_data_column(self.dataframe["extra2"], force_numeric=True) + + def test_mixed_types_invalid(self): + with self.assertRaises(ValidationError): + validate_data_column(self.dataframe["mixed_types"]) + + +class TestValidateHgvsColumnProperties(TestCase): + def setUp(self): + self.dna_observed = ["dna"] + self.protein_observed = ["protein"] + self.mixed_observed = ["dna", "protein"] + + def test_valid_dna_column(self): + column = pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column) + validate_hgvs_column_properties(column, self.dna_observed) + + def test_invalid_dna_column(self): + column = pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column) + with self.assertRaises(ValueError): + validate_hgvs_column_properties(column, self.protein_observed) + + def test_valid_splice_column(self): + column = pd.Series(["c.1-2A>G", "c.1-2A>T"], name=hgvs_splice_column) + validate_hgvs_column_properties(column, self.mixed_observed) + + def test_valid_protein_column(self): + column = pd.Series(["p.Met1Leu", "p.Met1Val"], name=hgvs_pro_column) + validate_hgvs_column_properties(column, self.mixed_observed) + + def test_invalid_column_name(self): + column = pd.Series(["x.1A>G", "x.1A>T"], name="invalid_column") + with self.assertRaises(ValueError): + validate_hgvs_column_properties(column, self.mixed_observed) + + +class TestConstructTargetSequenceMappings(TestCase): + def setUp(self): + mock_seq1, mock_seq2, mock_seq3 = Mock(), Mock(), Mock() + mock_seq1.sequence = "ATGCGT" + mock_seq1.sequence_type = "dna" + mock_seq2.sequence = "MR" + mock_seq2.sequence_type = "protein" + mock_seq3.sequence = None + mock_seq3.sequence_type = "dna" + + self.targets = { + "target1": mock_seq1, + "target2": mock_seq2, + "target3": mock_seq3, + } + + def test_nt_column(self): + column = pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column) + expected = { + "target1": "ATGCGT", + "target2": "MR", + "target3": None, + } + result = construct_target_sequence_mappings(column, self.targets) + self.assertEqual(result, expected) + + def test_splice_column(self): + column = pd.Series(["c.1-2A>G", "c.1-2A>T"], name=hgvs_splice_column) + expected = { + "target1": None, + "target2": None, + "target3": None, + } + result = construct_target_sequence_mappings(column, 
self.targets) + self.assertEqual(result, expected) + + def test_pro_column(self): + column = pd.Series(["p.Met1Leu", "p.Met1Val"], name=hgvs_pro_column) + expected = { + "target1": "MR", + "target2": "MR", + "target3": None, + } + result = construct_target_sequence_mappings(column, self.targets) + self.assertEqual(result, expected) + + def test_invalid_column_name(self): + column = pd.Series(["x.1A>G", "x.1A>T"], name="invalid_column") + with self.assertRaises(ValueError): + construct_target_sequence_mappings(column, self.targets) diff --git a/tests/validation/dataframe/test_dataframe.py b/tests/validation/dataframe/test_dataframe.py new file mode 100644 index 00000000..4bca6f2f --- /dev/null +++ b/tests/validation/dataframe/test_dataframe.py @@ -0,0 +1,414 @@ +import itertools +from unittest import TestCase + +import numpy as np +import pandas as pd +import pytest + +from mavedb.lib.validation.constants.general import ( + hgvs_nt_column, + hgvs_pro_column, + hgvs_splice_column, + required_score_column, +) +from mavedb.lib.validation.dataframe.dataframe import ( + choose_dataframe_index_column, + sort_dataframe_columns, + standardize_dataframe, + validate_and_standardize_dataframe_pair, + validate_column_names, + validate_hgvs_prefix_combinations, + validate_no_null_rows, + validate_variant_columns_match, +) +from mavedb.lib.validation.exceptions import ValidationError +from tests.validation.dataframe.conftest import DfTestCase + + +class TestSortDataframeColumns(DfTestCase): + def test_preserve_sorted(self): + sorted_df = sort_dataframe_columns(self.dataframe) + pd.testing.assert_frame_equal(self.dataframe, sorted_df) + + def test_sort_dataframe(self): + sorted_df = sort_dataframe_columns( + self.dataframe[ + [ + hgvs_splice_column, + "extra", + "count1", + hgvs_pro_column, + required_score_column, + hgvs_nt_column, + "count2", + "extra2", + "mixed_types", + "null_col", + ] + ] + ) + pd.testing.assert_frame_equal(self.dataframe, sorted_df) + + def test_sort_dataframe_is_case_insensitive(self): + self.dataframe = self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()}) + sorted_df = sort_dataframe_columns(self.dataframe) + pd.testing.assert_frame_equal(self.dataframe, sorted_df) + + def test_sort_dataframe_preserves_extras_order(self): + sorted_df = sort_dataframe_columns( + self.dataframe[ + [ + hgvs_splice_column, + "count2", + hgvs_pro_column, + required_score_column, + hgvs_nt_column, + "count1", + "extra2", + "extra", + "mixed_types", + ] + ] + ) + pd.testing.assert_frame_equal( + self.dataframe[ + [ + hgvs_nt_column, + hgvs_splice_column, + hgvs_pro_column, + required_score_column, + "count2", + "count1", + "extra2", + "extra", + "mixed_types", + ] + ], + sorted_df, + ) + + +class TestStandardizeDataframe(DfTestCase): + def test_preserve_standardized(self): + standardized_df = standardize_dataframe(self.dataframe) + pd.testing.assert_frame_equal(self.dataframe, standardized_df) + + def test_standardize_changes_case_variants(self): + standardized_df = standardize_dataframe(self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()})) + pd.testing.assert_frame_equal(self.dataframe, standardized_df) + + def test_standardice_changes_case_scores(self): + standardized_df = standardize_dataframe( + self.dataframe.rename(columns={required_score_column: required_score_column.title()}) + ) + pd.testing.assert_frame_equal(self.dataframe, standardized_df) + + def test_standardize_preserves_extras_case(self): + standardized_df = 
standardize_dataframe(self.dataframe.rename(columns={"extra": "extra".upper()})) + pd.testing.assert_frame_equal(self.dataframe.rename(columns={"extra": "extra".upper()}), standardized_df) + + def test_standardize_sorts_columns(self): + standardized_df = standardize_dataframe( + self.dataframe.loc[ + :, + [ + hgvs_splice_column, + "count2", + hgvs_pro_column, + required_score_column, + hgvs_nt_column, + "count1", + "extra", + ], + ] + ) + pd.testing.assert_frame_equal( + self.dataframe[ + [ + hgvs_nt_column, + hgvs_splice_column, + hgvs_pro_column, + required_score_column, + "count2", + "count1", + "extra", + ] + ], + standardized_df, + ) + + +class TestValidateStandardizeDataFramePair(DfTestCase): + def test_no_targets(self): + with self.assertRaises(ValueError): + validate_and_standardize_dataframe_pair( + self.dataframe, counts_df=None, targets=[], hdp=self.mocked_human_data_provider + ) + + # TODO: Add additional DataFrames. Realistically, if other unit tests pass this function is ok + + +class TestNullRows(DfTestCase): + def test_null_row(self): + self.dataframe.iloc[1, :] = None + with self.assertRaises(ValidationError): + validate_no_null_rows(self.dataframe) + + def test_valid(self): + validate_no_null_rows(self.dataframe) + + def test_only_hgvs_row(self): + self.dataframe.loc[1, [required_score_column, "extra", "count1", "count2"]] = None + validate_no_null_rows(self.dataframe) + + +class TestColumnNames(DfTestCase): + def test_only_two_kinds_of_dataframe(self): + with self.assertRaises(ValueError): + validate_column_names(self.dataframe, kind="score2") + + def test_score_df_has_score_column(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.drop([required_score_column], axis=1), kind="scores") + + def test_count_df_lacks_score_column(self): + validate_column_names(self.dataframe.drop([required_score_column], axis=1), kind="counts") + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe, kind="counts") + + def test_count_df_has_score_column(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe, kind="counts") + + def test_df_with_only_scores(self): + validate_column_names(self.dataframe[[hgvs_pro_column, required_score_column]], kind="scores") + + def test_count_df_must_have_data(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe[[hgvs_nt_column, hgvs_pro_column]], kind="counts") + + def test_just_hgvs_nt(self): + validate_column_names(self.dataframe.drop([hgvs_pro_column, hgvs_splice_column], axis=1), kind="scores") + validate_column_names( + self.dataframe.drop([hgvs_pro_column, hgvs_splice_column, required_score_column], axis=1), kind="counts" + ) + + def test_just_hgvs_pro(self): + validate_column_names(self.dataframe.drop([hgvs_nt_column, hgvs_splice_column], axis=1), kind="scores") + validate_column_names( + self.dataframe.drop([hgvs_nt_column, hgvs_splice_column, required_score_column], axis=1), kind="counts" + ) + + def test_just_hgvs_pro_and_nt(self): + validate_column_names(self.dataframe.drop([hgvs_splice_column], axis=1), kind="scores") + validate_column_names(self.dataframe.drop([hgvs_splice_column, required_score_column], axis=1), kind="counts") + + def test_hgvs_splice_must_have_pro_and_nt_both_absent(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1), kind="scores") + + def test_hgvs_splice_must_have_pro_and_nt_nt_absent(self): + with 
self.assertRaises(ValidationError): + validate_column_names(self.dataframe.drop([hgvs_nt_column], axis=1), kind="scores") + + def test_hgvs_splice_must_have_pro_and_nt_pro_absent(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.drop([hgvs_pro_column], axis=1), kind="scores") + + def test_hgvs_splice_must_have_pro_and_nt_and_scores(self): + with self.assertRaises(ValidationError): + validate_column_names( + self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, required_score_column], axis=1), kind="counts" + ) + + def test_hgvs_splice_must_have_pro_and_nt_nt_scores_absent(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.drop([hgvs_nt_column, required_score_column], axis=1), kind="counts") + + def test_hgvs_splice_must_have_pro_and_nt_pro_scores_absent(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.drop([hgvs_pro_column, required_score_column], axis=1), kind="counts") + + def test_no_hgvs_column_scores(self): + with pytest.raises(ValidationError) as exc_info: + validate_column_names( + self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, hgvs_splice_column], axis=1), kind="scores" + ) + assert "dataframe does not define any variant columns" in str(exc_info.value) + + def test_no_hgvs_column_counts(self): + with pytest.raises(ValidationError) as exc_info: + validate_column_names( + self.dataframe.drop( + [hgvs_nt_column, hgvs_pro_column, hgvs_splice_column, required_score_column], axis=1 + ), + kind="counts", + ) + assert "dataframe does not define any variant columns" in str(exc_info.value) + + def test_validation_ignores_column_ordering_scores(self): + validate_column_names( + self.dataframe[[hgvs_nt_column, required_score_column, hgvs_pro_column, hgvs_splice_column]], kind="scores" + ) + validate_column_names(self.dataframe[[required_score_column, hgvs_nt_column, hgvs_pro_column]], kind="scores") + validate_column_names(self.dataframe[[hgvs_pro_column, required_score_column, hgvs_nt_column]], kind="scores") + + def test_validation_ignores_column_ordering_counts(self): + validate_column_names( + self.dataframe[[hgvs_nt_column, "count1", hgvs_pro_column, hgvs_splice_column, "count2"]], kind="counts" + ) + validate_column_names(self.dataframe[["count1", "count2", hgvs_nt_column, hgvs_pro_column]], kind="counts") + validate_column_names(self.dataframe[[hgvs_pro_column, "count1", "count2", hgvs_nt_column]], kind="counts") + + def test_validation_is_case_insensitive(self): + validate_column_names(self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()}), kind="scores") + validate_column_names( + self.dataframe.rename(columns={required_score_column: required_score_column.title()}), kind="scores" + ) + + def test_duplicate_hgvs_column_names_scores(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.rename(columns={hgvs_pro_column: hgvs_nt_column}), kind="scores") + + def test_duplicate_hgvs_column_names_counts(self): + with self.assertRaises(ValidationError): + validate_column_names( + self.dataframe.drop([required_score_column], axis=1).rename(columns={hgvs_pro_column: hgvs_nt_column}), + kind="counts", + ) + + def test_duplicate_score_column_names(self): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.rename(columns={"extra": required_score_column}), kind="scores") + + def test_duplicate_data_column_names_scores(self): + with self.assertRaises(ValidationError): + 
validate_column_names(self.dataframe.rename(columns={"count2": "count1"}), kind="scores") + + def test_duplicate_data_column_names_counts(self): + with self.assertRaises(ValidationError): + validate_column_names( + self.dataframe.drop([required_score_column], axis=1).rename(columns={"count2": "count1"}), kind="counts" + ) + + # Written without @pytest.mark.parametrize. See: https://pytest.org/en/7.4.x/how-to/unittest.html#pytest-features-in-unittest-testcase-subclasses + def test_invalid_column_names_scores(self): + invalid_values = [None, np.nan, "", " "] + for value in invalid_values: + with self.subTest(value=value): + with self.assertRaises(ValidationError): + validate_column_names(self.dataframe.rename(columns={hgvs_splice_column: value}), kind="scores") + + def test_invalid_column_names_counts(self): + invalid_values = [None, np.nan, "", " "] + for value in invalid_values: + with self.subTest(value=value): + with self.assertRaises(ValidationError): + validate_column_names( + self.dataframe.drop([required_score_column], axis=1).rename( + columns={hgvs_splice_column: value} + ), + kind="counts", + ) + + def test_ignore_column_ordering_scores(self): + validate_column_names( + self.dataframe[[hgvs_splice_column, "extra", "count1", hgvs_pro_column, "score", hgvs_nt_column, "count2"]], + kind="scores", + ) + + def test_ignore_column_ordering_counts(self): + validate_column_names( + self.dataframe[[hgvs_splice_column, "extra", "count1", hgvs_pro_column, hgvs_nt_column, "count2"]], + kind="counts", + ) + + +class TestChooseDataframeIndexColumn(DfTestCase): + def setUp(self): + super().setUp() + + def test_nt_index_column(self): + index = choose_dataframe_index_column(self.dataframe) + assert index == hgvs_nt_column + + def test_pro_index_column(self): + index = choose_dataframe_index_column(self.dataframe.drop(hgvs_nt_column, axis=1)) + assert index == hgvs_pro_column + + def test_no_valid_index_column(self): + with self.assertRaises(ValidationError): + choose_dataframe_index_column(self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1)) + + +class TestValidateHgvsPrefixCombinations(TestCase): + def setUp(self): + self.valid_combinations = [ + ("g", "c", "p"), + ("m", "c", "p"), + ("o", "c", "p"), + ("g", "n", None), + ("m", "n", None), + ("o", "n", None), + ("n", None, None), + ("c", None, "p"), + (None, None, "p"), + (None, None, None), # valid for this validator, but a dataframe with no variants should be caught upstream + ] + self.invalid_combinations = [ + t + for t in itertools.product(("c", "n", "g", "m", "o", None), ("c", "n", None), ("p", None)) + if t not in self.valid_combinations + ] + + def test_valid_combinations(self): + for t in self.valid_combinations: + with self.subTest(t=t): + validate_hgvs_prefix_combinations(*t, True) + + def test_invalid_combinations(self): + for t in self.invalid_combinations: + with self.subTest(t=t): + with self.assertRaises(ValidationError): + validate_hgvs_prefix_combinations(*t, True) + + # TODO: biocommons.HGVS validation clashes here w/ our custom validators: + # n. prefix is the problematic one, for now. 
+ @pytest.mark.skip() + def test_invalid_combinations_biocommons(self): + for t in self.invalid_combinations: + with self.subTest(t=t): + with self.assertRaises(ValidationError): + validate_hgvs_prefix_combinations(*t, False) + + def test_invalid_combinations_value_error_nt(self): + with self.assertRaises(ValueError): + validate_hgvs_prefix_combinations("p", None, None, True) + + def test_invalid_combinations_value_error_nt_pro(self): + with self.assertRaises(ValueError): + validate_hgvs_prefix_combinations("c", None, "P", True) + + def test_invalid_combinations_value_error_splice(self): + with self.assertRaises(ValueError): + validate_hgvs_prefix_combinations("x", "c", "p", True) + + +class TestValidateVariantColumnsMatch(DfTestCase): + def test_same_df(self): + validate_variant_columns_match(self.dataframe, self.dataframe) + + def test_ignore_order(self): + validate_variant_columns_match(self.dataframe, self.dataframe.iloc[::-1]) + + def test_missing_column(self): + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, self.dataframe.drop(hgvs_nt_column, axis=1)) + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe.drop(hgvs_nt_column, axis=1), self.dataframe) + + def test_missing_variant(self): + df2 = self.dataframe.copy() + df2.loc[0, hgvs_pro_column] = None + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, df2) diff --git a/tests/validation/dataframe/test_variant.py b/tests/validation/dataframe/test_variant.py new file mode 100644 index 00000000..810780f4 --- /dev/null +++ b/tests/validation/dataframe/test_variant.py @@ -0,0 +1,893 @@ +import pytest +import pandas as pd +import unittest +from unittest.mock import Mock, patch + +from mavedb.lib.validation.constants.general import ( + hgvs_nt_column, + hgvs_pro_column, + hgvs_splice_column, +) +from mavedb.lib.validation.dataframe.variant import ( + validate_hgvs_transgenic_column, + validate_hgvs_genomic_column, + parse_genomic_variant, + parse_transgenic_variant, + validate_observed_sequence_types, + validate_hgvs_prefix_combinations, +) +from mavedb.lib.validation.exceptions import ValidationError + +from tests.helpers.constants import VALID_ACCESSION, TEST_CDOT_TRANSCRIPT +from tests.validation.dataframe.conftest import DfTestCase + + +try: + import hgvs # noqa: F401 + import cdot.hgvs.dataproviders # noqa: F401 + + HGVS_INSTALLED = True +except ModuleNotFoundError: + HGVS_INSTALLED = False + + +# Spoof the target sequence type +class NucleotideSequenceTestCase: + def __init__(self): + self.sequence = "ATG" + self.sequence_type = "dna" + + +class ProteinSequenceTestCase: + def __init__(self): + self.sequence = "MTG" + self.sequence_type = "protein" + + +class TestValidateTransgenicColumn(DfTestCase): + def setUp(self): + super().setUp() + + self.valid_hgvs_columns = [ + pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column), + pd.Series(["m.1A>G", "m.1A>T"], name=hgvs_nt_column), + pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_nt_column), + pd.Series(["n.1A>G", "n.1A>T"], name=hgvs_nt_column), + pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_splice_column), + pd.Series(["p.Met1Val", "p.Met1Leu"], name=hgvs_pro_column), + ] + + self.valid_hgvs_columns_nt_only = [ + pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column), + pd.Series(["m.1A>G", "m.1A>T"], name=hgvs_nt_column), + pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_nt_column), + pd.Series(["n.1A>G", "n.1A>T"], name=hgvs_nt_column), + ] + + self.valid_hgvs_columns_multi_target = 
[ + pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name=hgvs_nt_column), + pd.Series(["test_nt:m.1A>G", "test_nt:m.1A>T"], name=hgvs_nt_column), + pd.Series(["test_nt:c.1A>G", "test_nt:c.1A>T"], name=hgvs_nt_column), + pd.Series(["test_nt:n.1A>G", "test_nt:n.1A>T"], name=hgvs_nt_column), + pd.Series(["test_nt:c.1A>G", "test_pt:c.1A>T"], name=hgvs_splice_column), + pd.Series(["test_pt:p.Met1Val", "test_pt:p.Met1Leu"], name=hgvs_pro_column), + pd.Series(["test_nt:p.Met1Val", "test_pt:p.Met1Leu"], name=hgvs_pro_column), + pd.Series(["test_nt:p.Met1Val", "test_nt:p.Met1Leu"], name=hgvs_pro_column), + ] + + self.valid_hgvs_columns_nt_only_multi_target = [ + pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name=hgvs_nt_column), + pd.Series(["test_nt:m.1A>G", "test_nt:m.1A>T"], name=hgvs_nt_column), + pd.Series(["test_nt:c.1A>G", "test_nt:c.1A>T"], name=hgvs_nt_column), + pd.Series(["test_nt:n.1A>G", "test_nt:n.1A>T"], name=hgvs_nt_column), + ] + + self.valid_hgvs_columns_invalid_names = [ + pd.Series(["g.1A>G", "g.1A>T"], name="invalid_column_name"), + pd.Series(["p.Met1Val", "p.Met1Leu"], name="invalid_column_name"), + ] + + self.valid_hgvs_columns_invalid_names_multi_target = [ + pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name="invalid_column_name"), + pd.Series(["test_pt:p.Met1Val", "test_pt:p.Met1Leu"], name="invalid_column_name"), + ] + + self.valid_hgvs_columns_invalid_for_index = [ + # missing data + pd.Series(["c.1A>G", None], name=hgvs_nt_column), + pd.Series([None, "p.Met1Val"], name=hgvs_pro_column), + pd.Series([None, None], name=hgvs_nt_column), + pd.Series([None, None], name=hgvs_pro_column), + # duplicate rows + pd.Series(["c.1A>G", "c.1A>G"], name=hgvs_nt_column), + pd.Series(["p.Met1Val", "p.Met1Val"], name=hgvs_pro_column), + ] + + self.valid_hgvs_columns_invalid_for_index_multi_target = [ + # missing data + pd.Series(["test_nt:c.1A>G", None], name=hgvs_nt_column), + pd.Series([None, "test_pt:p.Met1Val"], name=hgvs_pro_column), + pd.Series([None, None], name=hgvs_nt_column), + pd.Series([None, None], name=hgvs_pro_column), + # duplicate rows + pd.Series(["test_nt:c.1A>G", "test_nt:c.1A>G"], name=hgvs_nt_column), + pd.Series(["test_nt:p.Met1Val", "test_nt:p.Met1Val"], name=hgvs_pro_column), + ] + + self.invalid_hgvs_columns_by_name = [ + pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_splice_column), + pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_pro_column), + pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_pro_column), + pd.Series(["n.1A>G", "n.1A>T"], name=hgvs_pro_column), + pd.Series(["p.Met1Val", "p.Met1Leu"], name=hgvs_nt_column), + ] + + self.invalid_hgvs_columns_by_name_multi_target = [ + pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name=hgvs_splice_column), + pd.Series(["test_pt:g.1A>G", "test_pt:g.1A>T"], name=hgvs_pro_column), + pd.Series(["test_nt:c.1A>G", "test_pt:c.1A>T"], name=hgvs_pro_column), + pd.Series(["test_nt:n.1A>G", "test_nt:n.1A>T"], name=hgvs_pro_column), + pd.Series(["test_nt:p.Met1Val", "test_nt:p.Met1Leu"], name=hgvs_nt_column), + pd.Series(["test_nt:p.Met1Val", "test_pt:p.Met1Leu"], name=hgvs_nt_column), + ] + + self.invalid_hgvs_columns_by_contents = [ + pd.Series(["r.1a>g", "r.1a>u"], name=hgvs_splice_column), # rna not allowed + pd.Series(["r.1a>g", "r.1a>u"], name=hgvs_nt_column), # rna not allowed + pd.Series(["c.1A>G", "c.5A>T"], name=hgvs_nt_column), # out of bounds for target + pd.Series(["c.1A>G", "_wt"], name=hgvs_nt_column), # old special variant + pd.Series(["p.Met1Leu", "_sy"], name=hgvs_pro_column), # old special variant + 
pd.Series(["n.1A>G", "c.1A>T"], name=hgvs_nt_column), # mixed prefix + pd.Series(["c.1A>G", "p.Met1Leu"], name=hgvs_pro_column), # mixed types/prefix + pd.Series(["c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_nt_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_splice_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_pro_column), # contains numeric + ] + + self.invalid_hgvs_columns_by_contents_multi_target = [ + pd.Series(["test_nt:r.1a>g", "test_nt:r.1a>u"], name=hgvs_splice_column), # rna not allowed + pd.Series(["test_nt:r.1a>g", "test_nt:r.1a>u"], name=hgvs_nt_column), # rna not allowed + pd.Series(["bad_label:r.1a>g", "test_nt:r.1a>u"], name=hgvs_nt_column), # invalid label + pd.Series(["test_nt:c.1A>G", "test_nt:c.5A>T"], name=hgvs_nt_column), # out of bounds for target + pd.Series(["test_nt:c.1A>G", "test_nt:_wt"], name=hgvs_nt_column), # old special variant + pd.Series(["test_pt:p.Met1Leu", "test_nt:_sy"], name=hgvs_pro_column), # old special variant + pd.Series(["test_nt:n.1A>G", "test_nt:c.1A>T"], name=hgvs_nt_column), # mixed prefix + pd.Series(["test_nt:c.1A>G", "test_pt:p.Met1Leu"], name=hgvs_pro_column), # mixed types/prefix + pd.Series(["test_pt:c.1A>G", "bad_label:p.Met1Leu"], name=hgvs_pro_column), # invalid label + pd.Series(["test_nt:c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_nt_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_splice_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_pro_column), # contains numeric + ] + + self.nt_sequence_test_case = NucleotideSequenceTestCase() + self.pt_sequence_test_case = ProteinSequenceTestCase() + + def test_valid_columns_single_target(self): + for column in self.valid_hgvs_columns: + with self.subTest(column=column): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + for column in self.valid_hgvs_columns_invalid_for_index: + with self.subTest(column=column): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + + def test_valid_columns_multi_target(self): + for column in self.valid_hgvs_columns_multi_target: + with self.subTest(column=column): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + for column in self.valid_hgvs_columns_invalid_for_index_multi_target: + with self.subTest(column=column): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + + # Test when supplied targets do not contain a DNA sequence (only valid for hgvs_nt col) + def test_valid_columns_invalid_supplied_targets(self): + for column in self.valid_hgvs_columns_nt_only: + with self.subTest(column=column): + with self.assertRaises(ValueError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_pt": self.pt_sequence_test_case}, # type: ignore + ) + + # Test when multiple supplied targets do not contain a DNA sequence (only valid for hgvs_nt col) + def test_valid_columns_invalid_supplied_targets_multi_target(self): + for column in self.valid_hgvs_columns_nt_only_multi_target: + with self.subTest(column=column): + with self.assertRaises(ValueError): + validate_hgvs_transgenic_column( + column, + 
is_index=True, + targets={"test_pt": self.pt_sequence_test_case, "test_pt_2": self.pt_sequence_test_case}, # type: ignore + ) + + def test_valid_columns_invalid_column_name(self): + for column in self.valid_hgvs_columns_invalid_names: + with self.subTest(column=column): + with self.assertRaises(ValueError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + + def test_valid_columns_invalid_column_name_multi_target(self): + for column in self.valid_hgvs_columns_invalid_names_multi_target: + with self.subTest(column=column): + with self.assertRaises(ValueError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + + def test_index_columns(self): + for column in self.valid_hgvs_columns: + with self.subTest(column=column): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + for column in self.valid_hgvs_columns_invalid_for_index: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + + def test_index_columns_multi_target(self): + for column in self.valid_hgvs_columns_multi_target: + with self.subTest(column=column): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + for column in self.valid_hgvs_columns_invalid_for_index_multi_target: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + + def test_invalid_column_values(self): + for column in self.invalid_hgvs_columns_by_contents: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + for column in self.invalid_hgvs_columns_by_contents: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + + def test_invalid_column_values_multi_target(self): + for column in self.invalid_hgvs_columns_by_contents_multi_target: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + for column in self.invalid_hgvs_columns_by_contents_multi_target: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + + def test_valid_column_values_wrong_column_name(self): + for column in self.invalid_hgvs_columns_by_name: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + 
for column in self.invalid_hgvs_columns_by_name: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case}, # type: ignore + ) + + def test_valid_column_values_wrong_column_name_multi_target(self): + for column in self.invalid_hgvs_columns_by_name: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=False, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + for column in self.invalid_hgvs_columns_by_name: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_hgvs_transgenic_column( + column, + is_index=True, + targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore + ) + + +# Spoof the accession type +class AccessionTestCase: + def __init__(self): + self.accession = VALID_ACCESSION + + +class GenomicColumnValidationTestCase(DfTestCase): + def setUp(self): + super().setUp() + + self.accession_test_case = AccessionTestCase() + + self.valid_hgvs_column = pd.Series( + [f"{VALID_ACCESSION}:c.1G>A", f"{VALID_ACCESSION}:c.2A>T"], name=hgvs_nt_column + ) + self.missing_data = pd.Series([f"{VALID_ACCESSION}:c.3T>G", None], name=hgvs_nt_column) + self.duplicate_data = pd.Series([f"{VALID_ACCESSION}:c.4A>G", f"{VALID_ACCESSION}:c.4A>G"], name=hgvs_nt_column) + + self.invalid_hgvs_columns_by_name = [ + pd.Series([f"{VALID_ACCESSION}:g.1A>G", f"{VALID_ACCESSION}:g.1A>T"], name=hgvs_splice_column), + pd.Series([f"{VALID_ACCESSION}:g.1A>G", f"{VALID_ACCESSION}:g.1A>T"], name=hgvs_pro_column), + pd.Series([f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.1A>T"], name=hgvs_pro_column), + pd.Series([f"{VALID_ACCESSION}:n.1A>G", f"{VALID_ACCESSION}:n.1A>T"], name=hgvs_pro_column), + pd.Series([f"{VALID_ACCESSION}:p.Met1Val", f"{VALID_ACCESSION}:p.Met1Leu"], name=hgvs_nt_column), + ] + + self.invalid_hgvs_columns_by_contents = [ + pd.Series( + [f"{VALID_ACCESSION}:r.1a>g", f"{VALID_ACCESSION}:r.1a>u"], name=hgvs_splice_column + ), # rna not allowed + pd.Series( + [f"{VALID_ACCESSION}:r.1a>g", f"{VALID_ACCESSION}:r.1a>u"], name=hgvs_nt_column + ), # rna not allowed + pd.Series([f"{VALID_ACCESSION}:c.1A>G", "_wt"], name=hgvs_nt_column), # old special variant + pd.Series([f"{VALID_ACCESSION}:p.Met1Leu", "_sy"], name=hgvs_pro_column), # old special variant + pd.Series([f"{VALID_ACCESSION}:n.1A>G", f"{VALID_ACCESSION}:c.1A>T"], name=hgvs_nt_column), # mixed prefix + pd.Series( + [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:p.Met1Leu"], name=hgvs_pro_column + ), # mixed types/prefix + pd.Series(["c.1A>G", "p.Met1Leu"], name=hgvs_pro_column), # variants should be fully qualified + pd.Series([f"{VALID_ACCESSION}:c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_nt_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_splice_column), # contains numeric + pd.Series([1.0, 2.5], name=hgvs_pro_column), # contains numeric + ] + + self.invalid_hgvs_columns_by_contents_under_strict_validation = [ + pd.Series( + [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.5A>T"], name=hgvs_nt_column + ), # out of bounds for target + ] + + +class TestValidateHgvsGenomicColumn(GenomicColumnValidationTestCase): + # Identical behavior for installed/uninstalled HGVS + def test_valid_variant_invalid_missing_index(self): + with ( + 
self.assertRaises(ValidationError),
+        ):
+            validate_hgvs_genomic_column(
+                self.missing_data,
+                is_index=True,
+                targets=[self.accession_test_case],
+                hdp=self.mocked_human_data_provider,
+            )  # type: ignore
+
+    # Identical behavior for installed/uninstalled HGVS
+    def test_valid_variant_invalid_duplicate_index(self):
+        with (
+            self.assertRaises(ValidationError),
+        ):
+            validate_hgvs_genomic_column(
+                self.duplicate_data,
+                is_index=True,
+                targets=[self.accession_test_case],
+                hdp=self.mocked_human_data_provider,
+            )  # type: ignore
+
+
+@unittest.skipUnless(HGVS_INSTALLED, "HGVS module not installed")
+@pytest.fixture
+def patched_data_provider_class_attr(request, data_provider):
+    """
+    Sets the `patched_human_data_provider` attribute on the class from the requesting
+    test context to the `data_provider` fixture. This allows fixture use across
+    the `unittest.TestCase` class.
+    """
+    request.cls.patched_human_data_provider = data_provider
+
+
+@unittest.skipUnless(HGVS_INSTALLED, "HGVS module not installed")
+@pytest.mark.usefixtures("patched_data_provider_class_attr")
+class TestValidateHgvsGenomicColumnHgvsInstalled(GenomicColumnValidationTestCase):
+    def test_valid_variant(self):
+        with patch.object(
+            cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT
+        ):
+            validate_hgvs_genomic_column(
+                self.valid_hgvs_column,
+                is_index=False,
+                targets=[self.accession_test_case],
+                hdp=self.patched_human_data_provider,
+            )  # type: ignore
+
+    def test_valid_variant_valid_missing(self):
+        with patch.object(
+            cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT
+        ):
+            validate_hgvs_genomic_column(
+                self.missing_data,
+                is_index=False,
+                targets=[self.accession_test_case],
+                hdp=self.patched_human_data_provider,
+            )  # type: ignore
+
+    def test_valid_variant_valid_duplicate(self):
+        with patch.object(
+            cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT
+        ):
+            validate_hgvs_genomic_column(
+                self.duplicate_data,
+                is_index=False,
+                targets=[self.accession_test_case],
+                hdp=self.patched_human_data_provider,
+            )  # type: ignore
+
+    def test_valid_variant_index(self):
+        with patch.object(
+            cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT
+        ):
+            validate_hgvs_genomic_column(
+                self.valid_hgvs_column,
+                is_index=True,
+                targets=[self.accession_test_case],
+                hdp=self.patched_human_data_provider,
+            )  # type: ignore
+
+    def test_invalid_column_values(self):
+        for column in (
+            self.invalid_hgvs_columns_by_contents + self.invalid_hgvs_columns_by_contents_under_strict_validation
+        ):
+            with (
+                self.subTest(column=column),
+                self.assertRaises(ValidationError),
+                patch.object(
+                    cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT
+                ),
+            ):
+                validate_hgvs_genomic_column(
+                    column,
+                    is_index=False,
+                    targets=[self.accession_test_case],
+                    hdp=self.patched_human_data_provider,  # type: ignore
+                )
+        for column in (
+            self.invalid_hgvs_columns_by_contents + self.invalid_hgvs_columns_by_contents_under_strict_validation
+        ):
+            with (
+                self.subTest(column=column),
+                self.assertRaises(ValidationError),
+                patch.object(
+                    cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT
+                ),
+            ):
+                validate_hgvs_genomic_column(
+                    column,
+                    is_index=True,
+                    targets=[self.accession_test_case],
+                    hdp=self.patched_human_data_provider,  # type: ignore
+                )
+
+    def test_valid_column_values_wrong_column_name(self):
+        for column in self.invalid_hgvs_columns_by_name:
+            with (
+                self.subTest(column=column),
+                self.assertRaises(ValidationError),
+                patch.object(
+                    cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT
+                ),
+            ):
+                validate_hgvs_genomic_column(
+                    column,
+                    is_index=False,
+                    targets=[self.accession_test_case],
+                    hdp=self.patched_human_data_provider,  # type: ignore
+                )
+        for column in self.invalid_hgvs_columns_by_name:
+            with (
+                self.subTest(column=column),
+                self.assertRaises(ValidationError),
+                patch.object(
+                    cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT
+                ),
+            ):
+                validate_hgvs_genomic_column(
+                    column,
+                    is_index=True,
+                    targets=[self.accession_test_case],
+                    hdp=self.patched_human_data_provider,  # type: ignore
+                )
+
+    # TODO: Test multiple targets
+
+
+@unittest.skipIf(HGVS_INSTALLED, "HGVS module installed")
+class TestValidateHgvsGenomicColumnHgvsNotInstalled(GenomicColumnValidationTestCase):
+    def test_valid_variant_strict_validation(self):
+        with self.assertRaises(ModuleNotFoundError):
+            validate_hgvs_genomic_column(
+                self.valid_hgvs_column,
+                is_index=False,
+                targets=[self.accession_test_case],
+                hdp=self.mocked_human_data_provider,
+            )  # type: ignore
+
+    def test_valid_variant_limited_validation(self):
+        validate_hgvs_genomic_column(
+            self.valid_hgvs_column, is_index=False, targets=[self.accession_test_case], hdp=None
+        )  # type: ignore
+
+    def test_valid_variant_valid_missing_strict_validation(self):
+        with self.assertRaises(ModuleNotFoundError):
+            validate_hgvs_genomic_column(
+                self.missing_data,
+                is_index=False,
+                targets=[self.accession_test_case],
+                hdp=self.mocked_human_data_provider,
+            )  # type: ignore
+
+    def test_valid_variant_valid_missing_limited_validation(self):
+        validate_hgvs_genomic_column(self.missing_data, is_index=False, targets=[self.accession_test_case], hdp=None)  # type: ignore
+
+    def test_valid_variant_valid_duplicate_strict_validation(self):
+        with self.assertRaises(ModuleNotFoundError):
+            validate_hgvs_genomic_column(
+                self.duplicate_data,
+                is_index=False,
+                targets=[self.accession_test_case],
+                hdp=self.mocked_human_data_provider,
+            )  # type: ignore
+
+    def test_valid_variant_valid_duplicate_limited_validation(self):
+        validate_hgvs_genomic_column(self.duplicate_data, is_index=False, targets=[self.accession_test_case], hdp=None)  # type: ignore
+
+    def test_valid_variant_index_strict_validation(self):
+        with self.assertRaises(ModuleNotFoundError):
+            validate_hgvs_genomic_column(
+                self.valid_hgvs_column,
+                is_index=True,
+                targets=[self.accession_test_case],
+                hdp=self.mocked_human_data_provider,
+            )  # type: ignore
+
+    def test_valid_variant_index_limited_validation(self):
+        validate_hgvs_genomic_column(
+            self.valid_hgvs_column, is_index=True, targets=[self.accession_test_case], hdp=None
+        )  # type: ignore
+
+    def test_invalid_column_values_strict_validation(self):
+        for column in (
+            self.invalid_hgvs_columns_by_contents + self.invalid_hgvs_columns_by_contents_under_strict_validation
+        ):
+            with self.subTest(column=column), self.assertRaises((ValidationError, ModuleNotFoundError)):
+                validate_hgvs_genomic_column(
+                    column,
+                    is_index=False,
+                    targets=[self.accession_test_case],
+                    hdp=self.mocked_human_data_provider,  # type: ignore
+                )
+        for column in (
+            self.invalid_hgvs_columns_by_contents + self.invalid_hgvs_columns_by_contents_under_strict_validation
+        ):
+            with self.subTest(column=column), self.assertRaises((ValidationError, ModuleNotFoundError)):
+                validate_hgvs_genomic_column(
+                    column,
+                    is_index=True,
+                    targets=[self.accession_test_case],
+                    hdp=self.mocked_human_data_provider,  # type: ignore
+                )
+
+    def test_invalid_column_values_limited_validation(self):
+        for column in self.invalid_hgvs_columns_by_contents:
+            with self.subTest(column=column), self.assertRaises(ValidationError):
+                validate_hgvs_genomic_column(
+                    column,
+                    is_index=False,
+                    targets=[self.accession_test_case],
+                    hdp=None,  # type: ignore
+                )
+        for column in self.invalid_hgvs_columns_by_contents:
+            with self.subTest(column=column), self.assertRaises(ValidationError):
+                validate_hgvs_genomic_column(
+                    column,
+                    is_index=True,
+                    targets=[self.accession_test_case],
+                    hdp=None,  # type: ignore
+                )
+        for column in self.invalid_hgvs_columns_by_contents_under_strict_validation:
+            with self.subTest(column=column):
+                validate_hgvs_genomic_column(
+                    column,
+                    is_index=True,
+                    targets=[self.accession_test_case],
+                    hdp=None,  # type: ignore
+                )
+
+    def test_valid_column_values_wrong_column_name_strict_validation(self):
+        for column in self.invalid_hgvs_columns_by_name:
+            with self.subTest(column=column), self.assertRaises(ValidationError):
+                validate_hgvs_genomic_column(
+                    column,
+                    is_index=False,
+                    targets=[self.accession_test_case],
+                    hdp=self.mocked_human_data_provider,  # type: ignore
+                )
+        for column in self.invalid_hgvs_columns_by_name:
+            with self.subTest(column=column), self.assertRaises(ValidationError):
+                validate_hgvs_genomic_column(
+                    column,
+                    is_index=True,
+                    targets=[self.accession_test_case],
+                    hdp=self.mocked_human_data_provider,  # type: ignore
+                )
+
+    def test_valid_column_values_wrong_column_name_limited_validation(self):
+        for column in self.invalid_hgvs_columns_by_name:
+            with self.subTest(column=column), self.assertRaises(ValidationError):
+                validate_hgvs_genomic_column(
+                    column,
+                    is_index=False,
+                    targets=[self.accession_test_case],
+                    hdp=None,  # type: ignore
+                )
+        for column in self.invalid_hgvs_columns_by_name:
+            with self.subTest(column=column), self.assertRaises(ValidationError):
+                validate_hgvs_genomic_column(
+                    column,
+                    is_index=True,
+                    targets=[self.accession_test_case],
+                    hdp=None,  # type: ignore
+                )
+
+
+class TestParseGenomicVariant(unittest.TestCase):
+    def setUp(self):
+        super().setUp()
+
+        self.parser = Mock()
+        self.validator = Mock()
+        self.parser.parse.return_value = "irrelevant"
+        self.validator.validate.return_value = True
+
+        self.falsy_variant_strings = [None, ""]
+        self.valid_hgvs_column = pd.Series(
+            [f"{VALID_ACCESSION}:c.1G>A", f"{VALID_ACCESSION}:c.2A>T"], name=hgvs_nt_column
+        )
+        self.invalid_hgvs_column = pd.Series(
+            [f"{VALID_ACCESSION}:c.1laksdfG>A", f"{VALID_ACCESSION}:c.2kadlfjA>T"], name=hgvs_nt_column
+        )
+
+
+@unittest.skipUnless(HGVS_INSTALLED, "HGVS module not installed")
+class TestParseGenomicVariantHgvsInstalled(TestParseGenomicVariant):
+    def test_parse_genomic_variant_nonetype_variant_string(self):
+        for variant_string in self.falsy_variant_strings:
+            with self.subTest(variant_string=variant_string):
+                valid, error = parse_genomic_variant(0, None, self.parser, self.validator)
+                assert valid
+                assert error is None
+
+    def test_parse_valid_hgvs_variant(self):
+        for variant_string in self.valid_hgvs_column:
+            with self.subTest(variant_string=variant_string):
+                valid, error = parse_genomic_variant(0, variant_string, self.parser, self.validator)
+                assert valid
+                assert error is None
+
+    def test_parse_invalid_hgvs_variant(self):
+        from hgvs.exceptions import HGVSError
+
+        self.validator.validate.side_effect = HGVSError("Invalid variant")
+
+        for variant_string in self.invalid_hgvs_column:
+            with self.subTest(variant_string=variant_string):
+                valid, error = parse_genomic_variant(0, variant_string, self.parser, self.validator)
+                assert not valid
+                assert "Failed to parse row 0 with HGVS exception:" in error
+
+
+@unittest.skipIf(HGVS_INSTALLED, "HGVS module installed")
+class TestParseGenomicVariantHgvsNotInstalled(TestParseGenomicVariant):
+    def test_parse_genomic_variant_nonetype_variant_string(self):
+        for variant_string in self.falsy_variant_strings:
+            with self.subTest(variant_string=variant_string), self.assertRaises(ModuleNotFoundError):
+                parse_genomic_variant(0, None, self.parser, self.validator)
+
+    def test_parse_valid_hgvs_variant(self):
+        for variant_string in self.valid_hgvs_column:
+            with self.subTest(variant_string=variant_string), self.assertRaises(ModuleNotFoundError):
+                parse_genomic_variant(0, variant_string, self.parser, self.validator)
+
+    def test_parse_invalid_hgvs_variant(self):
+        for variant_string in self.invalid_hgvs_column:
+            with self.subTest(variant_string=variant_string), self.assertRaises(ModuleNotFoundError):
+                parse_genomic_variant(0, variant_string, self.parser, self.validator)
+
+
+class TestParseTransgenicVariant(unittest.TestCase):
+    def setUp(self):
+        super().setUp()
+
+        self.target_sequences = {f"{VALID_ACCESSION}": "ATGC"}
+
+        self.falsy_variant_strings = [None, ""]
+        self.valid_fully_qualified_transgenic_column = pd.Series(
+            [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.2T>G {VALID_ACCESSION}:c.2T>G"], name=hgvs_nt_column
+        )
+        self.valid_basic_transgenic_column = pd.Series(["c.1A>G", "c.2T>G c.2T>G"], name=hgvs_nt_column)
+        self.invalid_transgenic_column = pd.Series(["123A>X", "NM_001:123A>Y"], name=hgvs_nt_column)
+        self.mismatched_transgenic_column = pd.Series(["c.1T>G", "c.2A>G"], name=hgvs_nt_column)
+
+    def test_parse_transgenic_variant_nonetype_variant_string(self):
+        for variant_string in self.falsy_variant_strings:
+            with self.subTest(variant_string=variant_string):
+                valid, error = parse_transgenic_variant(0, None, self.target_sequences, is_fully_qualified=False)
+                assert valid
+                assert error is None
+
+    def test_parse_valid_fully_qualified_transgenic_variant(self):
+        for variant_string in self.valid_fully_qualified_transgenic_column:
+            with self.subTest(variant_string=variant_string):
+                valid, error = parse_transgenic_variant(
+                    0, variant_string, self.target_sequences, is_fully_qualified=True
+                )
+                assert valid
+                assert error is None
+
+    def test_parse_valid_basic_transgenic_variant(self):
+        for variant_string in self.valid_basic_transgenic_column:
+            with self.subTest(variant_string=variant_string):
+                valid, error = parse_transgenic_variant(
+                    0, variant_string, self.target_sequences, is_fully_qualified=False
+                )
+                assert valid
+                assert error is None
+
+    def test_parse_invalid_transgenic_variant(self):
+        for variant_string in self.invalid_transgenic_column:
+            with self.subTest(variant_string=variant_string):
+                valid, error = parse_transgenic_variant(
+                    0, variant_string, self.target_sequences, is_fully_qualified=False
+                )
+                assert not valid
+                assert "invalid variant string" in error
+
+    def test_parse_mismatched_transgenic_variant(self):
+        for variant_string in self.mismatched_transgenic_column:
+            with self.subTest(variant_string=variant_string):
+                valid, error = parse_transgenic_variant(
+                    0, variant_string,
self.target_sequences, is_fully_qualified=False + ) + assert not valid + assert "target sequence mismatch" in error + + +class TestValidateObservedSequenceTypes(unittest.TestCase): + def setUp(self): + super().setUp() + + mock_valid_target1 = Mock() + mock_valid_target2 = Mock() + mock_valid_target1.sequence_type = "dna" + mock_valid_target1.sequence = "ATGC" + mock_valid_target2.sequence_type = "protein" + mock_valid_target2.sequence = "NM" + self.valid_targets = { + "NM_001": mock_valid_target1, + "NM_002": mock_valid_target2, + } + + mock_invalid_target1 = Mock() + mock_invalid_target2 = Mock() + mock_invalid_target1.sequence_type = "dna" + mock_invalid_target1.sequence = "ATGC" + mock_invalid_target2.sequence_type = "invalid" + mock_invalid_target2.sequence = "ABCD" + self.invalid_targets = { + "NM_001": mock_invalid_target1, + "NM_002": mock_invalid_target2, + } + + def test_validate_observed_sequence_types(self): + observed_sequence_types = validate_observed_sequence_types(self.valid_targets) + assert observed_sequence_types == ["dna", "protein"] + + def test_validate_invalid_observed_sequence_types(self): + with self.assertRaises(ValueError): + validate_observed_sequence_types(self.invalid_targets) + + def test_validate_observed_sequence_types_no_targets(self): + with self.assertRaises(ValueError): + validate_observed_sequence_types({}) + + +class TestValidateHgvsPrefixCombinations(unittest.TestCase): + def setUp(self): + super().setUp() + + self.valid_combinations = [ + ("c", None, None, False), + ("g", "n", None, False), + ("g", "c", "p", False), + ("n", None, None, True), + ] + + self.invalid_combinations = [ + ("n", "n", None, False), + ("c", "n", None, False), + ("g", "n", "p", False), + ("g", "c", None, False), + ("n", None, "p", False), + ("g", None, None, True), # invalid nucleotide prefix when transgenic + ] + + self.invalid_prefix_values = [ + ("x", None, None, False), # invalid nucleotide prefix + ("c", "x", None, False), # invalid splice prefix + ("c", None, "x", False), # invalid protein prefix + ] + + def test_valid_combinations(self): + for hgvs_nt, hgvs_splice, hgvs_pro, transgenic in self.valid_combinations: + with self.subTest(hgvs_nt=hgvs_nt, hgvs_splice=hgvs_splice, hgvs_pro=hgvs_pro, transgenic=transgenic): + validate_hgvs_prefix_combinations(hgvs_nt, hgvs_splice, hgvs_pro, transgenic) + + def test_invalid_combinations(self): + for hgvs_nt, hgvs_splice, hgvs_pro, transgenic in self.invalid_combinations: + with self.subTest(hgvs_nt=hgvs_nt, hgvs_splice=hgvs_splice, hgvs_pro=hgvs_pro, transgenic=transgenic): + with self.assertRaises(ValidationError): + validate_hgvs_prefix_combinations(hgvs_nt, hgvs_splice, hgvs_pro, transgenic) + + def test_invalid_prefix_values(self): + for hgvs_nt, hgvs_splice, hgvs_pro, transgenic in self.invalid_prefix_values: + with self.subTest(hgvs_nt=hgvs_nt, hgvs_splice=hgvs_splice, hgvs_pro=hgvs_pro, transgenic=transgenic): + with self.assertRaises(ValueError): + validate_hgvs_prefix_combinations(hgvs_nt, hgvs_splice, hgvs_pro, transgenic) diff --git a/tests/validation/test_dataframe.py b/tests/validation/test_dataframe.py deleted file mode 100644 index 378cdd7d..00000000 --- a/tests/validation/test_dataframe.py +++ /dev/null @@ -1,1121 +0,0 @@ -import itertools -from unittest import TestCase -from unittest.mock import patch - -import cdot.hgvs.dataproviders -import numpy as np -import pandas as pd -import pytest - -from mavedb.lib.validation.constants.general import ( - hgvs_nt_column, - hgvs_pro_column, - hgvs_splice_column, - 
required_score_column, -) -from mavedb.lib.validation.dataframe import ( - choose_dataframe_index_column, - generate_variant_prefixes, - infer_column_type, - sort_dataframe_columns, - standardize_dataframe, - validate_and_standardize_dataframe_pair, - validate_column_names, - validate_data_column, - validate_hgvs_genomic_column, - validate_hgvs_prefix_combinations, - validate_hgvs_transgenic_column, - validate_no_null_rows, - validate_variant_column, - validate_variant_columns_match, - validate_variant_formatting, -) -from mavedb.lib.validation.exceptions import ValidationError -from tests.helpers.constants import TEST_CDOT_TRANSCRIPT, VALID_ACCESSION - - -@pytest.fixture -def data_provider_class_attr(request, data_provider): - """ - Sets the `human_data_provider` attribute on the class from the requesting - test context to the `data_provider` fixture. This allows fixture use across - the `unittest.TestCase` class. - """ - request.cls.human_data_provider = data_provider - - -# Special DF Test Case that contains dummy data for tests below -@pytest.mark.usefixtures("data_provider_class_attr") -class DfTestCase(TestCase): - def setUp(self): - self.dataframe = pd.DataFrame( - { - hgvs_nt_column: ["g.1A>G", "g.1A>T"], - hgvs_splice_column: ["c.1A>G", "c.1A>T"], - hgvs_pro_column: ["p.Met1Val", "p.Met1Leu"], - required_score_column: [1.0, 2.0], - "extra": [12.0, 3.0], - "count1": [3.0, 5.0], - "count2": [9, 10], - "extra2": ["pathogenic", "benign"], - "mixed_types": ["test", 1.0], - "null_col": [None, None], - } - ) - - -class TestInferColumnType(TestCase): - def test_floats(self): - test_data = pd.Series([12.0, 1.0, -0.012, 5.75]) - self.assertEqual(infer_column_type(test_data), "numeric") - - def test_ints(self): - test_data = pd.Series([12, 1, 0, -5]) - self.assertEqual(infer_column_type(test_data), "numeric") - - def test_floats_with_na(self): - test_data = pd.Series([12.0, 1.0, None, -0.012, 5.75]) - self.assertEqual(infer_column_type(test_data), "numeric") - - def test_ints_with_na(self): - test_data = pd.Series([12, 1, None, 0, -5]) - self.assertEqual(infer_column_type(test_data), "numeric") - - def test_convertable_strings(self): - test_data = pd.Series(["12.5", 1.25, "0", "-5"]) - self.assertEqual(infer_column_type(test_data), "numeric") - - def test_strings(self): - test_data = pd.Series(["hello", "test", "suite", "123abc"]) - self.assertEqual(infer_column_type(test_data), "string") - - def test_strings_with_na(self): - test_data = pd.Series(["hello", "test", None, "suite", "123abc"]) - self.assertEqual(infer_column_type(test_data), "string") - - def test_mixed(self): - test_data = pd.Series(["hello", 12.123, -75, "123abc"]) - self.assertEqual(infer_column_type(test_data), "mixed") - - def test_mixed_with_na(self): - test_data = pd.Series(["hello", None, 12.123, -75, "123abc"]) - self.assertEqual(infer_column_type(test_data), "mixed") - - def test_all_na(self): - test_data = pd.Series([None] * 5) - self.assertEqual(infer_column_type(test_data), "empty") - - -class TestSortDataframeColumns(DfTestCase): - def test_preserve_sorted(self): - sorted_df = sort_dataframe_columns(self.dataframe) - pd.testing.assert_frame_equal(self.dataframe, sorted_df) - - def test_sort_dataframe(self): - sorted_df = sort_dataframe_columns( - self.dataframe[ - [ - hgvs_splice_column, - "extra", - "count1", - hgvs_pro_column, - required_score_column, - hgvs_nt_column, - "count2", - "extra2", - "mixed_types", - "null_col", - ] - ] - ) - pd.testing.assert_frame_equal(self.dataframe, sorted_df) - - def 
test_sort_dataframe_is_case_insensitive(self): - self.dataframe = self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()}) - sorted_df = sort_dataframe_columns(self.dataframe) - pd.testing.assert_frame_equal(self.dataframe, sorted_df) - - def test_sort_dataframe_preserves_extras_order(self): - sorted_df = sort_dataframe_columns( - self.dataframe[ - [ - hgvs_splice_column, - "count2", - hgvs_pro_column, - required_score_column, - hgvs_nt_column, - "count1", - "extra2", - "extra", - "mixed_types", - ] - ] - ) - pd.testing.assert_frame_equal( - self.dataframe[ - [ - hgvs_nt_column, - hgvs_splice_column, - hgvs_pro_column, - required_score_column, - "count2", - "count1", - "extra2", - "extra", - "mixed_types", - ] - ], - sorted_df, - ) - - -class TestStandardizeDataframe(DfTestCase): - def test_preserve_standardized(self): - standardized_df = standardize_dataframe(self.dataframe) - pd.testing.assert_frame_equal(self.dataframe, standardized_df) - - def test_standardize_changes_case_variants(self): - standardized_df = standardize_dataframe(self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()})) - pd.testing.assert_frame_equal(self.dataframe, standardized_df) - - def test_standardice_changes_case_scores(self): - standardized_df = standardize_dataframe( - self.dataframe.rename(columns={required_score_column: required_score_column.title()}) - ) - pd.testing.assert_frame_equal(self.dataframe, standardized_df) - - def test_standardize_preserves_extras_case(self): - standardized_df = standardize_dataframe(self.dataframe.rename(columns={"extra": "extra".upper()})) - pd.testing.assert_frame_equal(self.dataframe.rename(columns={"extra": "extra".upper()}), standardized_df) - - def test_standardize_sorts_columns(self): - standardized_df = standardize_dataframe( - self.dataframe[ - [ - hgvs_splice_column, - "count2", - hgvs_pro_column, - required_score_column, - hgvs_nt_column, - "count1", - "extra", - ] - ] - ) - pd.testing.assert_frame_equal( - self.dataframe[ - [ - hgvs_nt_column, - hgvs_splice_column, - hgvs_pro_column, - required_score_column, - "count2", - "count1", - "extra", - ] - ], - standardized_df, - ) - - -class TestValidateStandardizeDataFramePair(DfTestCase): - def test_no_targets(self): - with self.assertRaises(ValueError): - validate_and_standardize_dataframe_pair( - self.dataframe, counts_df=None, targets=[], hdp=self.human_data_provider - ) - - # TODO: Add additional DataFrames. 
Realistically, if other unit tests pass this function is ok - - -class TestValidateDataColumn(DfTestCase): - def test_valid(self): - validate_data_column(self.dataframe[required_score_column]) - - def test_null_column(self): - self.dataframe[required_score_column] = None - with self.assertRaises(ValidationError): - validate_data_column(self.dataframe[required_score_column]) - - def test_missing_data(self): - self.dataframe.loc[0, "extra"] = None - validate_data_column(self.dataframe["extra"]) - - def test_force_numeric(self): - with self.assertRaises(ValidationError): - validate_data_column(self.dataframe["extra2"], force_numeric=True) - - def test_mixed_types_invalid(self): - with self.assertRaises(ValidationError): - validate_data_column(self.dataframe["mixed_types"]) - - -class TestNullRows(DfTestCase): - def test_null_row(self): - self.dataframe.iloc[1, :] = None - with self.assertRaises(ValidationError): - validate_no_null_rows(self.dataframe) - - def test_valid(self): - validate_no_null_rows(self.dataframe) - - def test_only_hgvs_row(self): - self.dataframe.loc[1, [required_score_column, "extra", "count1", "count2"]] = None - validate_no_null_rows(self.dataframe) - - -class TestColumnNames(DfTestCase): - def test_only_two_kinds_of_dataframe(self): - with self.assertRaises(ValueError): - validate_column_names(self.dataframe, kind="score2") - - def test_score_df_has_score_column(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([required_score_column], axis=1), kind="scores") - - def test_count_df_lacks_score_column(self): - validate_column_names(self.dataframe.drop([required_score_column], axis=1), kind="counts") - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe, kind="counts") - - def test_count_df_has_score_column(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe, kind="counts") - - def test_df_with_only_scores(self): - validate_column_names(self.dataframe[[hgvs_pro_column, required_score_column]], kind="scores") - - def test_count_df_must_have_data(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe[[hgvs_nt_column, hgvs_pro_column]], kind="counts") - - def test_just_hgvs_nt(self): - validate_column_names(self.dataframe.drop([hgvs_pro_column, hgvs_splice_column], axis=1), kind="scores") - validate_column_names( - self.dataframe.drop([hgvs_pro_column, hgvs_splice_column, required_score_column], axis=1), kind="counts" - ) - - def test_just_hgvs_pro(self): - validate_column_names(self.dataframe.drop([hgvs_nt_column, hgvs_splice_column], axis=1), kind="scores") - validate_column_names( - self.dataframe.drop([hgvs_nt_column, hgvs_splice_column, required_score_column], axis=1), kind="counts" - ) - - def test_just_hgvs_pro_and_nt(self): - validate_column_names(self.dataframe.drop([hgvs_splice_column], axis=1), kind="scores") - validate_column_names(self.dataframe.drop([hgvs_splice_column, required_score_column], axis=1), kind="counts") - - def test_hgvs_splice_must_have_pro_and_nt_both_absent(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1), kind="scores") - - def test_hgvs_splice_must_have_pro_and_nt_nt_absent(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_nt_column], axis=1), kind="scores") - - def test_hgvs_splice_must_have_pro_and_nt_pro_absent(self): - with self.assertRaises(ValidationError): - 
validate_column_names(self.dataframe.drop([hgvs_pro_column], axis=1), kind="scores") - - def test_hgvs_splice_must_have_pro_and_nt_and_scores(self): - with self.assertRaises(ValidationError): - validate_column_names( - self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, required_score_column], axis=1), kind="counts" - ) - - def test_hgvs_splice_must_have_pro_and_nt_nt_scores_absent(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_nt_column, required_score_column], axis=1), kind="counts") - - def test_hgvs_splice_must_have_pro_and_nt_pro_scores_absent(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_pro_column, required_score_column], axis=1), kind="counts") - - def test_no_hgvs_column_scores(self): - with pytest.raises(ValidationError) as exc_info: - validate_column_names( - self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, hgvs_splice_column], axis=1), kind="scores" - ) - assert "dataframe does not define any variant columns" in str(exc_info.value) - - def test_no_hgvs_column_counts(self): - with pytest.raises(ValidationError) as exc_info: - validate_column_names( - self.dataframe.drop( - [hgvs_nt_column, hgvs_pro_column, hgvs_splice_column, required_score_column], axis=1 - ), - kind="counts", - ) - assert "dataframe does not define any variant columns" in str(exc_info.value) - - def test_validation_ignores_column_ordering_scores(self): - validate_column_names( - self.dataframe[[hgvs_nt_column, required_score_column, hgvs_pro_column, hgvs_splice_column]], kind="scores" - ) - validate_column_names(self.dataframe[[required_score_column, hgvs_nt_column, hgvs_pro_column]], kind="scores") - validate_column_names(self.dataframe[[hgvs_pro_column, required_score_column, hgvs_nt_column]], kind="scores") - - def test_validation_ignores_column_ordering_counts(self): - validate_column_names( - self.dataframe[[hgvs_nt_column, "count1", hgvs_pro_column, hgvs_splice_column, "count2"]], kind="counts" - ) - validate_column_names(self.dataframe[["count1", "count2", hgvs_nt_column, hgvs_pro_column]], kind="counts") - validate_column_names(self.dataframe[[hgvs_pro_column, "count1", "count2", hgvs_nt_column]], kind="counts") - - def test_validation_is_case_insensitive(self): - validate_column_names(self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()}), kind="scores") - validate_column_names( - self.dataframe.rename(columns={required_score_column: required_score_column.title()}), kind="scores" - ) - - def test_duplicate_hgvs_column_names_scores(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={hgvs_pro_column: hgvs_nt_column}), kind="scores") - - def test_duplicate_hgvs_column_names_counts(self): - with self.assertRaises(ValidationError): - validate_column_names( - self.dataframe.drop([required_score_column], axis=1).rename(columns={hgvs_pro_column: hgvs_nt_column}), - kind="counts", - ) - - def test_duplicate_score_column_names(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={"extra": required_score_column}), kind="scores") - - def test_duplicate_data_column_names_scores(self): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={"count2": "count1"}), kind="scores") - - def test_duplicate_data_column_names_counts(self): - with self.assertRaises(ValidationError): - validate_column_names( - self.dataframe.drop([required_score_column], 
axis=1).rename(columns={"count2": "count1"}), kind="counts" - ) - - # Written without @pytest.mark.parametrize. See: https://pytest.org/en/7.4.x/how-to/unittest.html#pytest-features-in-unittest-testcase-subclasses - def test_invalid_column_names_scores(self): - invalid_values = [None, np.nan, "", " "] - for value in invalid_values: - with self.subTest(value=value): - with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={hgvs_splice_column: value}), kind="scores") - - def test_invalid_column_names_counts(self): - invalid_values = [None, np.nan, "", " "] - for value in invalid_values: - with self.subTest(value=value): - with self.assertRaises(ValidationError): - validate_column_names( - self.dataframe.drop([required_score_column], axis=1).rename( - columns={hgvs_splice_column: value} - ), - kind="counts", - ) - - def test_ignore_column_ordering_scores(self): - validate_column_names( - self.dataframe[[hgvs_splice_column, "extra", "count1", hgvs_pro_column, "score", hgvs_nt_column, "count2"]], - kind="scores", - ) - - def test_ignore_column_ordering_counts(self): - validate_column_names( - self.dataframe[[hgvs_splice_column, "extra", "count1", hgvs_pro_column, hgvs_nt_column, "count2"]], - kind="counts", - ) - - -class TestChooseDataframeIndexColumn(DfTestCase): - def setUp(self): - super().setUp() - - def test_nt_index_column(self): - index = choose_dataframe_index_column(self.dataframe) - assert index == hgvs_nt_column - - def test_pro_index_column(self): - index = choose_dataframe_index_column(self.dataframe.drop(hgvs_nt_column, axis=1)) - assert index == hgvs_pro_column - - def test_no_valid_index_column(self): - with self.assertRaises(ValidationError): - choose_dataframe_index_column(self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1)) - - -class TestValidateHgvsPrefixCombinations(TestCase): - def setUp(self): - self.valid_combinations = [ - ("g", "c", "p"), - ("m", "c", "p"), - ("o", "c", "p"), - ("g", "n", None), - ("m", "n", None), - ("o", "n", None), - ("n", None, None), - ("c", None, "p"), - (None, None, "p"), - (None, None, None), # valid for this validator, but a dataframe with no variants should be caught upstream - ] - self.invalid_combinations = [ - t - for t in itertools.product(("c", "n", "g", "m", "o", None), ("c", "n", None), ("p", None)) - if t not in self.valid_combinations - ] - - def test_valid_combinations(self): - for t in self.valid_combinations: - with self.subTest(t=t): - validate_hgvs_prefix_combinations(*t, True) - - def test_invalid_combinations(self): - for t in self.invalid_combinations: - with self.subTest(t=t): - with self.assertRaises(ValidationError): - validate_hgvs_prefix_combinations(*t, True) - - # TODO: biocommons.HGVS validation clashes here w/ our custom validators: - # n. prefix is the problematic one, for now. 
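For orientation, a minimal sketch (not part of this changeset) of the prefix-combination contract that the cases above exercise. It assumes the positional signature used in these tests, (nt_prefix, splice_prefix, pro_prefix, flag), and the import paths of the deleted test module; the exact meaning of the trailing boolean flag is inferred from how the tests pass it and is an assumption.

from mavedb.lib.validation.dataframe import validate_hgvs_prefix_combinations
from mavedb.lib.validation.exceptions import ValidationError

# Permitted combinations return without raising.
validate_hgvs_prefix_combinations("g", "c", "p", True)   # genomic nt + coding splice + protein
validate_hgvs_prefix_combinations("c", None, "p", True)  # coding nt + protein, no splice column

# A combination outside the allowed set raises ValidationError.
try:
    validate_hgvs_prefix_combinations("c", "c", None, True)
except ValidationError:
    pass

# A value that is not a recognised prefix at all raises ValueError instead.
try:
    validate_hgvs_prefix_combinations("p", None, None, True)
except ValueError:
    pass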
- @pytest.mark.skip() - def test_invalid_combinations_biocommons(self): - for t in self.invalid_combinations: - with self.subTest(t=t): - with self.assertRaises(ValidationError): - validate_hgvs_prefix_combinations(*t, False) - - def test_invalid_combinations_value_error_nt(self): - with self.assertRaises(ValueError): - validate_hgvs_prefix_combinations("p", None, None, True) - - def test_invalid_combinations_value_error_nt_pro(self): - with self.assertRaises(ValueError): - validate_hgvs_prefix_combinations("c", None, "P", True) - - def test_invalid_combinations_value_error_splice(self): - with self.assertRaises(ValueError): - validate_hgvs_prefix_combinations("x", "c", "p", True) - - -class TestValidateVariantFormatting(TestCase): - def setUp(self) -> None: - super().setUp() - - self.valid = pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column) - self.inconsistent = pd.Series(["g.1A>G", "c.1A>T"], name=hgvs_nt_column) - self.valid_prefixes = ["g."] - self.invalid_prefixes = ["c."] - self.valid_target = ["single_target"] - - self.valid_multi = pd.Series(["test1:g.1A>G", "test2:g.1A>T"], name=hgvs_nt_column) - self.invalid_multi = pd.Series(["test3:g.1A>G", "test3:g.1A>T"], name=hgvs_nt_column) - self.inconsistent_multi = pd.Series(["test1:g.1A>G", "test2:c.1A>T"], name=hgvs_nt_column) - self.valid_targets = ["test1", "test2"] - - def test_single_target_valid_variants(self): - validate_variant_formatting(self.valid, self.valid_prefixes, self.valid_target, False) - - def test_single_target_inconsistent_variants(self): - with self.assertRaises(ValidationError): - validate_variant_formatting(self.inconsistent, self.valid_prefixes, self.valid_target, False) - - def test_single_target_invalid_prefixes(self): - with self.assertRaises(ValidationError): - validate_variant_formatting(self.valid, self.invalid_prefixes, self.valid_target, False) - - def test_multi_target_valid_variants(self): - validate_variant_formatting(self.valid_multi, self.valid_prefixes, self.valid_targets, True) - - def test_multi_target_inconsistent_variants(self): - with self.assertRaises(ValidationError): - validate_variant_formatting(self.inconsistent_multi, self.valid_prefixes, self.valid_targets, True) - - def test_multi_target_invalid_prefixes(self): - with self.assertRaises(ValidationError): - validate_variant_formatting(self.valid_multi, self.invalid_prefixes, self.valid_targets, True) - - def test_multi_target_lacking_full_coords(self): - with self.assertRaises(ValidationError): - validate_variant_formatting(self.valid, self.valid_prefixes, self.valid_targets, True) - - def test_multi_target_invalid_accessions(self): - with self.assertRaises(ValidationError): - validate_variant_formatting(self.invalid_multi, self.valid_prefixes, self.valid_targets, True) - - -class TestGenerateVariantPrefixes(DfTestCase): - def setUp(self): - super().setUp() - - self.nt_prefixes = ["c.", "n.", "g.", "m.", "o."] - self.splice_prefixes = ["c.", "n."] - self.pro_prefixes = ["p."] - - def test_nt_prefixes(self): - prefixes = generate_variant_prefixes(self.dataframe[hgvs_nt_column]) - assert prefixes == self.nt_prefixes - - def test_pro_prefixes(self): - prefixes = generate_variant_prefixes(self.dataframe[hgvs_pro_column]) - assert prefixes == self.pro_prefixes - - def test_splice_prefixes(self): - prefixes = generate_variant_prefixes(self.dataframe[hgvs_splice_column]) - assert prefixes == self.splice_prefixes - - def test_unrecognized_column_prefixes(self): - with self.assertRaises(ValueError): - 
generate_variant_prefixes(self.dataframe["extra"]) - - -class TestValidateVariantColumn(DfTestCase): - def setUp(self): - super().setUp() - - def test_invalid_column_type_index(self): - with self.assertRaises(ValidationError): - validate_variant_column(self.dataframe[required_score_column], True) - - def test_invalid_column_type(self): - with self.assertRaises(ValidationError): - validate_variant_column(self.dataframe[required_score_column], False) - - def test_null_values_type_index(self): - self.dataframe[hgvs_nt_column].iloc[1] = pd.NA - with self.assertRaises(ValidationError): - validate_variant_column(self.dataframe.iloc[0, :], True) - - def test_null_values_type(self): - self.dataframe[hgvs_nt_column].iloc[1] = pd.NA - validate_variant_column(self.dataframe[hgvs_nt_column], False) - - def test_nonunique_values_index(self): - self.dataframe["dup_col"] = ["p.Met1Leu", "p.Met1Leu"] - with self.assertRaises(ValidationError): - validate_variant_column(self.dataframe["dup_col"], True) - - def test_nonunique_values(self): - self.dataframe["dup_col"] = ["p.Met1Leu", "p.Met1Leu"] - validate_variant_column(self.dataframe["dup_col"], False) - - def test_variant_column_is_valid(self): - validate_variant_column(self.dataframe[hgvs_nt_column], True) - - -class TestValidateVariantColumnsMatch(DfTestCase): - def test_same_df(self): - validate_variant_columns_match(self.dataframe, self.dataframe) - - def test_ignore_order(self): - validate_variant_columns_match(self.dataframe, self.dataframe.iloc[::-1]) - - def test_missing_column(self): - with self.assertRaises(ValidationError): - validate_variant_columns_match(self.dataframe, self.dataframe.drop(hgvs_nt_column, axis=1)) - with self.assertRaises(ValidationError): - validate_variant_columns_match(self.dataframe.drop(hgvs_nt_column, axis=1), self.dataframe) - - def test_missing_variant(self): - df2 = self.dataframe.copy() - df2.loc[0, hgvs_pro_column] = None - with self.assertRaises(ValidationError): - validate_variant_columns_match(self.dataframe, df2) - - -# Spoof the target sequence type -class NucleotideSequenceTestCase: - def __init__(self): - self.sequence = "ATG" - self.sequence_type = "dna" - - -class ProteinSequenceTestCase: - def __init__(self): - self.sequence = "MTG" - self.sequence_type = "protein" - - -class TestValidateTransgenicColumn(DfTestCase): - def setUp(self): - super().setUp() - - self.valid_hgvs_columns = [ - pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column), - pd.Series(["m.1A>G", "m.1A>T"], name=hgvs_nt_column), - pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_nt_column), - pd.Series(["n.1A>G", "n.1A>T"], name=hgvs_nt_column), - pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_splice_column), - pd.Series(["p.Met1Val", "p.Met1Leu"], name=hgvs_pro_column), - ] - - self.valid_hgvs_columns_nt_only = [ - pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_nt_column), - pd.Series(["m.1A>G", "m.1A>T"], name=hgvs_nt_column), - pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_nt_column), - pd.Series(["n.1A>G", "n.1A>T"], name=hgvs_nt_column), - ] - - self.valid_hgvs_columns_multi_target = [ - pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name=hgvs_nt_column), - pd.Series(["test_nt:m.1A>G", "test_nt:m.1A>T"], name=hgvs_nt_column), - pd.Series(["test_nt:c.1A>G", "test_nt:c.1A>T"], name=hgvs_nt_column), - pd.Series(["test_nt:n.1A>G", "test_nt:n.1A>T"], name=hgvs_nt_column), - pd.Series(["test_nt:c.1A>G", "test_pt:c.1A>T"], name=hgvs_splice_column), - pd.Series(["test_pt:p.Met1Val", "test_pt:p.Met1Leu"], name=hgvs_pro_column), - pd.Series(["test_nt:p.Met1Val", 
"test_pt:p.Met1Leu"], name=hgvs_pro_column), - pd.Series(["test_nt:p.Met1Val", "test_nt:p.Met1Leu"], name=hgvs_pro_column), - ] - - self.valid_hgvs_columns_nt_only_multi_target = [ - pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name=hgvs_nt_column), - pd.Series(["test_nt:m.1A>G", "test_nt:m.1A>T"], name=hgvs_nt_column), - pd.Series(["test_nt:c.1A>G", "test_nt:c.1A>T"], name=hgvs_nt_column), - pd.Series(["test_nt:n.1A>G", "test_nt:n.1A>T"], name=hgvs_nt_column), - ] - - self.valid_hgvs_columns_invalid_names = [ - pd.Series(["g.1A>G", "g.1A>T"], name="invalid_column_name"), - pd.Series(["p.Met1Val", "p.Met1Leu"], name="invalid_column_name"), - ] - - self.valid_hgvs_columns_invalid_names_multi_target = [ - pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name="invalid_column_name"), - pd.Series(["test_pt:p.Met1Val", "test_pt:p.Met1Leu"], name="invalid_column_name"), - ] - - self.valid_hgvs_columns_invalid_for_index = [ - # missing data - pd.Series(["c.1A>G", None], name=hgvs_nt_column), - pd.Series([None, "p.Met1Val"], name=hgvs_pro_column), - pd.Series([None, None], name=hgvs_nt_column), - pd.Series([None, None], name=hgvs_pro_column), - # duplicate rows - pd.Series(["c.1A>G", "c.1A>G"], name=hgvs_nt_column), - pd.Series(["p.Met1Val", "p.Met1Val"], name=hgvs_pro_column), - ] - - self.valid_hgvs_columns_invalid_for_index_multi_target = [ - # missing data - pd.Series(["test_nt:c.1A>G", None], name=hgvs_nt_column), - pd.Series([None, "test_pt:p.Met1Val"], name=hgvs_pro_column), - pd.Series([None, None], name=hgvs_nt_column), - pd.Series([None, None], name=hgvs_pro_column), - # duplicate rows - pd.Series(["test_nt:c.1A>G", "test_nt:c.1A>G"], name=hgvs_nt_column), - pd.Series(["test_nt:p.Met1Val", "test_nt:p.Met1Val"], name=hgvs_pro_column), - ] - - self.invalid_hgvs_columns_by_name = [ - pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_splice_column), - pd.Series(["g.1A>G", "g.1A>T"], name=hgvs_pro_column), - pd.Series(["c.1A>G", "c.1A>T"], name=hgvs_pro_column), - pd.Series(["n.1A>G", "n.1A>T"], name=hgvs_pro_column), - pd.Series(["p.Met1Val", "p.Met1Leu"], name=hgvs_nt_column), - ] - - self.invalid_hgvs_columns_by_name_multi_target = [ - pd.Series(["test_nt:g.1A>G", "test_nt:g.1A>T"], name=hgvs_splice_column), - pd.Series(["test_pt:g.1A>G", "test_pt:g.1A>T"], name=hgvs_pro_column), - pd.Series(["test_nt:c.1A>G", "test_pt:c.1A>T"], name=hgvs_pro_column), - pd.Series(["test_nt:n.1A>G", "test_nt:n.1A>T"], name=hgvs_pro_column), - pd.Series(["test_nt:p.Met1Val", "test_nt:p.Met1Leu"], name=hgvs_nt_column), - pd.Series(["test_nt:p.Met1Val", "test_pt:p.Met1Leu"], name=hgvs_nt_column), - ] - - self.invalid_hgvs_columns_by_contents = [ - pd.Series(["r.1a>g", "r.1a>u"], name=hgvs_splice_column), # rna not allowed - pd.Series(["r.1a>g", "r.1a>u"], name=hgvs_nt_column), # rna not allowed - pd.Series(["c.1A>G", "c.5A>T"], name=hgvs_nt_column), # out of bounds for target - pd.Series(["c.1A>G", "_wt"], name=hgvs_nt_column), # old special variant - pd.Series(["p.Met1Leu", "_sy"], name=hgvs_pro_column), # old special variant - pd.Series(["n.1A>G", "c.1A>T"], name=hgvs_nt_column), # mixed prefix - pd.Series(["c.1A>G", "p.Met1Leu"], name=hgvs_pro_column), # mixed types/prefix - pd.Series(["c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_nt_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_splice_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_pro_column), # contains numeric - ] - - self.invalid_hgvs_columns_by_contents_multi_target = [ - 
pd.Series(["test_nt:r.1a>g", "test_nt:r.1a>u"], name=hgvs_splice_column), # rna not allowed - pd.Series(["test_nt:r.1a>g", "test_nt:r.1a>u"], name=hgvs_nt_column), # rna not allowed - pd.Series(["bad_label:r.1a>g", "test_nt:r.1a>u"], name=hgvs_nt_column), # invalid label - pd.Series(["test_nt:c.1A>G", "test_nt:c.5A>T"], name=hgvs_nt_column), # out of bounds for target - pd.Series(["test_nt:c.1A>G", "test_nt:_wt"], name=hgvs_nt_column), # old special variant - pd.Series(["test_pt:p.Met1Leu", "test_nt:_sy"], name=hgvs_pro_column), # old special variant - pd.Series(["test_nt:n.1A>G", "test_nt:c.1A>T"], name=hgvs_nt_column), # mixed prefix - pd.Series(["test_nt:c.1A>G", "test_pt:p.Met1Leu"], name=hgvs_pro_column), # mixed types/prefix - pd.Series(["test_pt:c.1A>G", "bad_label:p.Met1Leu"], name=hgvs_pro_column), # invalid label - pd.Series(["test_nt:c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_nt_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_splice_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_pro_column), # contains numeric - ] - - self.nt_sequence_test_case = NucleotideSequenceTestCase() - self.pt_sequence_test_case = ProteinSequenceTestCase() - - def test_valid_columns_single_target(self): - for column in self.valid_hgvs_columns: - with self.subTest(column=column): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - for column in self.valid_hgvs_columns_invalid_for_index: - with self.subTest(column=column): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - - def test_valid_columns_multi_target(self): - for column in self.valid_hgvs_columns_multi_target: - with self.subTest(column=column): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - for column in self.valid_hgvs_columns_invalid_for_index_multi_target: - with self.subTest(column=column): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - - # Test when supplied targets do not contain a DNA sequence (only valid for hgvs_nt col) - def test_valid_columns_invalid_supplied_targets(self): - for column in self.valid_hgvs_columns_nt_only: - with self.subTest(column=column): - with self.assertRaises(ValueError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_pt": self.pt_sequence_test_case}, # type: ignore - ) - - # Test when multiple supplied targets do not contain a DNA sequence (only valid for hgvs_nt col) - def test_valid_columns_invalid_supplied_targets_multi_target(self): - for column in self.valid_hgvs_columns_nt_only_multi_target: - with self.subTest(column=column): - with self.assertRaises(ValueError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_pt": self.pt_sequence_test_case, "test_pt_2": self.pt_sequence_test_case}, # type: ignore - ) - - def test_valid_columns_invalid_column_name(self): - for column in self.valid_hgvs_columns_invalid_names: - with self.subTest(column=column): - with self.assertRaises(ValueError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - - def 
test_valid_columns_invalid_column_name_multi_target(self): - for column in self.valid_hgvs_columns_invalid_names_multi_target: - with self.subTest(column=column): - with self.assertRaises(ValueError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - - def test_index_columns(self): - for column in self.valid_hgvs_columns: - with self.subTest(column=column): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - for column in self.valid_hgvs_columns_invalid_for_index: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - - def test_index_columns_multi_target(self): - for column in self.valid_hgvs_columns_multi_target: - with self.subTest(column=column): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - for column in self.valid_hgvs_columns_invalid_for_index_multi_target: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - - def test_invalid_column_values(self): - for column in self.invalid_hgvs_columns_by_contents: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - for column in self.invalid_hgvs_columns_by_contents: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - - def test_invalid_column_values_multi_target(self): - for column in self.invalid_hgvs_columns_by_contents_multi_target: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - for column in self.invalid_hgvs_columns_by_contents_multi_target: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - - def test_valid_column_values_wrong_column_name(self): - for column in self.invalid_hgvs_columns_by_name: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - for column in self.invalid_hgvs_columns_by_name: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case}, # type: ignore - ) - - def test_valid_column_values_wrong_column_name_multi_target(self): - for column in self.invalid_hgvs_columns_by_name: - with self.subTest(column=column): - with 
self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=False, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - for column in self.invalid_hgvs_columns_by_name: - with self.subTest(column=column): - with self.assertRaises(ValidationError): - validate_hgvs_transgenic_column( - column, - is_index=True, - targets={"test_nt": self.nt_sequence_test_case, "test_pt": self.pt_sequence_test_case}, # type: ignore - ) - - -# Spoof the accession type -class AccessionTestCase: - def __init__(self): - self.accession = VALID_ACCESSION - - -class TestValidateHgvsGenomicColumn(DfTestCase): - def setUp(self): - super().setUp() - - self.accession_test_case = AccessionTestCase() - - self.valid_hgvs_column = pd.Series( - [f"{VALID_ACCESSION}:c.1G>A", f"{VALID_ACCESSION}:c.2A>T"], name=hgvs_nt_column - ) - self.missing_data = pd.Series([f"{VALID_ACCESSION}:c.3T>G", None], name=hgvs_nt_column) - self.duplicate_data = pd.Series([f"{VALID_ACCESSION}:c.4A>G", f"{VALID_ACCESSION}:c.4A>G"], name=hgvs_nt_column) - - self.invalid_hgvs_columns_by_name = [ - pd.Series([f"{VALID_ACCESSION}:g.1A>G", f"{VALID_ACCESSION}:g.1A>T"], name=hgvs_splice_column), - pd.Series([f"{VALID_ACCESSION}:g.1A>G", f"{VALID_ACCESSION}:g.1A>T"], name=hgvs_pro_column), - pd.Series([f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.1A>T"], name=hgvs_pro_column), - pd.Series([f"{VALID_ACCESSION}:n.1A>G", f"{VALID_ACCESSION}:n.1A>T"], name=hgvs_pro_column), - pd.Series([f"{VALID_ACCESSION}:p.Met1Val", f"{VALID_ACCESSION}:p.Met1Leu"], name=hgvs_nt_column), - ] - - self.invalid_hgvs_columns_by_contents = [ - pd.Series( - [f"{VALID_ACCESSION}:r.1a>g", f"{VALID_ACCESSION}:r.1a>u"], name=hgvs_splice_column - ), # rna not allowed - pd.Series( - [f"{VALID_ACCESSION}:r.1a>g", f"{VALID_ACCESSION}:r.1a>u"], name=hgvs_nt_column - ), # rna not allowed - pd.Series( - [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.5A>T"], name=hgvs_nt_column - ), # out of bounds for target - pd.Series([f"{VALID_ACCESSION}:c.1A>G", "_wt"], name=hgvs_nt_column), # old special variant - pd.Series([f"{VALID_ACCESSION}:p.Met1Leu", "_sy"], name=hgvs_pro_column), # old special variant - pd.Series([f"{VALID_ACCESSION}:n.1A>G", f"{VALID_ACCESSION}:c.1A>T"], name=hgvs_nt_column), # mixed prefix - pd.Series( - [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:p.Met1Leu"], name=hgvs_pro_column - ), # mixed types/prefix - pd.Series(["c.1A>G", "p.Met1Leu"], name=hgvs_pro_column), # variants should be fully qualified - pd.Series([f"{VALID_ACCESSION}:c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_nt_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_splice_column), # contains numeric - pd.Series([1.0, 2.5], name=hgvs_pro_column), # contains numeric - ] - - def test_valid_variant(self): - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ): - validate_hgvs_genomic_column( - self.valid_hgvs_column, is_index=False, targets=[self.accession_test_case], hdp=self.human_data_provider - ) # type: ignore - - def test_valid_variant_valid_missing(self): - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ): - validate_hgvs_genomic_column( - self.missing_data, is_index=False, targets=[self.accession_test_case], hdp=self.human_data_provider - ) # type: ignore - - def test_valid_variant_valid_duplicate(self): 
- with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ): - validate_hgvs_genomic_column( - self.missing_data, is_index=False, targets=[self.accession_test_case], hdp=self.human_data_provider - ) # type: ignore - - def test_valid_variant_index(self): - with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ): - validate_hgvs_genomic_column( - self.valid_hgvs_column, is_index=True, targets=[self.accession_test_case], hdp=self.human_data_provider - ) # type: ignore - - def test_valid_variant_invalid_missing_index(self): - with ( - self.assertRaises(ValidationError), - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ), - ): - validate_hgvs_genomic_column( - self.missing_data, is_index=True, targets=[self.accession_test_case], hdp=self.human_data_provider - ) # type: ignore - - def test_valid_variant_invalid_duplicate_index(self): - with ( - self.assertRaises(ValidationError), - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ), - ): - validate_hgvs_genomic_column( - self.duplicate_data, is_index=True, targets=[self.accession_test_case], hdp=self.human_data_provider - ) # type: ignore - - def test_invalid_column_values(self): - for column in self.invalid_hgvs_columns_by_contents: - with ( - self.subTest(column=column), - self.assertRaises(ValidationError), - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ), - ): - validate_hgvs_genomic_column( - column, - is_index=False, - targets=[self.accession_test_case], - hdp=self.human_data_provider, # type: ignore - ) - for column in self.invalid_hgvs_columns_by_contents: - with ( - self.subTest(column=column), - self.assertRaises(ValidationError), - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ), - ): - validate_hgvs_genomic_column( - column, - is_index=True, - targets=[self.accession_test_case], - hdp=self.human_data_provider, # type: ignore - ) - - def test_valid_column_values_wrong_column_name(self): - for column in self.invalid_hgvs_columns_by_name: - with ( - self.subTest(column=column), - self.assertRaises(ValidationError), - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ), - ): - validate_hgvs_genomic_column( - column, - is_index=False, - targets=[self.accession_test_case], - hdp=self.human_data_provider, # type: ignore - ) - for column in self.invalid_hgvs_columns_by_name: - with ( - self.subTest(column=column), - self.assertRaises(ValidationError), - patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, - "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, - ), - ): - validate_hgvs_genomic_column( - column, - is_index=True, - targets=[self.accession_test_case], - hdp=self.human_data_provider, # type: ignore - ) - - # TODO: Test multiple targets diff --git a/tests/view_models/test_experiment.py b/tests/view_models/test_experiment.py index 77e9e472..381ea7a1 100644 --- a/tests/view_models/test_experiment.py +++ b/tests/view_models/test_experiment.py @@ -1,5 +1,4 @@ import pytest -from fastapi.encoders import jsonable_encoder from mavedb.view_models.experiment import ExperimentCreate from tests.helpers.constants import TEST_MINIMAL_EXPERIMENT @@ -7,7 
+6,7 @@ # Test valid experiment def test_create_experiment(): - experiment = ExperimentCreate(**jsonable_encoder(TEST_MINIMAL_EXPERIMENT)) + experiment = ExperimentCreate(**TEST_MINIMAL_EXPERIMENT) assert experiment.title == "Test Experiment Title" assert experiment.short_description == "Test experiment" assert experiment.abstract_text == "Abstract" @@ -16,9 +15,9 @@ def test_create_experiment(): def test_cannot_create_experiment_without_a_title(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"title"}) + experiment.pop("title") with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "field required" in str(exc_info.value) assert "title" in str(exc_info.value) @@ -26,11 +25,10 @@ def test_cannot_create_experiment_without_a_title(): def test_cannot_create_experiment_with_a_space_title(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"title"}) - invalid_experiment["title"] = " " + experiment["title"] = " " with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "This field is required and cannot be empty." in str(exc_info.value) assert "title" in str(exc_info.value) @@ -38,11 +36,10 @@ def test_cannot_create_experiment_with_a_space_title(): def test_cannot_create_experiment_with_an_empty_title(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"title"}) - invalid_experiment["title"] = "" + experiment["title"] = "" with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "none is not an allowed value" in str(exc_info.value) assert "title" in str(exc_info.value) @@ -50,10 +47,10 @@ def test_cannot_create_experiment_with_an_empty_title(): def test_cannot_create_experiment_without_a_short_description(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"shortDescription"}) + experiment.pop("shortDescription") with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "field required" in str(exc_info.value) assert "shortDescription" in str(exc_info.value) @@ -61,11 +58,10 @@ def test_cannot_create_experiment_without_a_short_description(): def test_cannot_create_experiment_with_a_space_short_description(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"shortDescription"}) - invalid_experiment["shortDescription"] = " " + experiment["shortDescription"] = " " with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "This field is required and cannot be empty." 
in str(exc_info.value) assert "shortDescription" in str(exc_info.value) @@ -73,11 +69,10 @@ def test_cannot_create_experiment_with_a_space_short_description(): def test_cannot_create_experiment_with_an_empty_short_description(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"shortDescription"}) - invalid_experiment["shortDescription"] = "" + experiment["shortDescription"] = "" with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "none is not an allowed value" in str(exc_info.value) assert "shortDescription" in str(exc_info.value) @@ -85,10 +80,10 @@ def test_cannot_create_experiment_with_an_empty_short_description(): def test_cannot_create_experiment_without_an_abstract(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"abstractText"}) + experiment.pop("abstractText") with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "field required" in str(exc_info.value) assert "abstractText" in str(exc_info.value) @@ -96,11 +91,10 @@ def test_cannot_create_experiment_without_an_abstract(): def test_cannot_create_experiment_with_a_space_abstract(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"abstractText"}) - invalid_experiment["abstractText"] = " " + experiment["abstractText"] = " " with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "This field is required and cannot be empty." in str(exc_info.value) assert "abstractText" in str(exc_info.value) @@ -108,11 +102,10 @@ def test_cannot_create_experiment_with_a_space_abstract(): def test_cannot_create_experiment_with_an_empty_abstract(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"abstractText"}) - invalid_experiment["abstractText"] = "" + experiment["abstractText"] = "" with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "none is not an allowed value" in str(exc_info.value) assert "abstractText" in str(exc_info.value) @@ -120,10 +113,10 @@ def test_cannot_create_experiment_with_an_empty_abstract(): def test_cannot_create_experiment_without_a_method(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"methodText"}) + experiment.pop("methodText") with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "field required" in str(exc_info.value) assert "methodText" in str(exc_info.value) @@ -131,11 +124,10 @@ def test_cannot_create_experiment_without_a_method(): def test_cannot_create_experiment_with_a_space_method(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"methodText"}) - invalid_experiment["methodText"] = " " + experiment["methodText"] = " " with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "This field is required and cannot be empty." 
in str(exc_info.value) assert "methodText" in str(exc_info.value) @@ -143,11 +135,10 @@ def test_cannot_create_experiment_with_a_space_method(): def test_cannot_create_experiment_with_an_empty_method(): experiment = TEST_MINIMAL_EXPERIMENT.copy() - invalid_experiment = jsonable_encoder(experiment, exclude={"methodText"}) - invalid_experiment["methodText"] = "" + experiment["methodText"] = "" with pytest.raises(ValueError) as exc_info: - ExperimentCreate(**invalid_experiment) + ExperimentCreate(**experiment) assert "none is not an allowed value" in str(exc_info.value) assert "methodText" in str(exc_info.value) diff --git a/tests/view_models/test_external_gene_identifiers.py b/tests/view_models/test_external_gene_identifiers.py index 5632975a..a2249c70 100644 --- a/tests/view_models/test_external_gene_identifiers.py +++ b/tests/view_models/test_external_gene_identifiers.py @@ -4,7 +4,7 @@ from mavedb.view_models.external_gene_identifier_offset import ExternalGeneIdentifierOffsetCreate -def test_create_ensemble_identifier(client): +def test_create_ensemble_identifier(): # Test valid identifier db_name = "Ensembl" identifier = "ENSG00000103275" @@ -13,7 +13,7 @@ def test_create_ensemble_identifier(client): assert externalIdentifier.identifier == "ENSG00000103275" -def test_create_invalid_ensemble_identifier(client): +def test_create_invalid_ensemble_identifier(): # Test valid identifier db_name = "Ensembl" invalid_identifier = "not_an_identifier" @@ -22,7 +22,7 @@ def test_create_invalid_ensemble_identifier(client): assert "'not_an_identifier' is not a valid Ensembl accession." in str(exc_info.value) -def test_create_uniprot_identifier(client): +def test_create_uniprot_identifier(): db_name = "UniProt" identifier = "P63279" externalIdentifier = ExternalGeneIdentifierCreate(db_name=db_name, identifier=identifier) @@ -30,7 +30,7 @@ def test_create_uniprot_identifier(client): assert externalIdentifier.identifier == "P63279" -def test_create_invalid_uniprot_identifier(client): +def test_create_invalid_uniprot_identifier(): db_name = "UniProt" invalid_identifier = "not_an_identifier" with pytest.raises(ValueError) as exc_info: @@ -38,7 +38,7 @@ def test_create_invalid_uniprot_identifier(client): assert "'not_an_identifier' is not a valid UniProt accession." in str(exc_info.value) -def test_create_refseq_identifier(client): +def test_create_refseq_identifier(): db_name = "RefSeq" identifier = "NM_003345" externalIdentifier = ExternalGeneIdentifierCreate(db_name=db_name, identifier=identifier) @@ -46,7 +46,7 @@ def test_create_refseq_identifier(client): assert externalIdentifier.identifier == "NM_003345" -def test_create_invalid_refseq_identifier(client): +def test_create_invalid_refseq_identifier(): db_name = "RefSeq" invalid_identifier = "not_an_identifier" with pytest.raises(ValueError) as exc_info: @@ -54,7 +54,7 @@ def test_create_invalid_refseq_identifier(client): assert "'not_an_identifier' is not a valid RefSeq accession." 
in str(exc_info.value) -def test_empty_db_name(client): +def test_empty_db_name(): db_name = "" identifier = "ENSG00000103275" with pytest.raises(ValueError) as exc_info: @@ -62,7 +62,7 @@ def test_empty_db_name(client): assert "none is not an allowed value" in str(exc_info.value) -def test_space_db_name(client): +def test_space_db_name(): db_name = " " identifier = "ENSG00000103275" with pytest.raises(ValueError) as exc_info: @@ -70,7 +70,7 @@ def test_space_db_name(client): assert "db_name should not be empty" in str(exc_info.value) -def test_none_db_name(client): +def test_none_db_name(): db_name = None identifier = "ENSG00000103275" with pytest.raises(ValueError) as exc_info: @@ -78,7 +78,7 @@ def test_none_db_name(client): assert "none is not an allowed value" in str(exc_info.value) -def test_invalid_db_name(client): +def test_invalid_db_name(): db_name = "Invalid" identifier = "ENSG00000103275" with pytest.raises(ValueError) as exc_info: @@ -89,13 +89,13 @@ def test_invalid_db_name(client): ) -def test_create_identifier_with_offset(client): +def test_create_identifier_with_offset(): identifier = {"db_name": "RefSeq", "identifier": "NM_003345"} externalIdentifier = ExternalGeneIdentifierOffsetCreate(identifier=identifier, offset=1) assert externalIdentifier.offset == 1 -def test_create_identifier_with_string_offset(client): +def test_create_identifier_with_string_offset(): identifier = {"db_name": "RefSeq", "identifier": "NM_003345"} offset = "invalid" with pytest.raises(ValueError) as exc_info: @@ -103,7 +103,7 @@ def test_create_identifier_with_string_offset(client): assert "value is not a valid integer" in str(exc_info.value) -def test_create_identifier_with_negative_offset(client): +def test_create_identifier_with_negative_offset(): identifier = {"db_name": "RefSeq", "identifier": "NM_003345"} with pytest.raises(ValueError) as exc_info: ExternalGeneIdentifierOffsetCreate(identifier=identifier, offset=-10) diff --git a/tests/view_models/test_publication_identifier.py b/tests/view_models/test_publication_identifier.py index b65f9110..f516f87c 100644 --- a/tests/view_models/test_publication_identifier.py +++ b/tests/view_models/test_publication_identifier.py @@ -3,42 +3,42 @@ from mavedb.view_models.publication_identifier import PublicationIdentifierCreate -def test_publication_identifier_create_pubmed_validator(client): +def test_publication_identifier_create_pubmed_validator(): # Test valid pubmed identifier valid_identifier = "20711111" pubmed_one = PublicationIdentifierCreate(identifier=valid_identifier) assert pubmed_one.identifier == "20711111" -def test_publication_identifier_create_new_biorxiv_validator(client): +def test_publication_identifier_create_new_biorxiv_validator(): # Test valid new form of biorxiv identifier valid_identifier = "2019.12.12.207222" pubmed_one = PublicationIdentifierCreate(identifier=valid_identifier) assert pubmed_one.identifier == "2019.12.12.207222" -def test_publication_identifier_create_old_biorxiv_validator(client): +def test_publication_identifier_create_old_biorxiv_validator(): # Test valid old form of biorxiv identifier valid_identifier = "207222" pubmed_one = PublicationIdentifierCreate(identifier=valid_identifier) assert pubmed_one.identifier == "207222" -def test_publication_identifier_create_new_medrxiv_validator(client): +def test_publication_identifier_create_new_medrxiv_validator(): # Test valid new form of medrxiv identifier valid_identifier = "2019.12.12.20733333" pubmed_one = PublicationIdentifierCreate(identifier=valid_identifier) 
assert pubmed_one.identifier == "2019.12.12.20733333" -def test_publication_identifier_create_old_medrxiv_validator(client): +def test_publication_identifier_create_old_medrxiv_validator(): # Test valid old form of medrxiv identifier (this is the same format as pubmed identifiers) valid_identifier = "20733333" pubmed_one = PublicationIdentifierCreate(identifier=valid_identifier) assert pubmed_one.identifier == "20733333" -def test_invalid_publication_identifier_create_validator(client): +def test_invalid_publication_identifier_create_validator(): # Test invalid identifier invalid_identifier = "not_an_identifier" with pytest.raises(ValueError) as exc_info: @@ -48,7 +48,7 @@ def test_invalid_publication_identifier_create_validator(client): ) -def test_invalid_publication_identifier_date_part_create_validator(client): +def test_invalid_publication_identifier_date_part_create_validator(): # Test invalid identifier (date too early on bioRxiv identifier) invalid_identifier = "2018.12.12.207222" with pytest.raises(ValueError) as exc_info: diff --git a/tests/view_models/test_score_set.py b/tests/view_models/test_score_set.py index a47c3242..1247020c 100644 --- a/tests/view_models/test_score_set.py +++ b/tests/view_models/test_score_set.py @@ -1,5 +1,4 @@ import pytest -from fastapi.encoders import jsonable_encoder from mavedb.view_models.publication_identifier import PublicationIdentifierCreate from mavedb.view_models.score_set import ScoreSetCreate, ScoreSetModify @@ -9,24 +8,25 @@ def test_cannot_create_score_set_without_a_target(): score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() + score_set_test.pop("targetGenes") with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test, exclude={"targetGenes"}), target_genes=[]) + ScoreSetModify(**score_set_test, target_genes=[]) assert "Score sets should define at least one target." 
in str(exc_info.value) def test_cannot_create_score_set_with_multiple_primary_publications(): score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() + target_genes = score_set_test.pop("targetGenes") identifier_one = PublicationIdentifierCreate(identifier="2019.12.12.207222") identifier_two = PublicationIdentifierCreate(identifier="2019.12.12.20733333") with pytest.raises(ValueError) as exc_info: ScoreSetModify( - **jsonable_encoder(score_set_test), - exclude={"targetGenes"}, - target_genes=[TargetGeneCreate(**jsonable_encoder(target)) for target in score_set_test["targetGenes"]], + **score_set_test, + target_genes=[TargetGeneCreate(**target) for target in target_genes], primary_publication_identifiers=[identifier_one, identifier_two], ) @@ -36,12 +36,13 @@ def test_cannot_create_score_set_with_multiple_primary_publications(): def test_cannot_create_score_set_without_target_gene_labels_when_multiple_targets_exist(): score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() - target_gene_one = TargetGeneCreate(**jsonable_encoder(score_set_test["targetGenes"][0])) - target_gene_two = TargetGeneCreate(**jsonable_encoder(score_set_test["targetGenes"][0])) + target_gene_one = TargetGeneCreate(**score_set_test["targetGenes"][0]) + target_gene_two = TargetGeneCreate(**score_set_test["targetGenes"][0]) + score_set_test.pop("targetGenes") with pytest.raises(ValueError) as exc_info: ScoreSetModify( - **jsonable_encoder(score_set_test, exclude={"targetGenes"}), + **score_set_test, target_genes=[target_gene_one, target_gene_two], ) @@ -51,16 +52,17 @@ def test_cannot_create_score_set_without_target_gene_labels_when_multiple_target def test_cannot_create_score_set_with_non_unique_target_labels(): score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() - target_gene_one = TargetGeneCreate(**jsonable_encoder(score_set_test["targetGenes"][0])) - target_gene_two = TargetGeneCreate(**jsonable_encoder(score_set_test["targetGenes"][0])) + target_gene_one = TargetGeneCreate(**score_set_test["targetGenes"][0]) + target_gene_two = TargetGeneCreate(**score_set_test["targetGenes"][0]) non_unique = "BRCA1" target_gene_one.target_sequence.label = non_unique target_gene_two.target_sequence.label = non_unique + score_set_test.pop("targetGenes") with pytest.raises(ValueError) as exc_info: ScoreSetModify( - **jsonable_encoder(score_set_test, exclude={"targetGenes"}), + **score_set_test, target_genes=[target_gene_one, target_gene_two], ) @@ -69,9 +71,10 @@ def test_cannot_create_score_set_with_non_unique_target_labels(): def test_cannot_create_score_set_without_a_title(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"title"}) + score_set.pop("title") + with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "field required" in str(exc_info.value) assert "title" in str(exc_info.value) @@ -79,11 +82,10 @@ def test_cannot_create_score_set_without_a_title(): def test_cannot_create_score_set_with_a_space_title(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"title"}) - invalid_score_set["title"] = " " + score_set["title"] = " " with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "This field is required and cannot be empty." 
in str(exc_info.value) assert "title" in str(exc_info.value) @@ -91,11 +93,10 @@ def test_cannot_create_score_set_with_a_space_title(): def test_cannot_create_score_set_with_an_empty_title(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"title"}) - invalid_score_set["title"] = "" + score_set["title"] = "" with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "none is not an allowed value" in str(exc_info.value) assert "title" in str(exc_info.value) @@ -103,10 +104,10 @@ def test_cannot_create_score_set_with_an_empty_title(): def test_cannot_create_score_set_without_a_short_description(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"shortDescription"}) + score_set.pop("shortDescription") with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "field required" in str(exc_info.value) assert "shortDescription" in str(exc_info.value) @@ -114,11 +115,10 @@ def test_cannot_create_score_set_without_a_short_description(): def test_cannot_create_score_set_with_a_space_short_description(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"shortDescription"}) - invalid_score_set["shortDescription"] = " " + score_set["shortDescription"] = " " with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "This field is required and cannot be empty." in str(exc_info.value) assert "shortDescription" in str(exc_info.value) @@ -126,11 +126,10 @@ def test_cannot_create_score_set_with_a_space_short_description(): def test_cannot_create_score_set_with_an_empty_short_description(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"shortDescription"}) - invalid_score_set["shortDescription"] = "" + score_set["shortDescription"] = "" with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "none is not an allowed value" in str(exc_info.value) assert "shortDescription" in str(exc_info.value) @@ -138,10 +137,10 @@ def test_cannot_create_score_set_with_an_empty_short_description(): def test_cannot_create_score_set_without_an_abstract(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"abstractText"}) + score_set.pop("abstractText") with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "field required" in str(exc_info.value) assert "abstractText" in str(exc_info.value) @@ -149,11 +148,10 @@ def test_cannot_create_score_set_without_an_abstract(): def test_cannot_create_score_set_with_a_space_abstract(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"abstractText"}) - invalid_score_set["abstractText"] = " " + score_set["abstractText"] = " " with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "This field is required and cannot be empty." 
in str(exc_info.value) assert "abstractText" in str(exc_info.value) @@ -161,11 +159,10 @@ def test_cannot_create_score_set_with_a_space_abstract(): def test_cannot_create_score_set_with_an_empty_abstract(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"abstractText"}) - invalid_score_set["abstractText"] = "" + score_set["abstractText"] = "" with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "none is not an allowed value" in str(exc_info.value) assert "abstractText" in str(exc_info.value) @@ -173,10 +170,10 @@ def test_cannot_create_score_set_with_an_empty_abstract(): def test_cannot_create_score_set_without_a_method(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"methodText"}) + score_set.pop("methodText") with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "field required" in str(exc_info.value) assert "methodText" in str(exc_info.value) @@ -184,11 +181,10 @@ def test_cannot_create_score_set_without_a_method(): def test_cannot_create_score_set_with_a_space_method(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"methodText"}) - invalid_score_set["methodText"] = " " + score_set["methodText"] = " " with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "This field is required and cannot be empty." in str(exc_info.value) assert "methodText" in str(exc_info.value) @@ -196,11 +192,10 @@ def test_cannot_create_score_set_with_a_space_method(): def test_cannot_create_score_set_with_an_empty_method(): score_set = TEST_MINIMAL_SEQ_SCORESET.copy() - invalid_score_set = jsonable_encoder(score_set, exclude={"methodText"}) - invalid_score_set["methodText"] = "" + score_set["methodText"] = "" with pytest.raises(ValueError) as exc_info: - ScoreSetCreate(**invalid_score_set) + ScoreSetCreate(**score_set) assert "none is not an allowed value" in str(exc_info.value) assert "methodText" in str(exc_info.value) @@ -217,7 +212,7 @@ def test_cannot_create_score_set_with_too_many_boundaries(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "Only a lower and upper bound are allowed." 
in str(exc_info.value) @@ -233,7 +228,7 @@ def test_cannot_create_score_set_with_overlapping_ranges(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "Score ranges may not overlap; `range_1` overlaps with `range_2`" in str(exc_info.value) @@ -249,7 +244,7 @@ def test_can_create_score_set_with_mixed_range_types(): ], } - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) def test_can_create_score_set_with_adjacent_ranges(): @@ -262,7 +257,7 @@ def test_can_create_score_set_with_adjacent_ranges(): ], } - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) def test_can_create_score_set_with_flipped_adjacent_ranges(): @@ -275,7 +270,7 @@ def test_can_create_score_set_with_flipped_adjacent_ranges(): ], } - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) def test_can_create_score_set_with_adjacent_negative_ranges(): @@ -288,7 +283,7 @@ def test_can_create_score_set_with_adjacent_negative_ranges(): ], } - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) def test_can_create_score_set_with_flipped_adjacent_negative_ranges(): @@ -301,7 +296,7 @@ def test_can_create_score_set_with_flipped_adjacent_negative_ranges(): ], } - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) def test_cannot_create_score_set_with_overlapping_upper_unbounded_ranges(): @@ -315,7 +310,7 @@ def test_cannot_create_score_set_with_overlapping_upper_unbounded_ranges(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "Score ranges may not overlap; `range_1` overlaps with `range_2`" in str(exc_info.value) @@ -331,7 +326,7 @@ def test_cannot_create_score_set_with_overlapping_lower_unbounded_ranges(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "Score ranges may not overlap; `range_1` overlaps with `range_2`" in str(exc_info.value) @@ -347,7 +342,7 @@ def test_cannot_create_score_set_with_backwards_bounds(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "The lower bound of the score range may not be larger than the upper bound." in str(exc_info.value) @@ -362,7 +357,7 @@ def test_cannot_create_score_set_with_equal_bounds(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "The lower and upper bound of the score range may not be the same." in str(exc_info.value) @@ -378,7 +373,7 @@ def test_cannot_create_score_set_with_duplicate_range_labels(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "Detected repeated label: `range_1`. Range labels must be unique." in str(exc_info.value) @@ -394,7 +389,7 @@ def test_cannot_create_score_set_with_duplicate_range_labels_whitespace(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "Detected repeated label: `range_1`. Range labels must be unique." 
in str(exc_info.value) @@ -411,7 +406,7 @@ def test_cannot_create_score_set_with_wild_type_outside_ranges(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert ( f"The provided wild type score of {wt_score} is not within any of the provided normal ranges. This score should be within a normal range." @@ -431,7 +426,7 @@ def test_cannot_create_score_set_with_wild_type_outside_normal_range(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert ( f"The provided wild type score of {wt_score} is not within any of the provided normal ranges. This score should be within a normal range." @@ -450,7 +445,7 @@ def test_cannot_create_score_set_with_wild_type_score_and_no_normal_range(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "A wild type score has been provided, but no normal classification range exists." in str(exc_info.value) @@ -480,7 +475,7 @@ def test_cannot_create_score_set_without_default_ranges(): } with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) assert "unexpected value; permitted: 'normal', 'abnormal', 'not_specified'" in str(exc_info.value) diff --git a/tests/view_models/test_user.py b/tests/view_models/test_user.py index b72d0d5d..8650f343 100644 --- a/tests/view_models/test_user.py +++ b/tests/view_models/test_user.py @@ -1,5 +1,4 @@ import pytest -from fastapi.encoders import jsonable_encoder from mavedb.view_models.user import CurrentUserUpdate from tests.helpers.constants import TEST_USER @@ -7,6 +6,8 @@ # There are lots of potentially invalid emails, but this test is intented to ensure # the validator is active, so just use a simple one. 
-def test_cannot_update_user_with_invalid_email(client): +def test_cannot_update_user_with_invalid_email(): + user = TEST_USER.copy() + user["email"] = "invalidemail@" with pytest.raises(ValueError): - CurrentUserUpdate(**jsonable_encoder(TEST_USER, exclude={"email"}), email="invalidemail@") + CurrentUserUpdate(**user) diff --git a/tests/view_models/test_wild_type_sequence.py b/tests/view_models/test_wild_type_sequence.py index 25415fc1..47401871 100644 --- a/tests/view_models/test_wild_type_sequence.py +++ b/tests/view_models/test_wild_type_sequence.py @@ -28,21 +28,21 @@ ("Protein", "startrek"), ], ) -def test_create_wild_type_sequence(client, sequence_type, sequence): +def test_create_wild_type_sequence(sequence_type, sequence): TargetSeq = TargetSequenceCreate(sequence_type=sequence_type, sequence=sequence, taxonomy=taxonomy) assert TargetSeq.sequence_type == sequence_type.lower() assert TargetSeq.sequence == sequence.upper() @pytest.mark.parametrize("sequence_type, sequence", [("dnaaa", "ATGAGTATTCAACATTTCCGTGTC"), ("null", "STARTREK")]) -def test_create_invalid_sequence_type(client, sequence_type, sequence): +def test_create_invalid_sequence_type(sequence_type, sequence): with pytest.raises(ValueError) as exc_info: TargetSequenceCreate(sequence_type=sequence_type, sequence=sequence, taxonomy=taxonomy) assert f"'{sequence_type}' is not a valid sequence type" in str(exc_info.value) @pytest.mark.parametrize("sequence_type, sequence", [("dna", "ARCG"), ("protein", "AzCG")]) -def test_create_invalid_sequence(client, sequence_type, sequence): +def test_create_invalid_sequence(sequence_type, sequence): with pytest.raises(ValueError) as exc_info: TargetSequenceCreate(sequence_type=sequence_type, sequence=sequence, taxonomy=taxonomy) assert f"invalid {sequence_type} sequence provided" in str(exc_info.value) diff --git a/tests/worker/conftest.py b/tests/worker/conftest.py index 7d989005..fedf2f1f 100644 --- a/tests/worker/conftest.py +++ b/tests/worker/conftest.py @@ -6,8 +6,8 @@ from mavedb.models.license import License from mavedb.models.taxonomy import Taxonomy from mavedb.models.user import User + from tests.helpers.constants import EXTRA_USER, TEST_LICENSE, TEST_INACTIVE_LICENSE, TEST_TAXONOMY, TEST_USER -from tests.helpers.util import create_experiment, create_seq_score_set @pytest.fixture @@ -21,15 +21,6 @@ def setup_worker_db(session): db.commit() -@pytest.fixture -def populate_worker_db(data_files, client): - # create score set via API. 
In production, the API would invoke this worker job - experiment = create_experiment(client) - score_set = create_seq_score_set(client, experiment["urn"]) - - return score_set["urn"] - - @pytest.fixture def data_files(tmp_path): copytree(Path(__file__).absolute().parent / "data", tmp_path / "data") diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index dde19500..f7baba9e 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -1,17 +1,20 @@ +# ruff: noqa: E402 + from asyncio.unix_events import _UnixSelectorEventLoop from copy import deepcopy from datetime import date from unittest.mock import patch from uuid import uuid4 -import arq.jobs -import cdot.hgvs.dataproviders import jsonschema import pandas as pd import pytest -from arq import ArqRedis from sqlalchemy import not_, select +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.data_providers.services import VRSMap from mavedb.lib.mave.constants import HGVS_NT_COLUMN from mavedb.lib.score_sets import csv_data_to_df @@ -34,6 +37,8 @@ submit_score_set_mappings_to_ldh, link_clingen_variants, ) + + from tests.helpers.constants import ( TEST_CDOT_TRANSCRIPT, TEST_CLINGEN_SUBMISSION_RESPONSE, @@ -50,7 +55,18 @@ TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, ) -from tests.helpers.util import awaitable_exception +from tests.helpers.util.exceptions import awaitable_exception +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.score_set import create_seq_score_set + + +@pytest.fixture +def populate_worker_db(data_files, client): + # create score set via API. In production, the API would invoke this worker job + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + + return score_set["urn"] async def setup_records_and_files(async_client, data_files, input_score_set): @@ -861,7 +877,7 @@ async def test_create_mapped_variants_for_scoreset_mapping_exception_retry_faile "run_in_executor", return_value=awaitable_exception(), ), - patch.object(ArqRedis, "lpush", awaitable_exception()), + patch.object(arq.ArqRedis, "lpush", awaitable_exception()), ): result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) @@ -955,7 +971,7 @@ async def dummy_mapping_job(): "run_in_executor", return_value=dummy_mapping_job(), ), - patch.object(ArqRedis, "lpush", awaitable_exception()), + patch.object(arq.ArqRedis, "lpush", awaitable_exception()), ): result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) @@ -1080,7 +1096,7 @@ async def test_mapping_manager_empty_queue(setup_worker_db, standalone_worker_co @pytest.mark.asyncio async def test_mapping_manager_empty_queue_error_during_setup(setup_worker_db, standalone_worker_context): await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") - with patch.object(ArqRedis, "rpop", Exception()): + with patch.object(arq.ArqRedis, "rpop", Exception()): result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) # No new jobs should have been created if nothing is in the queue, and the queue should remain empty. 
@@ -1162,7 +1178,7 @@ async def test_mapping_manager_occupied_queue_mapping_in_progress_error_during_e await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "5") with ( patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.in_progress), - patch.object(ArqRedis, "enqueue_job", return_value=awaitable_exception()), + patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), ): result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) @@ -1190,7 +1206,7 @@ async def test_mapping_manager_occupied_queue_mapping_not_in_progress_error_duri await standalone_worker_context["redis"].set(MAPPING_CURRENT_ID_NAME, "") with ( patch.object(arq.jobs.Job, "status", return_value=arq.jobs.JobStatus.not_found), - patch.object(ArqRedis, "enqueue_job", return_value=awaitable_exception()), + patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), ): result = await variant_mapper_manager(standalone_worker_context, uuid4().hex, 1) From 038b012229ed79227dd4bb9e34415467e149a1d7 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 28 Feb 2025 15:27:01 -0800 Subject: [PATCH 091/166] Bump Dependencies --- poetry.lock | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/poetry.lock b/poetry.lock index 8f9ea2a8..0084bab7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -688,13 +688,13 @@ crt = ["awscrt (==0.21.2)"] [[package]] name = "botocore-stubs" -version = "1.37.15" +version = "1.37.16" description = "Type annotations and code completion for botocore" optional = false python-versions = ">=3.8" files = [ - {file = "botocore_stubs-1.37.15-py3-none-any.whl", hash = "sha256:70ef39669f3b9421c20295535aaeb81aa62d6a90969fb631caabe480fe11af0c"}, - {file = "botocore_stubs-1.37.15.tar.gz", hash = "sha256:055525b345cac085b4607335b13744756a3d43a4b7025b2e977d1c139b15c31b"}, + {file = "botocore_stubs-1.37.16-py3-none-any.whl", hash = "sha256:33973ee0e54ad5bf9f8560b2c36fc532b98540af6b9d4a57ffce5ae62a743a2a"}, + {file = "botocore_stubs-1.37.16.tar.gz", hash = "sha256:532376611ae0c49488b7bdac3674da9ac0de9a6c65198432790b11da41502caf"}, ] [package.dependencies] @@ -1783,13 +1783,13 @@ type = ["pytest-mypy"] [[package]] name = "iniconfig" -version = "2.0.0" +version = "2.1.0" description = "brain-dead simple config-ini parsing" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, - {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, + {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, + {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, ] [[package]] @@ -2474,19 +2474,19 @@ ptyprocess = ">=0.5" [[package]] name = "platformdirs" -version = "4.3.6" +version = "4.3.7" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, - {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, + {file = "platformdirs-4.3.7-py3-none-any.whl", hash = "sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94"}, + {file = "platformdirs-4.3.7.tar.gz", hash = "sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351"}, ] [package.extras] -docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"] -type = ["mypy (>=1.11.2)"] +docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"] +type = ["mypy (>=1.14.1)"] [[package]] name = "pluggy" @@ -3286,13 +3286,13 @@ crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] [[package]] name = "setuptools" -version = "76.1.0" +version = "77.0.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.9" files = [ - {file = "setuptools-76.1.0-py3-none-any.whl", hash = "sha256:34750dcb17d046929f545dec9b8349fe42bf4ba13ddffee78428aec422dbfb73"}, - {file = "setuptools-76.1.0.tar.gz", hash = "sha256:4959b9ad482ada2ba2320c8f1a8d8481d4d8d668908a7a1b84d987375cd7f5bd"}, + {file = "setuptools-77.0.1-py3-none-any.whl", hash = "sha256:81a234dff81a82bb52e522c8aef145d0dd4de1fd6de4d3b196d0f77dc2fded26"}, + {file = "setuptools-77.0.1.tar.gz", hash = "sha256:a1246a1b4178c66d7cf50c9fc6d530fac3f89bc284cf803c7fa878c41b1a03b2"}, ] [package.extras] From f78c4858f379c152a13a1cca054e5b173fc5248b Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 28 Feb 2025 15:32:32 -0800 Subject: [PATCH 092/166] Check for Nonetype Target Sequences to Silence MyPy Error --- src/mavedb/lib/validation/dataframe/variant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/lib/validation/dataframe/variant.py b/src/mavedb/lib/validation/dataframe/variant.py index eb81873d..4b784994 100644 --- a/src/mavedb/lib/validation/dataframe/variant.py +++ b/src/mavedb/lib/validation/dataframe/variant.py @@ -258,7 +258,7 @@ def validate_observed_sequence_types(targets: dict[str, TargetSequence]) -> list if not targets: raise ValueError("No targets were provided; cannot validate observed sequence types with none observed.") - observed_sequence_types = [target.sequence_type for target in targets.values()] + observed_sequence_types = [target.sequence_type for target in targets.values() if target.sequence_type is not None] invalid_sequence_types = set(observed_sequence_types) - set(valid_sequence_types) if invalid_sequence_types: raise ValueError( From 119e7f2c721ff6487440c6d2d22bbef4888db35d Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 28 Feb 2025 21:53:14 -0800 Subject: [PATCH 093/166] Replace DataSet Columns Setter in Worker Variant Mocker --- tests/helpers/util/variant.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/helpers/util/variant.py b/tests/helpers/util/variant.py index 57bcd436..95720cac 100644 --- a/tests/helpers/util/variant.py +++ b/tests/helpers/util/variant.py @@ -7,7 +7,7 @@ 
from sqlalchemy import select from unittest.mock import patch -from mavedb.lib.score_sets import create_variants, create_variants_data, csv_data_to_df +from mavedb.lib.score_sets import create_variants, columns_for_dataset, create_variants_data, csv_data_to_df from mavedb.lib.validation.dataframe.dataframe import validate_and_standardize_dataframe_pair from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.enums.mapping_state import MappingState @@ -73,6 +73,10 @@ def mock_worker_variant_insertion( assert num_variants == 3 item.processing_state = ProcessingState.success + item.dataset_columns = { + "score_columns": columns_for_dataset(scores), + "count_columns": columns_for_dataset(counts), + } db.add(item) db.commit() From 58dfb7b7785266df3c3ba4e93a3a92ad6bd9c964 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Sun, 2 Mar 2025 14:20:06 -0800 Subject: [PATCH 094/166] Add Base Editor Column to Target Accessions Table --- ...3b_add_is_base_editor_column_to_target_.py | 31 +++++++++++++++++ .../lib/validation/constants/general.py | 1 + src/mavedb/models/target_accession.py | 3 +- src/mavedb/view_models/target_accession.py | 1 + tests/helpers/constants.py | 34 +++++++++++++++++-- 5 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 alembic/versions/f69b4049bc3b_add_is_base_editor_column_to_target_.py diff --git a/alembic/versions/f69b4049bc3b_add_is_base_editor_column_to_target_.py b/alembic/versions/f69b4049bc3b_add_is_base_editor_column_to_target_.py new file mode 100644 index 00000000..041edda4 --- /dev/null +++ b/alembic/versions/f69b4049bc3b_add_is_base_editor_column_to_target_.py @@ -0,0 +1,31 @@ +"""Add is_base_editor column to target_accessions + +Revision ID: f69b4049bc3b +Revises: c404b6719110 +Create Date: 2025-03-02 14:06:52.217554 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "f69b4049bc3b" +down_revision = "c404b6719110" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + "target_accessions", sa.Column("is_base_editor", sa.Boolean(), nullable=False, server_default="false") + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column("target_accessions", "is_base_editor") + # ### end Alembic commands ### diff --git a/src/mavedb/lib/validation/constants/general.py b/src/mavedb/lib/validation/constants/general.py index 94e5a844..92b4fd5b 100644 --- a/src/mavedb/lib/validation/constants/general.py +++ b/src/mavedb/lib/validation/constants/general.py @@ -35,6 +35,7 @@ hgvs_nt_column = "hgvs_nt" hgvs_splice_column = "hgvs_splice" hgvs_pro_column = "hgvs_pro" +guide_sequence_column = "guide_sequence" hgvs_columns = sorted([hgvs_nt_column, hgvs_pro_column, hgvs_splice_column]) meta_data = "meta_data" score_columns = "score_columns" diff --git a/src/mavedb/models/target_accession.py b/src/mavedb/models/target_accession.py index e054a50f..9e176888 100644 --- a/src/mavedb/models/target_accession.py +++ b/src/mavedb/models/target_accession.py @@ -1,6 +1,6 @@ from datetime import date -from sqlalchemy import Column, Date, Integer, String +from sqlalchemy import Boolean, Column, Date, Integer, String from mavedb.db.base import Base @@ -14,3 +14,4 @@ class TargetAccession(Base): gene = Column(String, nullable=True) creation_date = Column(Date, nullable=False, default=date.today) modification_date = Column(Date, nullable=False, default=date.today, onupdate=date.today) + is_base_editor = Column(Boolean, nullable=False, default=False) diff --git a/src/mavedb/view_models/target_accession.py b/src/mavedb/view_models/target_accession.py index bf78ae25..05406719 100644 --- a/src/mavedb/view_models/target_accession.py +++ b/src/mavedb/view_models/target_accession.py @@ -7,6 +7,7 @@ class TargetAccessionBase(BaseModel): accession: str + is_base_editor: bool assembly: Optional[str] gene: Optional[str] diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index a497d1d6..828d0a48 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -668,7 +668,12 @@ "name": "TEST2", "category": "protein_coding", "externalIdentifiers": [], - "targetAccession": {"accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE}, + "targetAccession": { + "accession": VALID_ACCESSION, + "assembly": "GRCh37", + "gene": VALID_GENE, + "isBaseEditor": False, + }, } ], } @@ -682,7 +687,31 @@ { "name": "TEST2", "category": "protein_coding", - "target_accession": {"accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE}, + "target_accession": { + "accession": VALID_ACCESSION, + "assembly": "GRCh37", + "gene": VALID_GENE, + "isBaseEditor": False, + }, + } + ], +} + +TEST_BASE_EDITOR_SCORESET = { + "title": "Test Score Set Acc Title", + "short_description": "Test accession score set", + "abstract_text": "Abstract", + "method_text": "Methods", + "target_genes": [ + { + "name": "TEST2", + "category": "protein_coding", + "target_accession": { + "accession": VALID_ACCESSION, + "assembly": "GRCh37", + "gene": VALID_GENE, + "isBaseEditor": False, + }, } ], } @@ -724,6 +753,7 @@ "accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE, + "isBaseEditor": False, }, } ], From 4fbdbd7e422f4ef929813b68548324dff1d30c0f Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Sun, 2 Mar 2025 22:18:29 -0800 Subject: [PATCH 095/166] Validation logic and test cases for base editor data --- .../lib/validation/dataframe/dataframe.py | 51 ++++-- .../lib/validation/dataframe/variant.py | 7 + src/mavedb/view_models/score_set.py | 20 +++ tests/validation/dataframe/conftest.py | 2 + tests/validation/dataframe/test_dataframe.py | 165 ++++++++++++++---- tests/validation/dataframe/test_variant.py | 58 ++++++ 
tests/view_models/test_score_set.py | 41 +++-- 7 files changed, 275 insertions(+), 69 deletions(-) diff --git a/src/mavedb/lib/validation/dataframe/dataframe.py b/src/mavedb/lib/validation/dataframe/dataframe.py index a8ab6557..61b96bb3 100644 --- a/src/mavedb/lib/validation/dataframe/dataframe.py +++ b/src/mavedb/lib/validation/dataframe/dataframe.py @@ -8,6 +8,7 @@ hgvs_nt_column, hgvs_pro_column, hgvs_splice_column, + guide_sequence_column, required_score_column, ) from mavedb.lib.validation.exceptions import ValidationError @@ -16,6 +17,7 @@ from mavedb.lib.validation.dataframe.variant import ( validate_hgvs_transgenic_column, validate_hgvs_genomic_column, + validate_guide_sequence_column, validate_hgvs_prefix_combinations, ) @@ -23,7 +25,7 @@ from cdot.hgvs.dataproviders import RESTDataProvider -STANDARD_COLUMNS = (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, required_score_column) +STANDARD_COLUMNS = (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, guide_sequence_column, required_score_column) def validate_and_standardize_dataframe_pair( @@ -95,26 +97,31 @@ def validate_dataframe( ValidationError If one of the validators called raises an exception """ + # basic target meta data + score_set_is_accession_based = all(target.target_accession for target in targets) + score_set_is_sequence_based = all(target.target_sequence for target in targets) + score_set_is_base_editor = score_set_is_accession_based and all( + target.target_accession.is_base_editor for target in targets + ) + # basic checks - validate_column_names(df, kind) + validate_column_names(df, kind, score_set_is_base_editor) validate_no_null_rows(df) column_mapping = {c.lower(): c for c in df.columns} - index_column = choose_dataframe_index_column(df) + index_column = choose_dataframe_index_column(df, score_set_is_base_editor) prefixes: dict[str, Optional[str]] = dict() for c in column_mapping: + is_index = column_mapping[c] == index_column + if c in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): - is_index = column_mapping[c] == index_column prefixes[c] = None # Ignore validation for null non-index hgvs columns if df[column_mapping[c]].isna().all() and not is_index: continue - score_set_is_accession_based = all(target.target_accession for target in targets) - score_set_is_sequence_based = all(target.target_sequence for target in targets) - # This is typesafe, despite Pylance's claims otherwise if score_set_is_accession_based and not score_set_is_sequence_based: validate_hgvs_genomic_column( @@ -140,6 +147,9 @@ def validate_dataframe( else: prefixes[c] = df[column_mapping[c]].dropna()[0][0] + elif c is guide_sequence_column: + validate_guide_sequence_column(df[column_mapping[c]], is_index=is_index) + else: force_numeric = (c == required_score_column) or (kind == "counts") validate_data_column(df[column_mapping[c]], force_numeric) @@ -213,7 +223,7 @@ def column_sort_function(value, columns): return df[new_columns] -def validate_column_names(df: pd.DataFrame, kind: str) -> None: +def validate_column_names(df: pd.DataFrame, kind: str, is_base_editor: bool) -> None: """Validate the column names in a dataframe. This function validates the column names in the given dataframe. 
@@ -256,18 +266,27 @@ def validate_column_names(df: pd.DataFrame, kind: str) -> None: raise ValueError("kind only accepts scores and counts") if hgvs_splice_column in columns: - if hgvs_nt_column not in columns or hgvs_pro_column not in columns: - raise ValidationError( - f"dataframes with '{hgvs_splice_column}' must also define '{hgvs_nt_column}' and '{hgvs_pro_column}'" - ) + msg = "dataframes with '{0}' must also define a '{1}' column" + if hgvs_nt_column not in columns: + raise ValidationError(msg.format(hgvs_splice_column, hgvs_nt_column)) + elif hgvs_pro_column not in columns: + raise ValidationError(msg.format(hgvs_splice_column, hgvs_pro_column)) if len(columns) != len(set(columns)): raise ValidationError("duplicate column names are not allowed (this check is case insensitive)") + if is_base_editor: + msg = "dataframes for base editor data must also define the '{0}' column" + if guide_sequence_column not in columns: + raise ValidationError(msg.format(guide_sequence_column)) + + elif hgvs_nt_column not in columns: + raise ValidationError(msg.format(hgvs_nt_column)) + if set(columns).isdisjoint({hgvs_nt_column, hgvs_splice_column, hgvs_pro_column}): raise ValidationError("dataframe does not define any variant columns") - if set(columns).issubset({hgvs_nt_column, hgvs_splice_column, hgvs_pro_column}): + if set(columns).issubset({hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, guide_sequence_column}): raise ValidationError("dataframe does not define any data columns") @@ -288,7 +307,7 @@ def validate_no_null_rows(df: pd.DataFrame) -> None: raise ValidationError(f"found {len(df[df.isnull().all(axis=1)])} null rows in the data frame") -def choose_dataframe_index_column(df: pd.DataFrame) -> str: +def choose_dataframe_index_column(df: pd.DataFrame, is_base_editor: bool) -> str: """ Identify the HGVS variant column that should be used as the index column in this dataframe. @@ -309,7 +328,9 @@ def choose_dataframe_index_column(df: pd.DataFrame) -> str: """ column_mapping = {c.lower(): c for c in df.columns if not df[c].isna().all()} - if hgvs_nt_column in column_mapping: + if is_base_editor: + return column_mapping[guide_sequence_column] + elif hgvs_nt_column in column_mapping: return column_mapping[hgvs_nt_column] elif hgvs_pro_column in column_mapping: return column_mapping[hgvs_pro_column] diff --git a/src/mavedb/lib/validation/dataframe/variant.py b/src/mavedb/lib/validation/dataframe/variant.py index 4b784994..b4b5761f 100644 --- a/src/mavedb/lib/validation/dataframe/variant.py +++ b/src/mavedb/lib/validation/dataframe/variant.py @@ -3,6 +3,7 @@ from typing import Hashable, Optional, TYPE_CHECKING import pandas as pd +from fqfa.validator import dna_bases_validator from mavehgvs.exceptions import MaveHgvsParseError from mavehgvs.variant import Variant @@ -235,6 +236,12 @@ def parse_transgenic_variant( return True, None +def validate_guide_sequence_column(column: pd.Series, is_index: bool) -> None: + validate_variant_column(column, is_index) + if column.apply(lambda x: dna_bases_validator(x) is None if x is not None else False).any(): + raise ValidationError("Invalid guide sequence provided: all guide sequences must be valid DNA sequences.") + + def validate_observed_sequence_types(targets: dict[str, TargetSequence]) -> list[str]: """ Ensures that the sequence types of the given target sequences are an accepted type. 
diff --git a/src/mavedb/view_models/score_set.py b/src/mavedb/view_models/score_set.py index 30b93d28..0ed722ae 100644 --- a/src/mavedb/view_models/score_set.py +++ b/src/mavedb/view_models/score_set.py @@ -177,6 +177,26 @@ def at_least_one_target_gene_exists(cls, field_value, values): return field_value + # Validate nested label fields are not identical + @validator("target_genes") + def target_accession_base_editor_targets_are_consistent(cls, field_value, values): + # Only target accessions can have base editor data. + if len(field_value) > 1 and all([target.target_accession is not None for target in field_value]): + if len(set(target.target_accession.is_base_editor for target in field_value)) > 1: + # Throw the error for the first target, since it necessarily has an inconsistent base editor value. + raise ValidationError( + "All target accessions must be of the same base editor type.", + custom_loc=[ + "body", + "targetGene", + 0, + "targetAccession", + "isBaseEditor", + ], + ) + + return field_value + @validator("score_ranges") def score_range_labels_must_be_unique(cls, field_value: Optional[ScoreRanges]): if field_value is None: diff --git a/tests/validation/dataframe/conftest.py b/tests/validation/dataframe/conftest.py index 8e4596db..a0cd4cb0 100644 --- a/tests/validation/dataframe/conftest.py +++ b/tests/validation/dataframe/conftest.py @@ -6,6 +6,7 @@ hgvs_nt_column, hgvs_pro_column, hgvs_splice_column, + guide_sequence_column, required_score_column, ) from tests.helpers.constants import TEST_CDOT_TRANSCRIPT @@ -32,6 +33,7 @@ def setUp(self): hgvs_nt_column: ["g.1A>G", "g.1A>T"], hgvs_splice_column: ["c.1A>G", "c.1A>T"], hgvs_pro_column: ["p.Met1Val", "p.Met1Leu"], + guide_sequence_column: ["AG", "AG"], required_score_column: [1.0, 2.0], "extra": [12.0, 3.0], "count1": [3.0, 5.0], diff --git a/tests/validation/dataframe/test_dataframe.py b/tests/validation/dataframe/test_dataframe.py index 4bca6f2f..2eac2e83 100644 --- a/tests/validation/dataframe/test_dataframe.py +++ b/tests/validation/dataframe/test_dataframe.py @@ -9,6 +9,7 @@ hgvs_nt_column, hgvs_pro_column, hgvs_splice_column, + guide_sequence_column, required_score_column, ) from mavedb.lib.validation.dataframe.dataframe import ( @@ -43,6 +44,7 @@ def test_sort_dataframe(self): "count2", "extra2", "mixed_types", + guide_sequence_column, "null_col", ] ] @@ -165,74 +167,114 @@ def test_only_hgvs_row(self): class TestColumnNames(DfTestCase): def test_only_two_kinds_of_dataframe(self): with self.assertRaises(ValueError): - validate_column_names(self.dataframe, kind="score2") + validate_column_names(self.dataframe, kind="score2", is_base_editor=False) def test_score_df_has_score_column(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([required_score_column], axis=1), kind="scores") + validate_column_names( + self.dataframe.drop([required_score_column], axis=1), kind="scores", is_base_editor=False + ) def test_count_df_lacks_score_column(self): - validate_column_names(self.dataframe.drop([required_score_column], axis=1), kind="counts") + validate_column_names(self.dataframe.drop([required_score_column], axis=1), kind="counts", is_base_editor=False) with self.assertRaises(ValidationError): - validate_column_names(self.dataframe, kind="counts") + validate_column_names(self.dataframe, kind="counts", is_base_editor=False) def test_count_df_has_score_column(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe, kind="counts") + 
validate_column_names(self.dataframe, kind="counts", is_base_editor=False) def test_df_with_only_scores(self): - validate_column_names(self.dataframe[[hgvs_pro_column, required_score_column]], kind="scores") + validate_column_names( + self.dataframe[[hgvs_pro_column, required_score_column]], kind="scores", is_base_editor=False + ) def test_count_df_must_have_data(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe[[hgvs_nt_column, hgvs_pro_column]], kind="counts") + validate_column_names( + self.dataframe[[hgvs_nt_column, hgvs_pro_column]], kind="counts", is_base_editor=False + ) def test_just_hgvs_nt(self): - validate_column_names(self.dataframe.drop([hgvs_pro_column, hgvs_splice_column], axis=1), kind="scores") validate_column_names( - self.dataframe.drop([hgvs_pro_column, hgvs_splice_column, required_score_column], axis=1), kind="counts" + self.dataframe.drop([hgvs_pro_column, hgvs_splice_column], axis=1), kind="scores", is_base_editor=False + ) + validate_column_names( + self.dataframe.drop([hgvs_pro_column, hgvs_splice_column, required_score_column], axis=1), + kind="counts", + is_base_editor=False, ) def test_just_hgvs_pro(self): - validate_column_names(self.dataframe.drop([hgvs_nt_column, hgvs_splice_column], axis=1), kind="scores") validate_column_names( - self.dataframe.drop([hgvs_nt_column, hgvs_splice_column, required_score_column], axis=1), kind="counts" + self.dataframe.drop([hgvs_nt_column, hgvs_splice_column], axis=1), kind="scores", is_base_editor=False + ) + validate_column_names( + self.dataframe.drop([hgvs_nt_column, hgvs_splice_column, required_score_column], axis=1), + kind="counts", + is_base_editor=False, ) def test_just_hgvs_pro_and_nt(self): - validate_column_names(self.dataframe.drop([hgvs_splice_column], axis=1), kind="scores") - validate_column_names(self.dataframe.drop([hgvs_splice_column, required_score_column], axis=1), kind="counts") + validate_column_names(self.dataframe.drop([hgvs_splice_column], axis=1), kind="scores", is_base_editor=False) + validate_column_names( + self.dataframe.drop([hgvs_splice_column, required_score_column], axis=1), + kind="counts", + is_base_editor=False, + ) def test_hgvs_splice_must_have_pro_and_nt_both_absent(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1), kind="scores") + validate_column_names( + self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1), kind="scores", is_base_editor=False + ) def test_hgvs_splice_must_have_pro_and_nt_nt_absent(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_nt_column], axis=1), kind="scores") + validate_column_names(self.dataframe.drop([hgvs_nt_column], axis=1), kind="scores", is_base_editor=False) def test_hgvs_splice_must_have_pro_and_nt_pro_absent(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_pro_column], axis=1), kind="scores") + validate_column_names(self.dataframe.drop([hgvs_pro_column], axis=1), kind="scores", is_base_editor=False) + + def test_base_editor_must_have_nt_nt_absent(self): + with self.assertRaises(ValidationError): + validate_column_names( + self.dataframe.drop([hgvs_nt_column], axis=1), + kind="scores", + is_base_editor=False, + ) def test_hgvs_splice_must_have_pro_and_nt_and_scores(self): with self.assertRaises(ValidationError): validate_column_names( - self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, required_score_column], axis=1), 
kind="counts" + self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, required_score_column], axis=1), + kind="counts", + is_base_editor=False, ) def test_hgvs_splice_must_have_pro_and_nt_nt_scores_absent(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_nt_column, required_score_column], axis=1), kind="counts") + validate_column_names( + self.dataframe.drop([hgvs_nt_column, required_score_column], axis=1), + kind="counts", + is_base_editor=False, + ) def test_hgvs_splice_must_have_pro_and_nt_pro_scores_absent(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.drop([hgvs_pro_column, required_score_column], axis=1), kind="counts") + validate_column_names( + self.dataframe.drop([hgvs_pro_column, required_score_column], axis=1), + kind="counts", + is_base_editor=False, + ) def test_no_hgvs_column_scores(self): with pytest.raises(ValidationError) as exc_info: validate_column_names( - self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, hgvs_splice_column], axis=1), kind="scores" + self.dataframe.drop([hgvs_nt_column, hgvs_pro_column, hgvs_splice_column], axis=1), + kind="scores", + is_base_editor=False, ) assert "dataframe does not define any variant columns" in str(exc_info.value) @@ -243,52 +285,82 @@ def test_no_hgvs_column_counts(self): [hgvs_nt_column, hgvs_pro_column, hgvs_splice_column, required_score_column], axis=1 ), kind="counts", + is_base_editor=False, ) assert "dataframe does not define any variant columns" in str(exc_info.value) def test_validation_ignores_column_ordering_scores(self): validate_column_names( - self.dataframe[[hgvs_nt_column, required_score_column, hgvs_pro_column, hgvs_splice_column]], kind="scores" + self.dataframe[[hgvs_nt_column, required_score_column, hgvs_pro_column, hgvs_splice_column]], + kind="scores", + is_base_editor=False, + ) + validate_column_names( + self.dataframe[[required_score_column, hgvs_nt_column, hgvs_pro_column]], + kind="scores", + is_base_editor=False, + ) + validate_column_names( + self.dataframe[[hgvs_pro_column, required_score_column, hgvs_nt_column]], + kind="scores", + is_base_editor=False, ) - validate_column_names(self.dataframe[[required_score_column, hgvs_nt_column, hgvs_pro_column]], kind="scores") - validate_column_names(self.dataframe[[hgvs_pro_column, required_score_column, hgvs_nt_column]], kind="scores") def test_validation_ignores_column_ordering_counts(self): validate_column_names( - self.dataframe[[hgvs_nt_column, "count1", hgvs_pro_column, hgvs_splice_column, "count2"]], kind="counts" + self.dataframe[[hgvs_nt_column, "count1", hgvs_pro_column, hgvs_splice_column, "count2"]], + kind="counts", + is_base_editor=False, + ) + validate_column_names( + self.dataframe[["count1", "count2", hgvs_nt_column, hgvs_pro_column]], kind="counts", is_base_editor=False + ) + validate_column_names( + self.dataframe[[hgvs_pro_column, "count1", "count2", hgvs_nt_column]], kind="counts", is_base_editor=False ) - validate_column_names(self.dataframe[["count1", "count2", hgvs_nt_column, hgvs_pro_column]], kind="counts") - validate_column_names(self.dataframe[[hgvs_pro_column, "count1", "count2", hgvs_nt_column]], kind="counts") def test_validation_is_case_insensitive(self): - validate_column_names(self.dataframe.rename(columns={hgvs_nt_column: hgvs_nt_column.upper()}), kind="scores") validate_column_names( - self.dataframe.rename(columns={required_score_column: required_score_column.title()}), kind="scores" + self.dataframe.rename(columns={hgvs_nt_column: 
hgvs_nt_column.upper()}), kind="scores", is_base_editor=False + ) + validate_column_names( + self.dataframe.rename(columns={required_score_column: required_score_column.title()}), + kind="scores", + is_base_editor=False, ) def test_duplicate_hgvs_column_names_scores(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={hgvs_pro_column: hgvs_nt_column}), kind="scores") + validate_column_names( + self.dataframe.rename(columns={hgvs_pro_column: hgvs_nt_column}), kind="scores", is_base_editor=False + ) def test_duplicate_hgvs_column_names_counts(self): with self.assertRaises(ValidationError): validate_column_names( self.dataframe.drop([required_score_column], axis=1).rename(columns={hgvs_pro_column: hgvs_nt_column}), kind="counts", + is_base_editor=False, ) def test_duplicate_score_column_names(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={"extra": required_score_column}), kind="scores") + validate_column_names( + self.dataframe.rename(columns={"extra": required_score_column}), kind="scores", is_base_editor=False + ) def test_duplicate_data_column_names_scores(self): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={"count2": "count1"}), kind="scores") + validate_column_names( + self.dataframe.rename(columns={"count2": "count1"}), kind="scores", is_base_editor=False + ) def test_duplicate_data_column_names_counts(self): with self.assertRaises(ValidationError): validate_column_names( - self.dataframe.drop([required_score_column], axis=1).rename(columns={"count2": "count1"}), kind="counts" + self.dataframe.drop([required_score_column], axis=1).rename(columns={"count2": "count1"}), + kind="counts", + is_base_editor=False, ) # Written without @pytest.mark.parametrize. 
See: https://pytest.org/en/7.4.x/how-to/unittest.html#pytest-features-in-unittest-testcase-subclasses @@ -297,7 +369,9 @@ def test_invalid_column_names_scores(self): for value in invalid_values: with self.subTest(value=value): with self.assertRaises(ValidationError): - validate_column_names(self.dataframe.rename(columns={hgvs_splice_column: value}), kind="scores") + validate_column_names( + self.dataframe.rename(columns={hgvs_splice_column: value}), kind="scores", is_base_editor=False + ) def test_invalid_column_names_counts(self): invalid_values = [None, np.nan, "", " "] @@ -309,36 +383,55 @@ def test_invalid_column_names_counts(self): columns={hgvs_splice_column: value} ), kind="counts", + is_base_editor=False, ) def test_ignore_column_ordering_scores(self): validate_column_names( self.dataframe[[hgvs_splice_column, "extra", "count1", hgvs_pro_column, "score", hgvs_nt_column, "count2"]], kind="scores", + is_base_editor=False, ) def test_ignore_column_ordering_counts(self): validate_column_names( self.dataframe[[hgvs_splice_column, "extra", "count1", hgvs_pro_column, hgvs_nt_column, "count2"]], kind="counts", + is_base_editor=False, ) + def test_is_base_editor_and_contains_guide_sequence_column(self): + validate_column_names(self.dataframe, kind="scores", is_base_editor=True) + + def test_is_base_editor_and_does_not_contain_guide_sequence_column(self): + with self.assertRaises(ValidationError): + validate_column_names( + self.dataframe.drop(guide_sequence_column, axis=1), kind="scores", is_base_editor=True + ) + class TestChooseDataframeIndexColumn(DfTestCase): def setUp(self): super().setUp() + def test_guide_sequence_index_column(self): + index = choose_dataframe_index_column(self.dataframe, is_base_editor=True) + assert index == guide_sequence_column + def test_nt_index_column(self): - index = choose_dataframe_index_column(self.dataframe) + index = choose_dataframe_index_column(self.dataframe, is_base_editor=False) assert index == hgvs_nt_column def test_pro_index_column(self): - index = choose_dataframe_index_column(self.dataframe.drop(hgvs_nt_column, axis=1)) + index = choose_dataframe_index_column(self.dataframe.drop(hgvs_nt_column, axis=1), is_base_editor=False) assert index == hgvs_pro_column def test_no_valid_index_column(self): with self.assertRaises(ValidationError): - choose_dataframe_index_column(self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1)) + choose_dataframe_index_column( + self.dataframe.drop([hgvs_nt_column, hgvs_pro_column], axis=1), + is_base_editor=False, + ) class TestValidateHgvsPrefixCombinations(TestCase): diff --git a/tests/validation/dataframe/test_variant.py b/tests/validation/dataframe/test_variant.py index 810780f4..c8a0f258 100644 --- a/tests/validation/dataframe/test_variant.py +++ b/tests/validation/dataframe/test_variant.py @@ -9,6 +9,7 @@ hgvs_splice_column, ) from mavedb.lib.validation.dataframe.variant import ( + validate_guide_sequence_column, validate_hgvs_transgenic_column, validate_hgvs_genomic_column, parse_genomic_variant, @@ -810,6 +811,63 @@ def test_parse_mismatched_transgenic_variant(self): assert "target sequence mismatch" in error +class TestValidateGuideSequenceColumn(DfTestCase): + def setUp(self): + super().setUp() + + self.valid_guide_sequences = [ + pd.Series(["ATG", "TGA"], name="guide_sequence"), + pd.Series(["ATGC", "TGAC"], name="guide_sequence"), + pd.Series(["ATGCG", "TGACG"], name="guide_sequence"), + ] + + self.invalid_guide_sequences = [ + pd.Series(["ATG", "XYZ"], name="guide_sequence"), # invalid DNA 
sequence + pd.Series(["123", "123"], name="guide_sequence"), # contains numeric + ] + + self.invalid_index_guide_sequences = [ + pd.Series(["ATG", None], name="guide_sequence"), # contains None value + pd.Series(["ATG", "ATG"], name="guide_sequence"), # identical sequences + ] + + self.accession_test_case = AccessionTestCase() + + def test_valid_guide_sequences(self): + for column in self.valid_guide_sequences + self.invalid_index_guide_sequences: + with self.subTest(column=column): + validate_guide_sequence_column( + column, + is_index=False, + ) + + def test_invalid_guide_sequences(self): + for column in self.invalid_guide_sequences: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_guide_sequence_column( + column, + is_index=False, + ) + + def test_valid_guide_sequences_index(self): + for column in self.valid_guide_sequences: + with self.subTest(column=column): + validate_guide_sequence_column( + column, + is_index=True, + ) + + def test_invalid_guide_sequences_index(self): + for column in self.invalid_guide_sequences + self.invalid_index_guide_sequences: + with self.subTest(column=column): + with self.assertRaises(ValidationError): + validate_guide_sequence_column( + column, + is_index=True, + ) + + class TestValidateObservedSequenceTypes(unittest.TestCase): def setUp(self): super().setUp() diff --git a/tests/view_models/test_score_set.py b/tests/view_models/test_score_set.py index 1247020c..c155f9b7 100644 --- a/tests/view_models/test_score_set.py +++ b/tests/view_models/test_score_set.py @@ -3,7 +3,7 @@ from mavedb.view_models.publication_identifier import PublicationIdentifierCreate from mavedb.view_models.score_set import ScoreSetCreate, ScoreSetModify from mavedb.view_models.target_gene import TargetGeneCreate -from tests.helpers.constants import TEST_MINIMAL_SEQ_SCORESET +from tests.helpers.constants import TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_SEQ_SCORESET def test_cannot_create_score_set_without_a_target(): @@ -459,25 +459,10 @@ def test_cannot_create_score_set_with_normal_range_and_no_wild_type_score(): ], } - with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**jsonable_encoder(score_set_test)) - - assert "A normal range has been provided, but no wild type score has been provided." in str(exc_info.value) - - -def test_cannot_create_score_set_without_default_ranges(): - score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() - score_set_test["score_ranges"] = { - "wt_score": -0.5, - "ranges": [ - {"label": "range_1", "classification": "other", "range": (-1, 0)}, - ], - } - with pytest.raises(ValueError) as exc_info: ScoreSetModify(**score_set_test) - assert "unexpected value; permitted: 'normal', 'abnormal', 'not_specified'" in str(exc_info.value) + assert "A normal range has been provided, but no wild type score has been provided." in str(exc_info.value) @pytest.mark.parametrize("classification", ["normal", "abnormal", "not_specified"]) @@ -491,4 +476,24 @@ def test_can_create_score_set_with_any_range_classification(classification): ], } - ScoreSetModify(**jsonable_encoder(score_set_test)) + ScoreSetModify(**score_set_test) + assert "Unexpected classification value(s): other. 
Permitted values: ['normal', 'abnormal']" in str(exc_info.value) + + +def test_cannot_create_score_set_with_inconsistent_base_editor_flags(): + score_set_test = TEST_MINIMAL_ACC_SCORESET.copy() + + target_gene_one = TargetGeneCreate(**score_set_test["targetGenes"][0]) + target_gene_two = TargetGeneCreate(**score_set_test["targetGenes"][0]) + + target_gene_one.target_accession.is_base_editor = True + target_gene_two.target_accession.is_base_editor = False + + score_set_test.pop("targetGenes") + with pytest.raises(ValueError) as exc_info: + ScoreSetModify( + **score_set_test, + target_genes=[target_gene_one, target_gene_two], + ) + + assert "All target accessions must be of the same base editor type." in str(exc_info.value) From a9dc19afb254dddf81417348ab6595cfa2545d07 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 09:12:49 -0800 Subject: [PATCH 096/166] Add isBaseEditor Flag to Remaining Accession Tests --- tests/helpers/constants.py | 2 +- tests/view_models/test_target_gene.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index 828d0a48..b11add4f 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -691,7 +691,7 @@ "accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE, - "isBaseEditor": False, + "is_base_editor": False, }, } ], diff --git a/tests/view_models/test_target_gene.py b/tests/view_models/test_target_gene.py index 13f8b78a..e72eafd7 100644 --- a/tests/view_models/test_target_gene.py +++ b/tests/view_models/test_target_gene.py @@ -44,7 +44,7 @@ def test_create_target_gene_with_accession(): name = "BRCA1" category = "regulatory" external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 1}] - target_accession = {"accession": "NM_001637.3", "assembly": "GRCh37", "gene": "BRCA1"} + target_accession = {"accession": "NM_001637.3", "assembly": "GRCh37", "gene": "BRCA1", "isBaseEditor": False} externalIdentifier = TargetGeneCreate( name=name, category=category, @@ -206,7 +206,7 @@ def test_cant_create_target_gene_with_both_sequence_and_accession(): name = "UBE2I" category = "regulatory" external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 1}] - target_accession = {"accession": "NM_001637.3", "assembly": "GRCh37", "gene": "BRCA1"} + target_accession = {"accession": "NM_001637.3", "assembly": "GRCh37", "gene": "BRCA1", "isBaseEditor": False} target_sequence = { "sequenceType": "dna", "sequence": "ATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGA" From 38509961f172cda5bbe297cbe817ee2b2b32a87e Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 09:45:55 -0800 Subject: [PATCH 097/166] Add GUIDE_SEQUENCE_COLUMN constant to mave lib --- src/mavedb/lib/mave/constants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mavedb/lib/mave/constants.py b/src/mavedb/lib/mave/constants.py index f313436a..a94da0c1 100644 --- a/src/mavedb/lib/mave/constants.py +++ b/src/mavedb/lib/mave/constants.py @@ -6,6 +6,7 @@ HGVS_NT_COLUMN = "hgvs_nt" HGVS_SPLICE_COLUMN = "hgvs_splice" HGVS_PRO_COLUMN = "hgvs_pro" +GUIDE_SEQUENCE_COLUMN = "guide_sequence" HGVS_COLUMNS = sorted([HGVS_NT_COLUMN, HGVS_PRO_COLUMN, HGVS_SPLICE_COLUMN]) # META_DATA = 'meta_data' From 0b89482bdb38194b2f6316038547e99e831da1c2 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 09:46:13 -0800 Subject: [PATCH 098/166] Use 
existing boolean flag for transgenic marker in prefix validation --- src/mavedb/lib/validation/dataframe/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/lib/validation/dataframe/dataframe.py b/src/mavedb/lib/validation/dataframe/dataframe.py index 61b96bb3..a43f6b55 100644 --- a/src/mavedb/lib/validation/dataframe/dataframe.py +++ b/src/mavedb/lib/validation/dataframe/dataframe.py @@ -158,7 +158,7 @@ def validate_dataframe( hgvs_nt=prefixes[hgvs_nt_column], hgvs_splice=prefixes[hgvs_splice_column], hgvs_pro=prefixes[hgvs_pro_column], - transgenic=all(target.target_sequence for target in targets), + transgenic=score_set_is_sequence_based, ) From 965c4f56a0a76f450519abf37684d01edc871f58 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 11:27:00 -0800 Subject: [PATCH 099/166] Clarify error message for accession based variants with accessions missing from targets list --- src/mavedb/lib/validation/dataframe/column.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mavedb/lib/validation/dataframe/column.py b/src/mavedb/lib/validation/dataframe/column.py index 8505a8cc..ef6ee23c 100644 --- a/src/mavedb/lib/validation/dataframe/column.py +++ b/src/mavedb/lib/validation/dataframe/column.py @@ -82,7 +82,9 @@ def validate_variant_formatting(column: pd.Series, prefixes: list[str], targets: if not all(str(v).split(":")[1][:2] in prefixes for v in variants): raise ValidationError(f"variant column '{column.name}' has invalid variant prefixes") if not all(str(v).split(":")[0] in targets for v in variants): - raise ValidationError(f"variant column '{column.name}' has invalid accession identifiers") + raise ValidationError( + f"variant column '{column.name}' has invalid accession identifiers; some accession identifiers present in the score file were not added as targets" + ) else: if len(set(v[:2] for v in variants)) > 1: From 6d0d8f5c80b4656c1888ed976ef07614837737eb Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 11:29:11 -0800 Subject: [PATCH 100/166] Move guide sequence column to the end of the standard columns sorted list --- src/mavedb/lib/validation/dataframe/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/lib/validation/dataframe/dataframe.py b/src/mavedb/lib/validation/dataframe/dataframe.py index a43f6b55..edb253be 100644 --- a/src/mavedb/lib/validation/dataframe/dataframe.py +++ b/src/mavedb/lib/validation/dataframe/dataframe.py @@ -25,7 +25,7 @@ from cdot.hgvs.dataproviders import RESTDataProvider -STANDARD_COLUMNS = (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, guide_sequence_column, required_score_column) +STANDARD_COLUMNS = (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, required_score_column, guide_sequence_column) def validate_and_standardize_dataframe_pair( From 45afca84f8a57e214ea4d2042dbda3679affab4c Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 12:15:44 -0800 Subject: [PATCH 101/166] Add additional column validation tests --- tests/validation/dataframe/test_dataframe.py | 40 +++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/tests/validation/dataframe/test_dataframe.py b/tests/validation/dataframe/test_dataframe.py index 2eac2e83..0673523d 100644 --- a/tests/validation/dataframe/test_dataframe.py +++ b/tests/validation/dataframe/test_dataframe.py @@ -494,14 +494,50 @@ def test_same_df(self): def test_ignore_order(self): validate_variant_columns_match(self.dataframe, 
self.dataframe.iloc[::-1]) - def test_missing_column(self): + def test_missing_column_nt(self): with self.assertRaises(ValidationError): validate_variant_columns_match(self.dataframe, self.dataframe.drop(hgvs_nt_column, axis=1)) with self.assertRaises(ValidationError): validate_variant_columns_match(self.dataframe.drop(hgvs_nt_column, axis=1), self.dataframe) - def test_missing_variant(self): + def test_missing_column_pro(self): + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, self.dataframe.drop(hgvs_pro_column, axis=1)) + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe.drop(hgvs_pro_column, axis=1), self.dataframe) + + def test_missing_column_splice(self): + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, self.dataframe.drop(hgvs_splice_column, axis=1)) + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe.drop(hgvs_splice_column, axis=1), self.dataframe) + + def test_missing_column_guide(self): + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, self.dataframe.drop(guide_sequence_column, axis=1)) + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe.drop(guide_sequence_column, axis=1), self.dataframe) + + def test_missing_variant_nt(self): + df2 = self.dataframe.copy() + df2.loc[0, hgvs_nt_column] = None + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, df2) + + def test_missing_variant_pro(self): df2 = self.dataframe.copy() df2.loc[0, hgvs_pro_column] = None with self.assertRaises(ValidationError): validate_variant_columns_match(self.dataframe, df2) + + def test_missing_variant_splice(self): + df2 = self.dataframe.copy() + df2.loc[0, hgvs_splice_column] = None + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, df2) + + def test_missing_guide(self): + df2 = self.dataframe.copy() + df2.loc[0, guide_sequence_column] = None + with self.assertRaises(ValidationError): + validate_variant_columns_match(self.dataframe, df2) From d965f683b4a048964fed27feba2f5299ade2eac0 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 12:16:03 -0800 Subject: [PATCH 102/166] Fix sort order of dataframe test case columns --- tests/validation/dataframe/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/validation/dataframe/conftest.py b/tests/validation/dataframe/conftest.py index a0cd4cb0..38612df4 100644 --- a/tests/validation/dataframe/conftest.py +++ b/tests/validation/dataframe/conftest.py @@ -33,8 +33,8 @@ def setUp(self): hgvs_nt_column: ["g.1A>G", "g.1A>T"], hgvs_splice_column: ["c.1A>G", "c.1A>T"], hgvs_pro_column: ["p.Met1Val", "p.Met1Leu"], - guide_sequence_column: ["AG", "AG"], required_score_column: [1.0, 2.0], + guide_sequence_column: ["AG", "AG"], "extra": [12.0, 3.0], "count1": [3.0, 5.0], "count2": [9, 10], From 73a0799fd0874ae7efac21af42fcd6f8a9ca9f65 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 12:16:25 -0800 Subject: [PATCH 103/166] Use equality comparison over is operator for column name comparison --- src/mavedb/lib/validation/dataframe/dataframe.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mavedb/lib/validation/dataframe/dataframe.py b/src/mavedb/lib/validation/dataframe/dataframe.py index edb253be..be95b5b4 100644 --- a/src/mavedb/lib/validation/dataframe/dataframe.py 
+++ b/src/mavedb/lib/validation/dataframe/dataframe.py @@ -147,7 +147,7 @@ def validate_dataframe( else: prefixes[c] = df[column_mapping[c]].dropna()[0][0] - elif c is guide_sequence_column: + elif c == guide_sequence_column: validate_guide_sequence_column(df[column_mapping[c]], is_index=is_index) else: @@ -377,7 +377,7 @@ def validate_variant_columns_match(df1: pd.DataFrame, df2: pd.DataFrame): If both dataframes do not define the same variants within each column """ for c in df1.columns: - if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): + if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, guide_sequence_column): if c not in df2: raise ValidationError("both score and count dataframes must define matching HGVS columns") elif df1[c].isnull().all() and df2[c].isnull().all(): @@ -387,6 +387,6 @@ def validate_variant_columns_match(df1: pd.DataFrame, df2: pd.DataFrame): f"both score and count dataframes must define matching variants, discrepancy found in '{c}'" ) for c in df2.columns: - if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column): + if c.lower() in (hgvs_nt_column, hgvs_splice_column, hgvs_pro_column, guide_sequence_column): if c not in df1: raise ValidationError("both score and count dataframes must define matching HGVS columns") From 93086430ce33e5126e32e9c691a5b1cad51ca371 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 3 Mar 2025 12:17:23 -0800 Subject: [PATCH 104/166] Allow the Unix Domain Socket during test runs This allows the use of the vs-code pytest extension but still prevents the use of external connections. Enabling this socket makes it easier to test within the code editor. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 79467b15..e9681321 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,7 +97,7 @@ plugins = [ mypy_path = "mypy_stubs" [tool.pytest.ini_options] -addopts = "-v -rP --import-mode=importlib --disable-socket --allow-hosts localhost,::1,127.0.0.1" +addopts = "-v -rP --import-mode=importlib --disable-socket --allow-unix-socket --allow-hosts localhost,::1,127.0.0.1" asyncio_mode = 'strict' testpaths = "tests/" pythonpath = "." From 58f2af297b164e45e92988fd1e6f8ba2c2473009 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 7 Mar 2025 13:18:59 -0800 Subject: [PATCH 105/166] Add Multi-Variant Support for Accession Based Targets The hgvs package is not able to parse allelic variation (multi-variants denoted by brackets), which are often a key variant string in base editor data. 
We work around this by: - Parsing the multi-variant into MaveHGVS without any target info to ascertain whether it is syntactically valid - Parsing each subvariant against the provided transcript to ascertain whether it is informationally valid --- .../lib/validation/dataframe/variant.py | 56 ++++++++++++++++--- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/src/mavedb/lib/validation/dataframe/variant.py b/src/mavedb/lib/validation/dataframe/variant.py index b4b5761f..07c400d0 100644 --- a/src/mavedb/lib/validation/dataframe/variant.py +++ b/src/mavedb/lib/validation/dataframe/variant.py @@ -76,7 +76,7 @@ def validate_hgvs_transgenic_column(column: pd.Series, is_index: bool, targets: target_seqs = construct_target_sequence_mappings(column, targets) parsed_variants = [ - parse_transgenic_variant(idx, variant, target_seqs, len(targets) > 1) for idx, variant in column.items() + validate_transgenic_variant(idx, variant, target_seqs, len(targets) > 1) for idx, variant in column.items() ] # format and raise an error message that contains all invalid variants @@ -168,10 +168,10 @@ def validate_hgvs_genomic_column( hp, vr = None, None if hp is not None and vr is not None: - parsed_variants = [parse_genomic_variant(idx, variant, hp, vr) for idx, variant in column.items()] + parsed_variants = [validate_genomic_variant(idx, variant, hp, vr) for idx, variant in column.items()] else: parsed_variants = [ - parse_transgenic_variant( + validate_transgenic_variant( idx, variant, {target: None for target in target_accession_identifiers}, @@ -190,9 +190,46 @@ def validate_hgvs_genomic_column( return -def parse_genomic_variant( +def validate_genomic_variant( idx: Hashable, variant_string: str, parser: "Parser", validator: "Validator" ) -> tuple[bool, Optional[str]]: + def _validate_allelic_variation(variant: str) -> bool: + """ + The HGVS package is currently unable to parse allelic variation, and this doesn't seem like a planned + feature (see: https://github.com/biocommons/hgvs/issues/538). As a workaround and because MaveHGVS, + does support this sort of multivariant we can: + - Validate that the multi-variant allele is valid HGVS. + - Validate each sub-variant in an allele is valid with respect to the transcript. + + Parameters + ---------- + variant : str + The multi-variant allele to validate. + + Returns + ------- + bool + True if the allele is valid. + + Raises + ------ + MaveHgvsParseError + If the variant is not a valid HGVS string (for reasons of syntax). + hgvs.exceptions.HGVSError + If the variant is not a valid HGVS string (for reasons of transcript/variant inconsistency). + """ + transcript, multi_variant = variant.split(":") + + # Validate that the multi-variant allele is valid HGVS. + Variant(multi_variant) + prefix, variants = multi_variant[0:2], multi_variant[2:] + + # Validate each sub-variant in an allele is valid with respect to the transcript. + for subvariant in variants.strip("[]").split(";"): + validator.validate(parser.parse(f"{transcript}:{prefix}{subvariant}"), strict=False) + + return True + # Not pretty, but if we make it here we're guaranteed to have hgvs installed as a package, and we # should make use of the built in exception they provide for variant validation. 
import hgvs.exceptions @@ -202,14 +239,19 @@ def parse_genomic_variant( for variant in variant_string.split(" "): try: - validator.validate(parser.parse(variant), strict=False) + if "[" in variant: + _validate_allelic_variation(variant) + else: + validator.validate(parser.parse(variant), strict=False) + except MaveHgvsParseError: + return False, f"Failed to parse variant string '{variant}' at row {idx}." except hgvs.exceptions.HGVSError as e: - return False, f"Failed to parse row {idx} with HGVS exception: {e}" + return False, f"Failed to parse row {idx} with HGVS exception: {e}." return True, None -def parse_transgenic_variant( +def validate_transgenic_variant( idx: Hashable, variant_string: str, target_sequences: dict[str, Optional[str]], is_fully_qualified: bool ) -> tuple[bool, Optional[str]]: if not variant_string: From 84334cba0c175a094022246b03be190dd7ee8c05 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 7 Mar 2025 13:21:46 -0800 Subject: [PATCH 106/166] Multi-Variant Genomic Validation Tests Adds tests for multi-variant validation for accession based variants. As part of this change, an additional transcript was added to tests genomic based protein variants in addition to just testing nucleotide based variants. --- tests/conftest_optional.py | 12 +- tests/helpers/constants.py | 36 +- tests/helpers/data/refseq.NP_001637.4.fasta | 2 + .../helpers/data/refseq.NP_001637.4.fasta.fai | 1 + tests/helpers/util/score_set.py | 6 +- tests/routers/test_hgvs.py | 26 +- tests/routers/test_statistics.py | 8 +- tests/validation/dataframe/conftest.py | 17 +- tests/validation/dataframe/test_dataframe.py | 2 +- tests/validation/dataframe/test_variant.py | 311 +++++++++++------- tests/worker/test_jobs.py | 24 +- 11 files changed, 286 insertions(+), 159 deletions(-) create mode 100644 tests/helpers/data/refseq.NP_001637.4.fasta create mode 100644 tests/helpers/data/refseq.NP_001637.4.fasta.fai diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index e0f35fc4..722e8dc6 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -138,19 +138,23 @@ def data_provider(): To provide the transcript for the FASTA file without a network request, use: ``` - from helpers.utils.constants import TEST_CDOT_TRANSCRIPT + from helpers.utils.constants import TEST_NT_CDOT_TRANSCRIPT, TEST_PRO_CDOT_TRANSCRIPT from unittest.mock import patch import cdot.hgvs.dataproviders - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT): + ... + with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_PRO_CDOT_TRANSCRIPT): ... 
``` """ this_file_dir = os.path.dirname(abspath(getsourcefile(lambda: 0))) - test_fasta_file = os.path.join(this_file_dir, "helpers/data/refseq.NM_001637.3.fasta") + test_nt_fasta_file = os.path.join(this_file_dir, "helpers/data/refseq.NM_001637.3.fasta") + test_pro_fasta_file = os.path.join(this_file_dir, "helpers/data/refseq.NP_001637.4.fasta") data_provider = cdot.hgvs.dataproviders.RESTDataProvider( seqfetcher=cdot.hgvs.dataproviders.ChainedSeqFetcher( - cdot.hgvs.dataproviders.FastaSeqFetcher(test_fasta_file), + cdot.hgvs.dataproviders.FastaSeqFetcher(test_nt_fasta_file), + cdot.hgvs.dataproviders.FastaSeqFetcher(test_pro_fasta_file), # Include normal seqfetcher to fall back on mocked requests (or expose test shortcomings via socket connection attempts). cdot.hgvs.dataproviders.SeqFetcher(), ) diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index b11add4f..d0dfd4a1 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -28,6 +28,8 @@ TEST_HGVS_IDENTIFIER = f"{TEST_REFSEQ_IDENTIFIER}:p.Asp5Phe" VALID_ACCESSION = "NM_001637.3" +VALID_NT_ACCESSION = "NM_001637.3" +VALID_PRO_ACCESSION = "NP_001637.4" VALID_GENE = "BRCA1" VALID_CLINGEN_PA_ID = "PA2579908752" @@ -669,7 +671,7 @@ "category": "protein_coding", "externalIdentifiers": [], "targetAccession": { - "accession": VALID_ACCESSION, + "accession": VALID_NT_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE, "isBaseEditor": False, @@ -688,7 +690,7 @@ "name": "TEST2", "category": "protein_coding", "target_accession": { - "accession": VALID_ACCESSION, + "accession": VALID_NT_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE, "is_base_editor": False, @@ -707,7 +709,7 @@ "name": "TEST2", "category": "protein_coding", "target_accession": { - "accession": VALID_ACCESSION, + "accession": VALID_NT_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE, "isBaseEditor": False, @@ -750,7 +752,7 @@ "externalIdentifiers": [], "targetAccession": { "recordType": "TargetAccession", - "accession": VALID_ACCESSION, + "accession": VALID_NT_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE, "isBaseEditor": False, @@ -771,10 +773,32 @@ "officialCollections": [], } -TEST_CDOT_TRANSCRIPT = { +TEST_NT_CDOT_TRANSCRIPT = { "start_codon": 0, "stop_codon": 18, - "id": VALID_ACCESSION, + "id": VALID_NT_ACCESSION, + "gene_version": "313", + "gene_name": VALID_GENE, + "biotype": ["protein_coding"], + "protein": "NP_001628.1", + "genome_builds": { + "GRCh37": { + "cds_end": 1, + "cds_start": 18, + "contig": "NC_000007.13", + # The exons are non-sense but it doesn't really matter for the tests. 
+ "exons": [[1, 12, 20, 2001, 2440, "M196 I1 M61 I1 M181"], [12, 18, 19, 1924, 2000, None]], + "start": 1, + "stop": 18, + "strand": "+", + } + }, +} + +TEST_PRO_CDOT_TRANSCRIPT = { + "start_codon": 0, + "stop_codon": 18, + "id": VALID_PRO_ACCESSION, "gene_version": "313", "gene_name": VALID_GENE, "biotype": ["protein_coding"], diff --git a/tests/helpers/data/refseq.NP_001637.4.fasta b/tests/helpers/data/refseq.NP_001637.4.fasta new file mode 100644 index 00000000..6904295b --- /dev/null +++ b/tests/helpers/data/refseq.NP_001637.4.fasta @@ -0,0 +1,2 @@ +>NP_001637.4 range=chr7:36512941-36724494 5'pad=0 3'pad=0 strand=- repeatMasking=none +DYGYYDYGYYDYGYYDYGYYDYGYYDYGYYDYGYY diff --git a/tests/helpers/data/refseq.NP_001637.4.fasta.fai b/tests/helpers/data/refseq.NP_001637.4.fasta.fai new file mode 100644 index 00000000..eb93b5fa --- /dev/null +++ b/tests/helpers/data/refseq.NP_001637.4.fasta.fai @@ -0,0 +1 @@ +NP_001637.4 35 86 35 36 diff --git a/tests/helpers/util/score_set.py b/tests/helpers/util/score_set.py index 1da70620..69ff7ca5 100644 --- a/tests/helpers/util/score_set.py +++ b/tests/helpers/util/score_set.py @@ -14,9 +14,9 @@ from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate from tests.helpers.constants import ( - TEST_CDOT_TRANSCRIPT, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_SEQ_SCORESET, + TEST_NT_CDOT_TRANSCRIPT, TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, TEST_VALID_POST_MAPPED_VRS_CIS_PHASED_BLOCK, TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, @@ -55,7 +55,9 @@ def create_acc_score_set( jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.schema()) - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT + ): response = client.post("/api/v1/score-sets/", json=score_set_payload) assert response.status_code == 200, "Could not create accession based score set" diff --git a/tests/routers/test_hgvs.py b/tests/routers/test_hgvs.py index 9a19f709..b931d859 100644 --- a/tests/routers/test_hgvs.py +++ b/tests/routers/test_hgvs.py @@ -10,24 +10,24 @@ fastapi = pytest.importorskip("fastapi") hgvs = pytest.importorskip("hgvs") -from tests.helpers.constants import TEST_CDOT_TRANSCRIPT, VALID_ACCESSION, VALID_GENE +from tests.helpers.constants import TEST_NT_CDOT_TRANSCRIPT, VALID_NT_ACCESSION, VALID_GENE VALID_MAJOR_ASSEMBLY = "GRCh38" VALID_MINOR_ASSEMBLY = "GRCh38.p3" INVALID_ASSEMBLY = "undefined" -INVALID_ACCESSION = "NC_999999.99" +INVALID_NT_ACCESSION = "NC_999999.99" SMALL_ACCESSION = "NM_002977.4" INVALID_GENE = "fnord" VALID_TRANSCRIPT = "NM_001408458.1" INVALID_TRANSCRIPT = "NX_99999.1" -VALID_VARIANT = VALID_ACCESSION + ":c.1G>A" -INVALID_VARIANT = VALID_ACCESSION + ":c.1delA" +VALID_VARIANT = VALID_NT_ACCESSION + ":c.1G>A" +INVALID_VARIANT = VALID_NT_ACCESSION + ":c.1delA" HAS_PROTEIN_ACCESSION = "NM_000014.4" PROTEIN_ACCESSION = "NP_000005.2" def test_hgvs_fetch_valid(client, setup_router_db): - response = client.get(f"/api/v1/hgvs/fetch/{VALID_ACCESSION}") + response = client.get(f"/api/v1/hgvs/fetch/{VALID_NT_ACCESSION}") assert response.status_code == 200 assert response.text == '"GATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACA"' @@ -43,14 +43,18 @@ def test_hgvs_fetch_invalid(client, setup_router_db): def test_hgvs_validate_valid(client, setup_router_db): - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", 
return_value=TEST_CDOT_TRANSCRIPT): + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT + ): payload = {"variant": VALID_VARIANT} response = client.post("/api/v1/hgvs/validate", json=payload) assert response.status_code == 200 def test_hgvs_validate_invalid(client, setup_router_db): - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT + ): payload = {"variant": INVALID_VARIANT} response = client.post("/api/v1/hgvs/validate", json=payload) @@ -144,7 +148,9 @@ def test_hgvs_gene_transcript_invalid(client, setup_router_db): def test_hgvs_transcript_valid(client, setup_router_db): - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT + ): response = client.get(f"/api/v1/hgvs/{VALID_TRANSCRIPT}") assert response.status_code == 200 @@ -189,9 +195,9 @@ def test_hgvs_transcript_protein_no_protein(client, setup_router_db): def test_hgvs_transcript_protein_invalid(client, setup_router_db): with requests_mock.mock() as m: - m.get(f"https://cdot.cc/transcript/{INVALID_ACCESSION}", status_code=404) + m.get(f"https://cdot.cc/transcript/{INVALID_NT_ACCESSION}", status_code=404) - response = client.get(f"/api/v1/hgvs/protein/{INVALID_ACCESSION}") + response = client.get(f"/api/v1/hgvs/protein/{INVALID_NT_ACCESSION}") assert m.called assert response.status_code == 404 diff --git a/tests/routers/test_statistics.py b/tests/routers/test_statistics.py index a26f349e..f6ef6f6c 100644 --- a/tests/routers/test_statistics.py +++ b/tests/routers/test_statistics.py @@ -12,7 +12,7 @@ from tests.helpers.constants import ( TEST_BIORXIV_IDENTIFIER, - TEST_CDOT_TRANSCRIPT, + TEST_NT_CDOT_TRANSCRIPT, TEST_KEYWORDS, TEST_MEDRXIV_IDENTIFIER, TEST_MINIMAL_ACC_SCORESET, @@ -44,7 +44,9 @@ @pytest.fixture def setup_acc_scoreset(setup_router_db, session, data_provider, client, data_files): experiment = create_experiment(client) - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT + ): score_set = create_acc_score_set(client, experiment["urn"]) score_set = mock_worker_variant_insertion( client, session, data_provider, score_set, data_files / "scores_acc.csv" @@ -242,7 +244,7 @@ def test_target_gene_identifier_statistiscs( experiment = create_experiment(client) if "targetAccession" in target: with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ): unpublished_score_set = create_acc_score_set(client, experiment["urn"]) unpublished_score_set = mock_worker_variant_insertion( diff --git a/tests/validation/dataframe/conftest.py b/tests/validation/dataframe/conftest.py index 38612df4..0cbba30d 100644 --- a/tests/validation/dataframe/conftest.py +++ b/tests/validation/dataframe/conftest.py @@ -9,19 +9,22 @@ guide_sequence_column, required_score_column, ) -from tests.helpers.constants import TEST_CDOT_TRANSCRIPT +from tests.helpers.constants import 
TEST_NT_CDOT_TRANSCRIPT, TEST_PRO_CDOT_TRANSCRIPT @pytest.fixture def mocked_data_provider_class_attr(request): """ - Sets the `human_data_provider` attribute on the class from the requesting - test context to the `data_provider` fixture. This allows fixture use across - the `unittest.TestCase` class. + Sets the `mocked_nt_human_data_provider` and `mocked_pro_human_data_provider` + attributes on the class from the requesting test context to the `data_provider` + fixture. This allows fixture use across the `unittest.TestCase` class. """ - data_provider = mock.Mock() - data_provider._get_transcript.return_value = TEST_CDOT_TRANSCRIPT - request.cls.mocked_human_data_provider = data_provider + nt_data_provider = mock.Mock() + nt_data_provider._get_transcript.return_value = TEST_NT_CDOT_TRANSCRIPT + pro_data_provider = mock.Mock() + pro_data_provider._get_transcript.return_value = TEST_PRO_CDOT_TRANSCRIPT + request.cls.mocked_nt_human_data_provider = nt_data_provider + request.cls.mocked_pro_human_data_provider = pro_data_provider # Special DF Test Case that contains dummy data for tests below diff --git a/tests/validation/dataframe/test_dataframe.py b/tests/validation/dataframe/test_dataframe.py index 0673523d..884d271d 100644 --- a/tests/validation/dataframe/test_dataframe.py +++ b/tests/validation/dataframe/test_dataframe.py @@ -144,7 +144,7 @@ class TestValidateStandardizeDataFramePair(DfTestCase): def test_no_targets(self): with self.assertRaises(ValueError): validate_and_standardize_dataframe_pair( - self.dataframe, counts_df=None, targets=[], hdp=self.mocked_human_data_provider + self.dataframe, counts_df=None, targets=[], hdp=self.mocked_nt_human_data_provider ) # TODO: Add additional DataFrames. Realistically, if other unit tests pass this function is ok diff --git a/tests/validation/dataframe/test_variant.py b/tests/validation/dataframe/test_variant.py index c8a0f258..931c044b 100644 --- a/tests/validation/dataframe/test_variant.py +++ b/tests/validation/dataframe/test_variant.py @@ -12,14 +12,19 @@ validate_guide_sequence_column, validate_hgvs_transgenic_column, validate_hgvs_genomic_column, - parse_genomic_variant, - parse_transgenic_variant, + validate_genomic_variant, + validate_transgenic_variant, validate_observed_sequence_types, validate_hgvs_prefix_combinations, ) from mavedb.lib.validation.exceptions import ValidationError -from tests.helpers.constants import VALID_ACCESSION, TEST_CDOT_TRANSCRIPT +from tests.helpers.constants import ( + VALID_NT_ACCESSION, + VALID_PRO_ACCESSION, + TEST_NT_CDOT_TRANSCRIPT, + TEST_PRO_CDOT_TRANSCRIPT, +) from tests.validation.dataframe.conftest import DfTestCase @@ -348,45 +353,60 @@ def test_valid_column_values_wrong_column_name_multi_target(self): # Spoof the accession type class AccessionTestCase: - def __init__(self): - self.accession = VALID_ACCESSION + def __init__(self, accession): + self.accession = accession class GenomicColumnValidationTestCase(DfTestCase): def setUp(self): super().setUp() - self.accession_test_case = AccessionTestCase() + self.accession_test_case = [AccessionTestCase(VALID_NT_ACCESSION), AccessionTestCase(VALID_PRO_ACCESSION)] + + self.valid_hgvs_nt_column = pd.Series( + [f"{VALID_NT_ACCESSION}:c.1G>A", f"{VALID_NT_ACCESSION}:c.2A>T", f"{VALID_NT_ACCESSION}:c.[1G>A;2A>T]"], + name=hgvs_nt_column, + ) + + self.valid_hgvs_pro_column = pd.Series( + [ + f"{VALID_PRO_ACCESSION}:p.Asp1Tyr", + f"{VALID_PRO_ACCESSION}:p.Tyr2Asp", + f"{VALID_PRO_ACCESSION}:p.[Asp1Tyr;Tyr2Asp]", + ], + name=hgvs_pro_column, + ) - 
self.valid_hgvs_column = pd.Series( - [f"{VALID_ACCESSION}:c.1G>A", f"{VALID_ACCESSION}:c.2A>T"], name=hgvs_nt_column + self.missing_data = pd.Series([f"{VALID_NT_ACCESSION}:c.3T>G", None], name=hgvs_nt_column) + self.duplicate_data = pd.Series( + [f"{VALID_NT_ACCESSION}:c.4A>G", f"{VALID_NT_ACCESSION}:c.4A>G"], name=hgvs_nt_column ) - self.missing_data = pd.Series([f"{VALID_ACCESSION}:c.3T>G", None], name=hgvs_nt_column) - self.duplicate_data = pd.Series([f"{VALID_ACCESSION}:c.4A>G", f"{VALID_ACCESSION}:c.4A>G"], name=hgvs_nt_column) self.invalid_hgvs_columns_by_name = [ - pd.Series([f"{VALID_ACCESSION}:g.1A>G", f"{VALID_ACCESSION}:g.1A>T"], name=hgvs_splice_column), - pd.Series([f"{VALID_ACCESSION}:g.1A>G", f"{VALID_ACCESSION}:g.1A>T"], name=hgvs_pro_column), - pd.Series([f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.1A>T"], name=hgvs_pro_column), - pd.Series([f"{VALID_ACCESSION}:n.1A>G", f"{VALID_ACCESSION}:n.1A>T"], name=hgvs_pro_column), - pd.Series([f"{VALID_ACCESSION}:p.Met1Val", f"{VALID_ACCESSION}:p.Met1Leu"], name=hgvs_nt_column), + pd.Series([f"{VALID_NT_ACCESSION}:g.1A>G", f"{VALID_NT_ACCESSION}:g.1A>T"], name=hgvs_splice_column), + pd.Series([f"{VALID_NT_ACCESSION}:g.1A>G", f"{VALID_NT_ACCESSION}:g.1A>T"], name=hgvs_pro_column), + pd.Series([f"{VALID_NT_ACCESSION}:c.1A>G", f"{VALID_NT_ACCESSION}:c.1A>T"], name=hgvs_pro_column), + pd.Series([f"{VALID_NT_ACCESSION}:n.1A>G", f"{VALID_NT_ACCESSION}:n.1A>T"], name=hgvs_pro_column), + pd.Series([f"{VALID_NT_ACCESSION}:p.Met1Val", f"{VALID_NT_ACCESSION}:p.Met1Leu"], name=hgvs_nt_column), ] self.invalid_hgvs_columns_by_contents = [ pd.Series( - [f"{VALID_ACCESSION}:r.1a>g", f"{VALID_ACCESSION}:r.1a>u"], name=hgvs_splice_column + [f"{VALID_NT_ACCESSION}:r.1a>g", f"{VALID_NT_ACCESSION}:r.1a>u"], name=hgvs_splice_column ), # rna not allowed pd.Series( - [f"{VALID_ACCESSION}:r.1a>g", f"{VALID_ACCESSION}:r.1a>u"], name=hgvs_nt_column + [f"{VALID_NT_ACCESSION}:r.1a>g", f"{VALID_NT_ACCESSION}:r.1a>u"], name=hgvs_nt_column ), # rna not allowed - pd.Series([f"{VALID_ACCESSION}:c.1A>G", "_wt"], name=hgvs_nt_column), # old special variant - pd.Series([f"{VALID_ACCESSION}:p.Met1Leu", "_sy"], name=hgvs_pro_column), # old special variant - pd.Series([f"{VALID_ACCESSION}:n.1A>G", f"{VALID_ACCESSION}:c.1A>T"], name=hgvs_nt_column), # mixed prefix + pd.Series([f"{VALID_NT_ACCESSION}:c.1A>G", "_wt"], name=hgvs_nt_column), # old special variant + pd.Series([f"{VALID_NT_ACCESSION}:p.Met1Leu", "_sy"], name=hgvs_pro_column), # old special variant + pd.Series( + [f"{VALID_NT_ACCESSION}:n.1A>G", f"{VALID_NT_ACCESSION}:c.1A>T"], name=hgvs_nt_column + ), # mixed prefix pd.Series( - [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:p.Met1Leu"], name=hgvs_pro_column + [f"{VALID_NT_ACCESSION}:c.1A>G", f"{VALID_NT_ACCESSION}:p.Met1Leu"], name=hgvs_pro_column ), # mixed types/prefix pd.Series(["c.1A>G", "p.Met1Leu"], name=hgvs_pro_column), # variants should be fully qualified - pd.Series([f"{VALID_ACCESSION}:c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric + pd.Series([f"{VALID_NT_ACCESSION}:c.1A>G", 2.5], name=hgvs_nt_column), # contains numeric pd.Series([1.0, 2.5], name=hgvs_nt_column), # contains numeric pd.Series([1.0, 2.5], name=hgvs_splice_column), # contains numeric pd.Series([1.0, 2.5], name=hgvs_pro_column), # contains numeric @@ -394,7 +414,7 @@ def setUp(self): self.invalid_hgvs_columns_by_contents_under_strict_validation = [ pd.Series( - [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.5A>T"], name=hgvs_nt_column + 
[f"{VALID_NT_ACCESSION}:c.1A>G", f"{VALID_NT_ACCESSION}:c.5A>T"], name=hgvs_nt_column ), # out of bounds for target ] @@ -408,8 +428,8 @@ def test_valid_variant_invalid_missing_index(self): validate_hgvs_genomic_column( self.missing_data, is_index=True, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, ) # type: ignore # Identical behavior for installed/uninstalled HGVS @@ -420,8 +440,8 @@ def test_valid_variant_invalid_duplicate_index(self): validate_hgvs_genomic_column( self.duplicate_data, is_index=True, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, ) # type: ignore @@ -441,45 +461,65 @@ def patched_data_provider_class_attr(request, data_provider): class TestValidateHgvsGenomicColumnHgvsInstalled(GenomicColumnValidationTestCase): def test_valid_variant(self): with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT + ): + validate_hgvs_genomic_column( + self.valid_hgvs_nt_column, + is_index=False, + targets=self.accession_test_case, + hdp=self.patched_human_data_provider, + ) # type: ignore + + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_PRO_CDOT_TRANSCRIPT ): validate_hgvs_genomic_column( - self.valid_hgvs_column, + self.valid_hgvs_pro_column, is_index=False, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=self.patched_human_data_provider, ) # type: ignore def test_valid_variant_valid_missing(self): with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ): validate_hgvs_genomic_column( self.missing_data, is_index=False, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=self.patched_human_data_provider, ) # type: ignore def test_valid_variant_valid_duplicate(self): with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ): validate_hgvs_genomic_column( self.missing_data, is_index=False, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=self.patched_human_data_provider, ) # type: ignore def test_valid_variant_index(self): with patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ): validate_hgvs_genomic_column( - self.valid_hgvs_column, + self.valid_hgvs_nt_column, is_index=True, - targets=[self.accession_test_case], + targets=self.accession_test_case, + hdp=self.patched_human_data_provider, + ) # type: ignore + + with patch.object( + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_PRO_CDOT_TRANSCRIPT + ): + validate_hgvs_genomic_column( + self.valid_hgvs_pro_column, + is_index=True, + targets=self.accession_test_case, hdp=self.patched_human_data_provider, ) # type: ignore @@ -491,13 +531,13 @@ def test_invalid_column_values(self): self.subTest(column=column), 
self.assertRaises(ValidationError), patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ), ): validate_hgvs_genomic_column( column, is_index=False, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=self.patched_human_data_provider, # type: ignore ) for column in ( @@ -507,13 +547,13 @@ def test_invalid_column_values(self): self.subTest(column=column), self.assertRaises(ValidationError), patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ), ): validate_hgvs_genomic_column( column, is_index=True, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=self.patched_human_data_provider, # type: ignore ) @@ -523,13 +563,13 @@ def test_valid_column_values_wrong_column_name(self): self.subTest(column=column), self.assertRaises(ValidationError), patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ), ): validate_hgvs_genomic_column( column, is_index=False, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=self.patched_human_data_provider, # type: ignore ) for column in self.invalid_hgvs_columns_by_name: @@ -537,13 +577,13 @@ def test_valid_column_values_wrong_column_name(self): self.subTest(column=column), self.assertRaises(ValidationError), patch.object( - cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT + cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_NT_CDOT_TRANSCRIPT ), ): validate_hgvs_genomic_column( column, is_index=True, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=self.patched_human_data_provider, # type: ignore ) @@ -555,54 +595,70 @@ class TestValidateHgvsGenomicColumnHgvsNotInstalled(GenomicColumnValidationTestC def test_valid_variant_strict_validation(self): with self.assertRaises(ModuleNotFoundError): validate_hgvs_genomic_column( - self.valid_hgvs_column, + self.valid_hgvs_nt_column, is_index=False, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, + ) # type: ignore + + with self.assertRaises(ModuleNotFoundError): + validate_hgvs_genomic_column( + self.valid_hgvs_nt_column, + is_index=True, + targets=self.accession_test_case, + hdp=self.mocked_pro_human_data_provider, ) # type: ignore def test_valid_variant_limited_validation(self): - validate_hgvs_genomic_column( - self.valid_hgvs_column, is_index=False, targets=[self.accession_test_case], hdp=None - ) # type: ignore + for column in [self.valid_hgvs_nt_column, self.valid_hgvs_pro_column]: + with self.subTest(column=column): + validate_hgvs_genomic_column(column, is_index=False, targets=self.accession_test_case, hdp=None) def test_valid_variant_valid_missing_strict_validation(self): with self.assertRaises(ModuleNotFoundError): validate_hgvs_genomic_column( self.missing_data, is_index=False, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, ) # type: ignore def 
test_valid_variant_valid_missing_limited_validation(self): - validate_hgvs_genomic_column(self.missing_data, is_index=False, targets=[self.accession_test_case], hdp=None) # type: ignore + validate_hgvs_genomic_column(self.missing_data, is_index=False, targets=self.accession_test_case, hdp=None) # type: ignore def test_valid_variant_valid_duplicate_strict_validation(self): with self.assertRaises(ModuleNotFoundError): validate_hgvs_genomic_column( self.missing_data, is_index=False, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, ) # type: ignore def test_valid_variant_valid_duplicate_limited_validation(self): - validate_hgvs_genomic_column(self.missing_data, is_index=False, targets=[self.accession_test_case], hdp=None) # type: ignore + validate_hgvs_genomic_column(self.missing_data, is_index=False, targets=self.accession_test_case, hdp=None) # type: ignore def test_valid_variant_index_strict_validation(self): with self.assertRaises(ModuleNotFoundError): validate_hgvs_genomic_column( - self.valid_hgvs_column, + self.valid_hgvs_nt_column, is_index=True, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, + ) # type: ignore + + with self.assertRaises(ModuleNotFoundError): + validate_hgvs_genomic_column( + self.valid_hgvs_pro_column, + is_index=True, + targets=self.accession_test_case, + hdp=self.mocked_pro_human_data_provider, ) # type: ignore def test_valid_variant_index_limited_validation(self): - validate_hgvs_genomic_column( - self.valid_hgvs_column, is_index=True, targets=[self.accession_test_case], hdp=None - ) # type: ignore + for column in [self.valid_hgvs_nt_column, self.valid_hgvs_pro_column]: + with self.subTest(column=column): + validate_hgvs_genomic_column(column, is_index=True, targets=self.accession_test_case, hdp=None) def test_invalid_column_values_strict_validation(self): for column in ( @@ -612,8 +668,8 @@ def test_invalid_column_values_strict_validation(self): validate_hgvs_genomic_column( column, is_index=False, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, # type: ignore + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, # type: ignore ) for column in ( self.invalid_hgvs_columns_by_contents + self.invalid_hgvs_columns_by_contents_under_strict_validation @@ -622,8 +678,8 @@ def test_invalid_column_values_strict_validation(self): validate_hgvs_genomic_column( column, is_index=True, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, # type: ignore + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, # type: ignore ) def test_invalid_column_values_limited_validation(self): @@ -632,7 +688,7 @@ def test_invalid_column_values_limited_validation(self): validate_hgvs_genomic_column( column, is_index=False, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=None, # type: ignore ) for column in self.invalid_hgvs_columns_by_contents: @@ -640,7 +696,7 @@ def test_invalid_column_values_limited_validation(self): validate_hgvs_genomic_column( column, is_index=True, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=None, # type: ignore ) for column in self.invalid_hgvs_columns_by_contents_under_strict_validation: @@ -648,7 +704,7 @@ def test_invalid_column_values_limited_validation(self): validate_hgvs_genomic_column( 
column, is_index=True, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=None, # type: ignore ) @@ -658,16 +714,16 @@ def test_valid_column_values_wrong_column_name_strict_validation(self): validate_hgvs_genomic_column( column, is_index=False, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, # type: ignore + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, # type: ignore ) for column in self.invalid_hgvs_columns_by_name: with self.subTest(column=column), self.assertRaises(ValidationError): validate_hgvs_genomic_column( column, is_index=True, - targets=[self.accession_test_case], - hdp=self.mocked_human_data_provider, # type: ignore + targets=self.accession_test_case, + hdp=self.mocked_nt_human_data_provider, # type: ignore ) def test_valid_column_values_wrong_column_name_limited_validation(self): @@ -676,7 +732,7 @@ def test_valid_column_values_wrong_column_name_limited_validation(self): validate_hgvs_genomic_column( column, is_index=False, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=None, # type: ignore ) for column in self.invalid_hgvs_columns_by_name: @@ -684,12 +740,12 @@ def test_valid_column_values_wrong_column_name_limited_validation(self): validate_hgvs_genomic_column( column, is_index=True, - targets=[self.accession_test_case], + targets=self.accession_test_case, hdp=None, # type: ignore ) -class TestParseGenomicVariant(unittest.TestCase): +class TestValidateGenomicVariant(unittest.TestCase): def setUp(self): super().setUp() @@ -699,94 +755,121 @@ def setUp(self): self.validator.validate.return_value = True self.falsy_variant_strings = [None, ""] - self.valid_hgvs_column = pd.Series( - [f"{VALID_ACCESSION}:c.1G>A", f"{VALID_ACCESSION}:c.2A>T"], name=hgvs_nt_column + self.valid_hgvs_nt_column = pd.Series( + [f"{VALID_NT_ACCESSION}:c.1G>A", f"{VALID_NT_ACCESSION}:c.2A>T", f"{VALID_NT_ACCESSION}:c.[2A>T;1G>A]"], + name=hgvs_nt_column, + ) + self.invalid_hgvs_nt_column = pd.Series( + [ + f"{VALID_NT_ACCESSION}:c.1laksdfG>A", + f"{VALID_NT_ACCESSION}:c.2kadlfjA>T", + f"{VALID_NT_ACCESSION}:[c.2A>T;c.1G>A]", + ], + name=hgvs_nt_column, + ) + self.valid_hgvs_pro_column = pd.Series( + [ + f"{VALID_PRO_ACCESSION}:p.Asp1Tyr", + f"{VALID_PRO_ACCESSION}:p.Tyr2Asp", + f"{VALID_PRO_ACCESSION}:p.[Asp1Tyr;Tyr2Asp]", + ], + name=hgvs_pro_column, ) - self.invalid_hgvs_column = pd.Series( - [f"{VALID_ACCESSION}:c.1laksdfG>A", f"{VALID_ACCESSION}:c.2kadlfjA>T"], name=hgvs_nt_column + self.invalid_hgvs_pro_column = pd.Series( + [ + f"{VALID_PRO_ACCESSION}:p.1laksdfG>A", + f"{VALID_PRO_ACCESSION}:p.2kadlfjA>T", + f"{VALID_PRO_ACCESSION}:[p.Asp1Tyr;p.Tyr2Asp]", + ], + name=hgvs_pro_column, ) @unittest.skipUnless(HGVS_INSTALLED, "HGVS module not installed") -class TestParseGenomicVariantHgvsInstalled(TestParseGenomicVariant): - def test_parse_genomic_variant_nonetype_variant_string(self): - for variant_string in self.falsy_variant_strings: +class TestValidateGenomicVariantHgvsInstalled(TestValidateGenomicVariant): + def test_validate_genomic_variant_nonetype_variant_string(self): + for idx, variant_string in enumerate(self.falsy_variant_strings): with self.subTest(variant_string=variant_string): - valid, error = parse_genomic_variant(0, None, self.parser, self.validator) + valid, error = validate_genomic_variant(idx, None, self.parser, self.validator) assert valid assert error is None - def test_parse_valid_hgvs_variant(self): - for variant_string in self.valid_hgvs_column: + def 
test_validate_valid_hgvs_variant(self): + for idx, variant_string in enumerate([self.valid_hgvs_nt_column, self.valid_hgvs_pro_column]): with self.subTest(variant_string=variant_string): - valid, error = parse_genomic_variant(0, self.valid_hgvs_column[0], self.parser, self.validator) + valid, error = validate_genomic_variant(idx, variant_string[0], self.parser, self.validator) assert valid assert error is None - def test_parse_invalid_hgvs_variant(self): + def test_validate_invalid_hgvs_variant(self): from hgvs.exceptions import HGVSError self.validator.validate.side_effect = HGVSError("Invalid variant") - for variant_string in self.invalid_hgvs_column: + for idx, variant_string in enumerate((self.invalid_hgvs_nt_column, self.invalid_hgvs_pro_column)): with self.subTest(variant_string=variant_string): - valid, error = parse_genomic_variant(0, self.valid_hgvs_column[0], self.parser, self.validator) + valid, error = validate_genomic_variant(idx, variant_string[0], self.parser, self.validator) assert not valid - assert "Failed to parse row 0 with HGVS exception:" in error + assert f"Failed to parse row {idx} with HGVS exception:" in error @unittest.skipIf(HGVS_INSTALLED, "HGVS module installed") -class TestParseGenomicVariantHgvsNotInstalled(TestParseGenomicVariant): - def test_parse_genomic_variant_nonetype_variant_string(self): - for variant_string in self.falsy_variant_strings: +class TestValidateGenomicVariantHgvsNotInstalled(TestValidateGenomicVariant): + def test_validate_genomic_variant_nonetype_variant_string(self): + for idx, variant_string in enumerate(self.falsy_variant_strings): with self.subTest(variant_string=variant_string), self.assertRaises(ModuleNotFoundError): - parse_genomic_variant(0, None, self.parser, self.validator) + validate_genomic_variant(idx, None, self.parser, self.validator) - def test_parse_valid_hgvs_variant(self): - for variant_string in self.valid_hgvs_column: + def test_validate_valid_hgvs_variant(self): + for idx, variant_string in enumerate( + [column for column in [self.valid_hgvs_nt_column + self.valid_hgvs_pro_column]] + ): with self.subTest(variant_string=variant_string), self.assertRaises(ModuleNotFoundError): - parse_genomic_variant(0, self.valid_hgvs_column[0], self.parser, self.validator) + validate_genomic_variant(idx, variant_string, self.parser, self.validator) - def test_parse_invalid_hgvs_variant(self): - for variant_string in self.invalid_hgvs_column: + def test_validate_invalid_hgvs_variant(self): + for idx, variant_string in enumerate( + [column for column in [self.invalid_hgvs_nt_column + self.invalid_hgvs_pro_column]] + ): with self.subTest(variant_string=variant_string), self.assertRaises(ModuleNotFoundError): - parse_genomic_variant(0, self.valid_hgvs_column[0], self.parser, self.validator) + validate_genomic_variant(idx, variant_string, self.parser, self.validator) -class TestParseTransgenicVariant(unittest.TestCase): +class TestValidateTransgenicVariant(unittest.TestCase): def setUp(self): super().setUp() - self.target_sequences = {f"{VALID_ACCESSION}": "ATGC"} + self.target_sequences = {f"{VALID_NT_ACCESSION}": "ATGC"} self.falsy_variant_strings = [None, ""] self.valid_fully_qualified_transgenic_column = pd.Series( - [f"{VALID_ACCESSION}:c.1A>G", f"{VALID_ACCESSION}:c.2T>G {VALID_ACCESSION}:c.2T>G"], name=hgvs_nt_column + [f"{VALID_NT_ACCESSION}:c.1A>G", f"{VALID_NT_ACCESSION}:c.2T>G {VALID_NT_ACCESSION}:c.2T>G"], + name=hgvs_nt_column, ) self.valid_basic_transgenic_column = pd.Series(["c.1A>G", "c.2T>G c.2T>G"], 
name=hgvs_nt_column) self.invalid_transgenic_column = pd.Series(["123A>X", "NM_001:123A>Y"], name=hgvs_nt_column) self.mismatched_transgenic_column = pd.Series(["c.1T>G", "c.2A>G"], name=hgvs_nt_column) - def test_parse_transgenic_variant_nonetype_variant_string(self): + def test_validate_transgenic_variant_nonetype_variant_string(self): for variant_string in self.falsy_variant_strings: with self.subTest(variant_string=variant_string): - valid, error = parse_transgenic_variant(0, None, self.target_sequences, is_fully_qualified=False) + valid, error = validate_transgenic_variant(0, None, self.target_sequences, is_fully_qualified=False) assert valid assert error is None - def test_parse_valid_fully_qualified_transgenic_variant(self): + def test_validate_valid_fully_qualified_transgenic_variant(self): for variant_string in self.valid_fully_qualified_transgenic_column: with self.subTest(variant_string=variant_string): - valid, error = parse_transgenic_variant( + valid, error = validate_transgenic_variant( 0, variant_string, self.target_sequences, is_fully_qualified=True ) assert valid assert error is None - def test_parse_valid_basic_transgenic_variant(self): + def test_validate_valid_basic_transgenic_variant(self): for variant_string in self.valid_basic_transgenic_column: with self.subTest(variant_string=variant_string): - valid, error = parse_transgenic_variant( + valid, error = validate_transgenic_variant( 0, variant_string, self.target_sequences, is_fully_qualified=False ) assert valid @@ -795,7 +878,7 @@ def test_parse_valid_basic_transgenic_variant(self): def test_parse_invalid_transgenic_variant(self): for variant_string in self.invalid_transgenic_column: with self.subTest(variant_string=variant_string): - valid, error = parse_transgenic_variant( + valid, error = validate_transgenic_variant( 0, variant_string, self.target_sequences, is_fully_qualified=False ) assert not valid @@ -804,7 +887,7 @@ def test_parse_invalid_transgenic_variant(self): def test_parse_mismatched_transgenic_variant(self): for variant_string in self.mismatched_transgenic_column: with self.subTest(variant_string=variant_string): - valid, error = parse_transgenic_variant( + valid, error = validate_transgenic_variant( 0, variant_string, self.target_sequences, is_fully_qualified=False ) assert not valid @@ -831,7 +914,7 @@ def setUp(self): pd.Series(["ATG", "ATG"], name="guide_sequence"), # identical sequences ] - self.accession_test_case = AccessionTestCase() + self.accession_test_case = [AccessionTestCase(VALID_PRO_ACCESSION), AccessionTestCase(VALID_NT_ACCESSION)] def test_valid_guide_sequences(self): for column in self.valid_guide_sequences + self.invalid_index_guide_sequences: diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index f7baba9e..f148be78 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -40,19 +40,19 @@ from tests.helpers.constants import ( - TEST_CDOT_TRANSCRIPT, TEST_CLINGEN_SUBMISSION_RESPONSE, TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE, TEST_CLINGEN_LDH_LINKING_RESPONSE, + TEST_NT_CDOT_TRANSCRIPT, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_EXPERIMENT, TEST_MINIMAL_SEQ_SCORESET, TEST_VARIANT_MAPPING_SCAFFOLD, - VALID_ACCESSION, + VALID_NT_ACCESSION, TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS1_X, - TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS1_X, TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, + TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS1_X, TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, ) from tests.helpers.util.exceptions import 
awaitable_exception @@ -105,7 +105,7 @@ async def setup_records_files_and_variants(session, async_client, data_files, in with patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, + return_value=TEST_NT_CDOT_TRANSCRIPT, ): result = await create_variants_for_score_set(worker_ctx, uuid4().hex, score_set.id, 1, scores, counts) @@ -187,7 +187,7 @@ async def setup_mapping_output(async_client, session, score_set, empty=False): { "exception": "encountered 1 invalid variant strings.", "detail": [ - "Failed to parse row 0 with HGVS exception: NM_001637.3:c.1T>A: Variant reference (T) does not agree with reference sequence (G)" + "Failed to parse row 0 with HGVS exception: NM_001637.3:c.1T>A: Variant reference (T) does not agree with reference sequence (G)." ], }, ), @@ -208,13 +208,13 @@ async def test_create_variants_for_score_set_with_validation_error( if input_score_set == TEST_MINIMAL_SEQ_SCORESET: scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "c.1T>A" else: - scores.loc[:, HGVS_NT_COLUMN].iloc[0] = f"{VALID_ACCESSION}:c.1T>A" + scores.loc[:, HGVS_NT_COLUMN].iloc[0] = f"{VALID_NT_ACCESSION}:c.1T>A" with ( patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, + return_value=TEST_NT_CDOT_TRANSCRIPT, ) as hdp, ): result = await create_variants_for_score_set( @@ -321,7 +321,7 @@ async def test_create_variants_for_score_set_with_existing_variants( with patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, + return_value=TEST_NT_CDOT_TRANSCRIPT, ) as hdp: result = await create_variants_for_score_set( standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts @@ -344,7 +344,7 @@ async def test_create_variants_for_score_set_with_existing_variants( with patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, + return_value=TEST_NT_CDOT_TRANSCRIPT, ) as hdp: result = await create_variants_for_score_set( standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts @@ -402,7 +402,7 @@ async def test_create_variants_for_score_set_with_existing_exceptions( with patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, + return_value=TEST_NT_CDOT_TRANSCRIPT, ) as hdp: result = await create_variants_for_score_set( standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts @@ -441,7 +441,7 @@ async def test_create_variants_for_score_set( with patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, + return_value=TEST_NT_CDOT_TRANSCRIPT, ) as hdp: result = await create_variants_for_score_set( standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts @@ -491,7 +491,7 @@ async def dummy_linking_job(): patch.object( cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", - return_value=TEST_CDOT_TRANSCRIPT, + return_value=TEST_NT_CDOT_TRANSCRIPT, ) as hdp, patch.object( _UnixSelectorEventLoop, From 41d2a1fad72f8441588863ae2de34bdabe7a5f7e Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 7 Mar 2025 13:54:49 -0800 Subject: [PATCH 107/166] Logical names for git action checks --- .github/workflows/run-tests-on-push.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/run-tests-on-push.yml b/.github/workflows/run-tests-on-push.yml index 1dccd615..803541af 100644 --- 
a/.github/workflows/run-tests-on-push.yml +++ b/.github/workflows/run-tests-on-push.yml @@ -8,7 +8,7 @@ env: jobs: run-tests-3_9-core-dependencies: runs-on: ubuntu-20.04 - name: Pytest on Python 3.9 / Ubuntu 20.04 + name: Pytest on Core Dependencies-- Python 3.9 / Ubuntu 20.04 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -21,8 +21,8 @@ jobs: - run: poetry run pytest tests/ run-tests-3_9: - runs-on: ubuntu-latest - name: Pytest on Python 3.9 + runs-on: ubuntu-20.04 + name: Pytest on Optional Dependencies-- Python 3.9 / Ubuntu 20.04 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -36,7 +36,7 @@ jobs: run-tests-3_10-core-dependencies: runs-on: ubuntu-latest - name: Pytest on Python 3.10 + name: Pytest on Core Dependencies-- Python 3.10 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -50,7 +50,7 @@ jobs: run-tests-3_10: runs-on: ubuntu-latest - name: Pytest on Python 3.10 + name: Pytest on Optional Dependencies-- Python 3.10 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -64,7 +64,7 @@ jobs: run-tests-3_11-core-dependencies: runs-on: ubuntu-latest - name: Pytest on Python 3.11 + name: Pytest on Core Dependencies-- Python 3.11 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -78,7 +78,7 @@ jobs: run-tests-3_11: runs-on: ubuntu-latest - name: Pytest on Python 3.11 + name: Pytest on Optional Dependencies-- Python 3.11 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -92,7 +92,7 @@ jobs: run-mypy-3_10: runs-on: ubuntu-latest - name: MyPy checks on Python 3.10 + name: MyPy on Full Codebase-- Python 3.10 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -106,7 +106,7 @@ jobs: run-ruff-lint: runs-on: ubuntu-latest - name: Ruff linting on Python 3.10 + name: Ruff on Full Codebase-- Python 3.10 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 From d20bc6d7a184a2015dc21cfc5e2fe37ad88e85ee Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 7 Mar 2025 15:07:11 -0800 Subject: [PATCH 108/166] Bump SeqRepo Version, Add Volume to Dev Containers --- docker-compose-dev.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 294708d0..c44b7b8f 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -23,6 +23,7 @@ services: - "8002:8000" volumes: - .:/code + - mavedb-seqrepo-dev:/usr/local/share/seqrepo worker: image: mavedb-api/mavedb-worker:dev @@ -41,6 +42,7 @@ services: LOG_CONFIG: dev volumes: - .:/code + - mavedb-seqrepo-dev:/usr/local/share/seqrepo depends_on: - db - redis @@ -77,15 +79,10 @@ services: - mavedb-seqrepo-dev:/usr/local/share/seqrepo seqrepo: - image: biocommons/seqrepo:2021-01-29 + image: biocommons/seqrepo:2024-12-20 volumes: - mavedb-seqrepo-dev:/usr/local/share/seqrepo -# rabbitmq: -# image: rabbitmq:3.8.3 -# ports: -# - "5673:5672" - volumes: mavedb-data-dev: mavedb-redis-dev: From 645bcbfd8e1cb05d5855bc1091e34759c28e6468 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 7 Mar 2025 15:08:33 -0800 Subject: [PATCH 109/166] Add SeqRepo based seqfetcher to data provider Prior to this, we weren't really using SeqRepo to do transcript resolution (unintentionally). Note that to use SeqRepo in this manner, a new environment variable `HGVS_SEQREPO_DIR` should be set. 
--- settings/.env.template | 3 ++- src/mavedb/data_providers/services.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/settings/.env.template b/settings/.env.template index 32d693af..5d4af067 100644 --- a/settings/.env.template +++ b/settings/.env.template @@ -50,7 +50,8 @@ UTA_DB_URL=postgresql://anonymous:anonymous@uta.biocommons.org:5432/uta/uta_2018 # Environment variables for seqrepo #################################################################################################### -SEQREPO_ROOT_DIR=/usr/local/share/seqrepo/2021-01-29 +SEQREPO_ROOT_DIR=/usr/local/share/seqrepo/2024-12-20 +HGVS_SEQREPO_DIR=/usr/local/share/seqrepo/2024-12-20 #################################################################################################### # Environment variables for mapping MaveDB connection diff --git a/src/mavedb/data_providers/services.py b/src/mavedb/data_providers/services.py index 3d16a8e5..3b241bef 100644 --- a/src/mavedb/data_providers/services.py +++ b/src/mavedb/data_providers/services.py @@ -3,7 +3,7 @@ from typing import Optional, TypedDict import requests -from cdot.hgvs.dataproviders import ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider +from cdot.hgvs.dataproviders import SeqFetcher, ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider GENOMIC_FASTA_FILES = [ "/data/GCF_000001405.39_GRCh38.p13_genomic.fna.gz", @@ -14,7 +14,7 @@ def seqfetcher() -> ChainedSeqFetcher: - return ChainedSeqFetcher(*[FastaSeqFetcher(file) for file in GENOMIC_FASTA_FILES]) + return ChainedSeqFetcher(SeqFetcher(), *[FastaSeqFetcher(file) for file in GENOMIC_FASTA_FILES]) def cdot_rest() -> RESTDataProvider: From 9b4a9aef0613224eea73198b8ae037005ecf4b17 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 7 Mar 2025 16:49:20 -0800 Subject: [PATCH 110/166] Add SeqFetcher MyPy type stub --- mypy_stubs/cdot/hgvs/dataproviders/fasta_seqfetcher.pyi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mypy_stubs/cdot/hgvs/dataproviders/fasta_seqfetcher.pyi b/mypy_stubs/cdot/hgvs/dataproviders/fasta_seqfetcher.pyi index 3df0a0ec..5c84d2b4 100644 --- a/mypy_stubs/cdot/hgvs/dataproviders/fasta_seqfetcher.pyi +++ b/mypy_stubs/cdot/hgvs/dataproviders/fasta_seqfetcher.pyi @@ -1,6 +1,7 @@ from typing import Union -from hgvs.dataproviders.seqfetcher import SeqFetcher +class SeqFetcher: + def __init__(self, *args) -> None: ... class FastaSeqFetcher: def __init__(self, *args, cache: bool = True) -> None: ... 
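As a rough illustration of what the SeqRepo patches above change (a sketch, not part of any committed diff: it assumes a local SeqRepo snapshot exists at the path shown and that `HGVS_SEQREPO_DIR` controls the lookup, as the commit message states; the accession is only an example):

```
# Sketch: with HGVS_SEQREPO_DIR set, the plain SeqFetcher resolves accessions from the
# local SeqRepo snapshot first; the FASTA fetcher remains as a fallback for the bundled
# reference file. Paths and the accession below are illustrative.
import os

from cdot.hgvs.dataproviders import ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider, SeqFetcher

os.environ["HGVS_SEQREPO_DIR"] = "/usr/local/share/seqrepo/2024-12-20"

fetcher = ChainedSeqFetcher(
    SeqFetcher(),  # SeqRepo-backed (or remote) lookup, tried first
    FastaSeqFetcher("/data/GCF_000001405.39_GRCh38.p13_genomic.fna.gz"),  # fallback
)
hdp = RESTDataProvider(seqfetcher=fetcher)

# Fetch the first 10 bases of a transcript through the chained fetcher.
print(fetcher.fetch_seq("NM_001637.3", 0, 10))
```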
From 941cb4909c4754f889639da5c0aa341aebeeca3b Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 12 Mar 2025 20:09:44 -0700 Subject: [PATCH 111/166] Refactor fixes --- tests/helpers/constants.py | 2 +- tests/routers/test_statistics.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index d0dfd4a1..199ff1b4 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -827,7 +827,7 @@ "genomic": { "sequence_id": "ga4gh:SQ.em9khDCUYXrVWBfWr9r8fjBUrTjj1aig", "sequence_type": "dna", - "sequence_accessions": [VALID_ACCESSION], + "sequence_accessions": [VALID_NT_ACCESSION], "sequence_genes": [VALID_GENE], } } diff --git a/tests/routers/test_statistics.py b/tests/routers/test_statistics.py index f6ef6f6c..b2632f54 100644 --- a/tests/routers/test_statistics.py +++ b/tests/routers/test_statistics.py @@ -73,7 +73,6 @@ def setup_seq_scoreset(setup_router_db, session, data_provider, client, data_fil # Note that we have not created indexes for this view when it is generated via metadata. This differs # from the database created via alembic, which does create indexes. PublishedVariantsMV.refresh(session, False) - session.commit() def assert_statistic(desired_field_value, response): From 5e9f6f12f0c98ab1f8a7c12c0a89db3b67e113d9 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 13 Mar 2025 10:13:50 -0700 Subject: [PATCH 112/166] Use MaveHGVS to determine if variant is a multi-variant --- mypy_stubs/mavehgvs/variant.pyi | 5 ++- .../lib/validation/dataframe/variant.py | 33 ++++++++++--------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/mypy_stubs/mavehgvs/variant.pyi b/mypy_stubs/mavehgvs/variant.pyi index 35086b3d..f1d79665 100644 --- a/mypy_stubs/mavehgvs/variant.pyi +++ b/mypy_stubs/mavehgvs/variant.pyi @@ -1,4 +1,5 @@ -from typing import Any, List, Mapping, Optional, Sequence, Tuple, Union +from typing import Any, Callable, List, Mapping, Optional, Sequence, Tuple, Union +from re import Match from .position import VariantPosition @@ -21,3 +22,5 @@ class Variant: prefix: str sequence: Union[str, Tuple[str, str], List[Optional[Union[str, Tuple[str, str]]]], None] + is_multi_variant: Callable[..., bool] + fullmatch: Callable[..., Optional[Match[str]]] diff --git a/src/mavedb/lib/validation/dataframe/variant.py b/src/mavedb/lib/validation/dataframe/variant.py index 07c400d0..7a72710a 100644 --- a/src/mavedb/lib/validation/dataframe/variant.py +++ b/src/mavedb/lib/validation/dataframe/variant.py @@ -16,8 +16,6 @@ construct_target_sequence_mappings, ) from mavedb.lib.validation.constants.target import strict_valid_sequence_types as valid_sequence_types - - from mavedb.models.target_sequence import TargetSequence from mavedb.models.target_accession import TargetAccession @@ -193,10 +191,10 @@ def validate_hgvs_genomic_column( def validate_genomic_variant( idx: Hashable, variant_string: str, parser: "Parser", validator: "Validator" ) -> tuple[bool, Optional[str]]: - def _validate_allelic_variation(variant: str) -> bool: + def _validate_allelic_variation(transcript: str, variant: str) -> bool: """ The HGVS package is currently unable to parse allelic variation, and this doesn't seem like a planned - feature (see: https://github.com/biocommons/hgvs/issues/538). As a workaround and because MaveHGVS, + feature (see: https://github.com/biocommons/hgvs/issues/538). 
As a workaround and because MaveHGVS does support this sort of multivariant we can: - Validate that the multi-variant allele is valid HGVS. - Validate each sub-variant in an allele is valid with respect to the transcript. @@ -218,15 +216,16 @@ def _validate_allelic_variation(variant: str) -> bool: hgvs.exceptions.HGVSError If the variant is not a valid HGVS string (for reasons of transcript/variant inconsistency). """ - transcript, multi_variant = variant.split(":") - - # Validate that the multi-variant allele is valid HGVS. - Variant(multi_variant) - prefix, variants = multi_variant[0:2], multi_variant[2:] - # Validate each sub-variant in an allele is valid with respect to the transcript. - for subvariant in variants.strip("[]").split(";"): - validator.validate(parser.parse(f"{transcript}:{prefix}{subvariant}"), strict=False) + # mavehgvs doesn't offer a convenient way to access the variant sub-string in a multi-variant, + # but this is the same logic it uses to parse them into component substrings. + variant_match = Variant.fullmatch(variant) + if not variant_match: + return False + + variant_string = variant_match.groupdict()["multi_variant"] + for variant_sub_string in variant_string[3:-1].split(";"): + validator.validate(parser.parse(f"{transcript}:{variant_string[0]}.{variant_sub_string}"), strict=False) return True @@ -239,11 +238,15 @@ def _validate_allelic_variation(variant: str) -> bool: for variant in variant_string.split(" "): try: - if "[" in variant: - _validate_allelic_variation(variant) + # Some issue with mavehgvs RegEx causes multivariants with a transcript identifier + # to fail RegEx validation, so we need to split the transcript and variant string up front. + transcript, variant_string = variant.split(":") + if Variant(variant_string).is_multi_variant(): + _validate_allelic_variation(transcript, variant_string) else: validator.validate(parser.parse(variant), strict=False) - except MaveHgvsParseError: + except MaveHgvsParseError as e: + logger.error("err", exc_info=e) return False, f"Failed to parse variant string '{variant}' at row {idx}." except hgvs.exceptions.HGVSError as e: return False, f"Failed to parse row {idx} with HGVS exception: {e}." 
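Since the biocommons hgvs package cannot parse allelic (multi-variant) notation, the workaround in the patch above first checks the allele with MaveHGVS and then validates each sub-variant against the transcript separately. Below is a standalone sketch of that decomposition step, assuming the same string layout the patch relies on (a prefix such as "c." followed by a bracketed, semicolon-separated list of sub-variants); the accession in the usage comment is hypothetical, and in the real validator each returned string is handed to parser.parse(...) and validator.validate(...).

from mavehgvs.variant import Variant

def split_allelic_variant(variant: str) -> list[str]:
    """Decompose 'ACC:c.[1G>A;2A>T]' into ['ACC:c.1G>A', 'ACC:c.2A>T']."""
    transcript, multi_variant = variant.split(":")
    # Raises MaveHgvsParseError if the multi-variant allele itself is malformed.
    Variant(multi_variant)
    prefix, body = multi_variant[:2], multi_variant[2:]
    # Strip the brackets and emit one single-variant string per sub-variant.
    return [f"{transcript}:{prefix}{sub}" for sub in body.strip("[]").split(";")]

# Example usage (hypothetical accession):
# split_allelic_variant("NM_001637.3:c.[1G>A;2A>T]")
# -> ['NM_001637.3:c.1G>A', 'NM_001637.3:c.2A>T']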
From 5d261dfe6ed88d80b9bd9662f6ca18188a619083 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 13 Mar 2025 10:14:27 -0700 Subject: [PATCH 113/166] Fix tests for MaveHGVS parsing --- tests/validation/dataframe/test_variant.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tests/validation/dataframe/test_variant.py b/tests/validation/dataframe/test_variant.py index 931c044b..93b658cb 100644 --- a/tests/validation/dataframe/test_variant.py +++ b/tests/validation/dataframe/test_variant.py @@ -756,7 +756,7 @@ def setUp(self): self.falsy_variant_strings = [None, ""] self.valid_hgvs_nt_column = pd.Series( - [f"{VALID_NT_ACCESSION}:c.1G>A", f"{VALID_NT_ACCESSION}:c.2A>T", f"{VALID_NT_ACCESSION}:c.[2A>T;1G>A]"], + [f"{VALID_NT_ACCESSION}:c.1G>A", f"{VALID_NT_ACCESSION}:c.1G>A", f"{VALID_NT_ACCESSION}:c.[1G>A;2A>T]"], name=hgvs_nt_column, ) self.invalid_hgvs_nt_column = pd.Series( @@ -795,22 +795,18 @@ def test_validate_genomic_variant_nonetype_variant_string(self): assert error is None def test_validate_valid_hgvs_variant(self): - for idx, variant_string in enumerate([self.valid_hgvs_nt_column, self.valid_hgvs_pro_column]): + for idx, variant_string in enumerate(pd.concat([self.valid_hgvs_nt_column, self.valid_hgvs_pro_column])): with self.subTest(variant_string=variant_string): - valid, error = validate_genomic_variant(idx, variant_string[0], self.parser, self.validator) + valid, error = validate_genomic_variant(idx, variant_string, self.parser, self.validator) assert valid assert error is None def test_validate_invalid_hgvs_variant(self): - from hgvs.exceptions import HGVSError - - self.validator.validate.side_effect = HGVSError("Invalid variant") - - for idx, variant_string in enumerate((self.invalid_hgvs_nt_column, self.invalid_hgvs_pro_column)): + for idx, variant_string in enumerate(pd.concat([self.invalid_hgvs_nt_column, self.invalid_hgvs_pro_column])): with self.subTest(variant_string=variant_string): - valid, error = validate_genomic_variant(idx, variant_string[0], self.parser, self.validator) + valid, error = validate_genomic_variant(idx, variant_string, self.parser, self.validator) assert not valid - assert f"Failed to parse row {idx} with HGVS exception:" in error + assert f"Failed to parse variant string '{variant_string}' at row {idx}" in error @unittest.skipIf(HGVS_INSTALLED, "HGVS module installed") From 57bb3bcb3d0e2618596f9f814351f7bffe6553cd Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Sat, 29 Mar 2025 11:01:52 -0700 Subject: [PATCH 114/166] Rebase fixes (could fixup) --- tests/view_models/test_score_set.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/view_models/test_score_set.py b/tests/view_models/test_score_set.py index c155f9b7..5f45bce0 100644 --- a/tests/view_models/test_score_set.py +++ b/tests/view_models/test_score_set.py @@ -465,6 +465,21 @@ def test_cannot_create_score_set_with_normal_range_and_no_wild_type_score(): assert "A normal range has been provided, but no wild type score has been provided." 
in str(exc_info.value) +def test_cannot_create_score_set_without_default_ranges(): + score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() + score_set_test["score_ranges"] = { + "wt_score": -0.5, + "ranges": [ + {"label": "range_1", "classification": "other", "range": (-1, 0)}, + ], + } + + with pytest.raises(ValueError) as exc_info: + ScoreSetModify(**score_set_test) + + assert "unexpected value; permitted: 'normal', 'abnormal', 'not_specified'" in str(exc_info.value) + + @pytest.mark.parametrize("classification", ["normal", "abnormal", "not_specified"]) def test_can_create_score_set_with_any_range_classification(classification): wt_score = -0.5 if classification == "normal" else None @@ -477,7 +492,6 @@ def test_can_create_score_set_with_any_range_classification(classification): } ScoreSetModify(**score_set_test) - assert "Unexpected classification value(s): other. Permitted values: ['normal', 'abnormal']" in str(exc_info.value) def test_cannot_create_score_set_with_inconsistent_base_editor_flags(): From a40afb51323952c0229189803e2edbe5f8282de8 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 4 Apr 2025 09:13:46 -0400 Subject: [PATCH 115/166] wip: Use new MaveHGVS methods for allelic validation This change will need to pull in the new MaveHGVS version. --- .../lib/validation/dataframe/variant.py | 27 +++++++------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/src/mavedb/lib/validation/dataframe/variant.py b/src/mavedb/lib/validation/dataframe/variant.py index 7a72710a..20aa04fe 100644 --- a/src/mavedb/lib/validation/dataframe/variant.py +++ b/src/mavedb/lib/validation/dataframe/variant.py @@ -191,7 +191,7 @@ def validate_hgvs_genomic_column( def validate_genomic_variant( idx: Hashable, variant_string: str, parser: "Parser", validator: "Validator" ) -> tuple[bool, Optional[str]]: - def _validate_allelic_variation(transcript: str, variant: str) -> bool: + def _validate_allelic_variation(variant: Variant) -> bool: """ The HGVS package is currently unable to parse allelic variation, and this doesn't seem like a planned feature (see: https://github.com/biocommons/hgvs/issues/538). As a workaround and because MaveHGVS @@ -201,7 +201,7 @@ def _validate_allelic_variation(transcript: str, variant: str) -> bool: Parameters ---------- - variant : str + variant : MaveHGVS Style Variant The multi-variant allele to validate. Returns @@ -216,16 +216,9 @@ def _validate_allelic_variation(transcript: str, variant: str) -> bool: hgvs.exceptions.HGVSError If the variant is not a valid HGVS string (for reasons of transcript/variant inconsistency). """ - # Validate each sub-variant in an allele is valid with respect to the transcript. - # mavehgvs doesn't offer a convenient way to access the variant sub-string in a multi-variant, - # but this is the same logic it uses to parse them into component substrings. 
- variant_match = Variant.fullmatch(variant) - if not variant_match: - return False - variant_string = variant_match.groupdict()["multi_variant"] - for variant_sub_string in variant_string[3:-1].split(";"): - validator.validate(parser.parse(f"{transcript}:{variant_string[0]}.{variant_sub_string}"), strict=False) + for variant_sub_string in variant.components(): + validator.validate(parser.parse(variant_sub_string), strict=False) return True @@ -238,16 +231,14 @@ def _validate_allelic_variation(transcript: str, variant: str) -> bool: for variant in variant_string.split(" "): try: - # Some issue with mavehgvs RegEx causes multivariants with a transcript identifier - # to fail RegEx validation, so we need to split the transcript and variant string up front. - transcript, variant_string = variant.split(":") - if Variant(variant_string).is_multi_variant(): - _validate_allelic_variation(transcript, variant_string) + variant_obj = Variant(variant) + if variant_obj.is_multi_variant(): + _validate_allelic_variation(variant_obj) else: - validator.validate(parser.parse(variant), strict=False) + validator.validate(parser.parse(str(variant_obj)), strict=False) except MaveHgvsParseError as e: logger.error("err", exc_info=e) - return False, f"Failed to parse variant string '{variant}' at row {idx}." + return False, f"Failed to parse variant string '{str(variant_obj)}' at row {idx}." except hgvs.exceptions.HGVSError as e: return False, f"Failed to parse row {idx} with HGVS exception: {e}." From 6226c9389427f66fd6564f912e9dd91acfbe9c76 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Tue, 6 May 2025 09:52:17 -0700 Subject: [PATCH 116/166] Fixed rebasing error. --- tests/helpers/util.py | 372 ------------------------------------------ 1 file changed, 372 deletions(-) delete mode 100644 tests/helpers/util.py diff --git a/tests/helpers/util.py b/tests/helpers/util.py deleted file mode 100644 index 6519e4d0..00000000 --- a/tests/helpers/util.py +++ /dev/null @@ -1,372 +0,0 @@ -from copy import deepcopy -from datetime import date -from unittest.mock import patch - -import cdot.hgvs.dataproviders -import jsonschema -from arq import ArqRedis -from sqlalchemy import select -from sqlalchemy.exc import NoResultFound - -from mavedb.lib.score_sets import columns_for_dataset, create_variants, create_variants_data, csv_data_to_df -from mavedb.lib.validation.dataframe import validate_and_standardize_dataframe_pair -from mavedb.models.clinical_control import ClinicalControl as ClinicalControlDbModel -from mavedb.models.contributor import Contributor -from mavedb.models.enums.processing_state import ProcessingState -from mavedb.models.enums.mapping_state import MappingState -from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.score_set import ScoreSet as ScoreSetDbModel -from mavedb.models.license import License -from mavedb.models.target_gene import TargetGene -from mavedb.models.user import User -from mavedb.models.variant import Variant -from mavedb.view_models.collection import Collection -from mavedb.models.mapped_variant import MappedVariant as MappedVariantDbModel -from mavedb.models.variant import Variant as VariantDbModel -from mavedb.view_models.experiment import Experiment, ExperimentCreate -from mavedb.view_models.score_set import ScoreSet, ScoreSetCreate -from tests.helpers.constants import ( - TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, - TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, - EXTRA_USER, - TEST_CDOT_TRANSCRIPT, - TEST_COLLECTION, - 
TEST_MINIMAL_ACC_SCORESET, - TEST_MINIMAL_EXPERIMENT, - TEST_MINIMAL_PRE_MAPPED_METADATA, - TEST_MINIMAL_POST_MAPPED_METADATA, - TEST_MINIMAL_SEQ_SCORESET, - TEST_MINIMAL_MAPPED_VARIANT, - TEST_VALID_PRE_MAPPED_VRS_CIS_PHASED_BLOCK, - TEST_VALID_POST_MAPPED_VRS_CIS_PHASED_BLOCK, -) - - -def add_contributor(db, urn, model, orcid_id: str, given_name: str, family_name: str): - """Without making an API call, add a new contributor to the record (experiment or score set) with given urn and model.""" - item = db.query(model).filter(model.urn == urn).one_or_none() - assert item is not None - - try: - contributor = db.execute(select(Contributor).where(Contributor.orcid_id == orcid_id)).one() - except NoResultFound: - contributor = Contributor(orcid_id=orcid_id, given_name=given_name, family_name=family_name) - db.add(contributor) - - item.contributors = [contributor] - db.add(item) - db.commit() - - -def change_ownership(db, urn, model): - """Change the ownership of the record with given urn and model to the extra user.""" - item = db.query(model).filter(model.urn == urn).one_or_none() - assert item is not None - extra_user = db.query(User).filter(User.username == EXTRA_USER["username"]).one_or_none() - assert extra_user is not None - item.created_by_id = extra_user.id - item.modified_by_id = extra_user.id - db.add(item) - db.commit() - - -def change_to_inactive_license(db, urn): - """Change the license of the score set with given urn to an inactive license.""" - item = db.query(ScoreSetDbModel).filter(ScoreSetDbModel.urn == urn).one_or_none() - assert item is not None - license = db.query(License).filter(License.active.is_(False)).first() - assert license is not None - item.license_id = license.id - db.add(item) - db.commit() - - -def create_collection(client, update=None): - collection_payload = deepcopy(TEST_COLLECTION) - if update is not None: - collection_payload.update(update) - - response = client.post("/api/v1/collections/", json=collection_payload) - assert response.status_code == 200, "Could not create collection." - - response_data = response.json() - jsonschema.validate(instance=response_data, schema=Collection.schema()) - return response_data - - -def create_experiment(client, update=None): - experiment_payload = deepcopy(TEST_MINIMAL_EXPERIMENT) - if update is not None: - experiment_payload.update(update) - jsonschema.validate(instance=experiment_payload, schema=ExperimentCreate.schema()) - - response = client.post("/api/v1/experiments/", json=experiment_payload) - assert response.status_code == 200, "Could not create experiment." 
- - response_data = response.json() - jsonschema.validate(instance=response_data, schema=Experiment.schema()) - return response_data - - -def create_seq_score_set(client, experiment_urn, update=None): - score_set_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) - if experiment_urn is not None: - score_set_payload["experimentUrn"] = experiment_urn - if update is not None: - score_set_payload.update(update) - jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.schema()) - - response = client.post("/api/v1/score-sets/", json=score_set_payload) - assert ( - response.status_code == 200 - ), f"Could not create sequence based score set (no variants) within experiment {experiment_urn}" - - response_data = response.json() - jsonschema.validate(instance=response_data, schema=ScoreSet.schema()) - return response_data - - -def create_acc_score_set(client, experiment_urn, update=None): - score_set_payload = deepcopy(TEST_MINIMAL_ACC_SCORESET) - if experiment_urn is not None: - score_set_payload["experimentUrn"] = experiment_urn - if update is not None: - score_set_payload.update(update) - jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.schema()) - - with patch.object(cdot.hgvs.dataproviders.RESTDataProvider, "_get_transcript", return_value=TEST_CDOT_TRANSCRIPT): - response = client.post("/api/v1/score-sets/", json=score_set_payload) - - assert ( - response.status_code == 200 - ), f"Could not create accession based score set (no variants) within experiment {experiment_urn}" - - response_data = response.json() - jsonschema.validate(instance=response_data, schema=ScoreSet.schema()) - return response_data - - -def mock_worker_variant_insertion(client, db, data_provider, score_set, scores_csv_path, counts_csv_path): - with ( - open(scores_csv_path, "rb") as score_file, - patch.object(ArqRedis, "enqueue_job", return_value=None) as worker_queue, - ): - files = {"scores_file": (scores_csv_path.name, score_file, "rb")} - - if counts_csv_path is not None: - counts_file = open(counts_csv_path, "rb") - files["counts_file"] = (counts_csv_path.name, counts_file, "rb") - else: - counts_file = None - - response = client.post(f"/api/v1/score-sets/{score_set['urn']}/variants/data", files=files) - - # Assert we have mocked a job being added to the queue, and that the request succeeded. The - # response value here isn't important- we will add variants to the score set manually. - worker_queue.assert_called_once() - assert response.status_code == 200 - - if counts_file is not None: - counts_file.close() - - # Reopen files since their buffers are consumed while mocking the variant data post request. - with open(scores_csv_path, "rb") as score_file: - score_df = csv_data_to_df(score_file) - - if counts_csv_path is not None: - with open(counts_csv_path, "rb") as counts_file: - counts_df = csv_data_to_df(counts_file) - else: - counts_df = None - - # Insert variant manually, worker jobs are tested elsewhere separately. 
- item = db.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set["urn"])).one_or_none() - assert item is not None - - scores, counts = validate_and_standardize_dataframe_pair(score_df, counts_df, item.target_genes, data_provider) - variants = create_variants_data(scores, counts, None) - num_variants = create_variants(db, item, variants) - assert num_variants == 3 - - item.processing_state = ProcessingState.success - item.dataset_columns = { - "score_columns": columns_for_dataset(scores), - "count_columns": columns_for_dataset(counts), - } - - db.add(item) - db.commit() - - return client.get(f"/api/v1/score-sets/{score_set['urn']}").json() - - -def create_mapped_variants_for_score_set(db, score_set_urn): - score_set = db.scalar(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)) - targets = db.scalars(select(TargetGene).where(TargetGene.score_set_id == score_set.id)) - variants = db.scalars(select(Variant).where(Variant.score_set_id == score_set.id)).all() - - for variant in variants: - mv = MappedVariant(**TEST_MINIMAL_MAPPED_VARIANT, variant_id=variant.id) - db.add(mv) - - for target in targets: - target.pre_mapped_metadata = TEST_MINIMAL_PRE_MAPPED_METADATA - target.post_mapped_metadata = TEST_MINIMAL_POST_MAPPED_METADATA - db.add(target) - - score_set.mapping_state = MappingState.complete - db.commit() - return - - -def mock_worker_vrs_mapping(client, db, score_set, alleles=True): - # The mapping job is tested elsewhere, so insert mapped variants manually. - variants = db.scalars( - select(VariantDbModel).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set["urn"]) - ).all() - - # It's un-important what the contents of each mapped VRS object are, so use the same constant for each variant. - for variant in variants: - mapped_variant = MappedVariantDbModel( - pre_mapped=TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X - if alleles - else TEST_VALID_PRE_MAPPED_VRS_CIS_PHASED_BLOCK, - post_mapped=TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X - if alleles - else TEST_VALID_POST_MAPPED_VRS_CIS_PHASED_BLOCK, - variant=variant, - vrs_version="2.0", - modification_date=date.today(), - mapped_date=date.today(), - mapping_api_version="pytest.0.0", - current=True, - ) - db.add(mapped_variant) - - db.commit() - - return client.get(f"/api/v1/score-sets/{score_set['urn']}").json() - - -def create_seq_score_set_with_variants( - client, db, data_provider, experiment_urn, scores_csv_path, update=None, counts_csv_path=None -): - score_set = create_seq_score_set(client, experiment_urn, update) - score_set = mock_worker_variant_insertion(client, db, data_provider, score_set, scores_csv_path, counts_csv_path) - - assert ( - score_set["numVariants"] == 3 - ), f"Could not create sequence based score set with variants within experiment {experiment_urn}" - - jsonschema.validate(instance=score_set, schema=ScoreSet.schema()) - return score_set - - -def create_acc_score_set_with_variants( - client, db, data_provider, experiment_urn, scores_csv_path, update=None, counts_csv_path=None -): - score_set = create_acc_score_set(client, experiment_urn, update) - score_set = mock_worker_variant_insertion(client, db, data_provider, score_set, scores_csv_path, counts_csv_path) - - assert ( - score_set["numVariants"] == 3 - ), f"Could not create sequence based score set with variants within experiment {experiment_urn}" - - jsonschema.validate(instance=score_set, schema=ScoreSet.schema()) - return score_set - - -def publish_score_set(client, score_set_urn): - with patch.object(ArqRedis, 
"enqueue_job", return_value=None) as worker_queue: - response = client.post(f"/api/v1/score-sets/{score_set_urn}/publish") - assert response.status_code == 200, f"Could not publish score set {score_set_urn}" - worker_queue.assert_called_once() - - response_data = response.json() - jsonschema.validate(instance=response_data, schema=ScoreSet.schema()) - return response_data - - -def create_api_key_for_current_user(client): - response = client.post("api/v1/users/me/access-keys") - assert response.status_code == 200 - return response.json()["keyId"] - - -def create_admin_key_for_current_user(client): - response = client.post("api/v1/users/me/access-keys/admin") - assert response.status_code == 200 - return response.json()["keyId"] - - -def mark_user_inactive(session, username): - user = session.query(User).where(User.username == username).one() - user.is_active = False - - session.add(user) - session.commit() - session.refresh(user) - - return user - - -async def awaitable_exception(): - return Exception() - - -def update_expected_response_for_created_resources(expected_response, created_experiment, created_score_set): - expected_response.update({"urn": created_score_set["urn"]}) - expected_response["experiment"].update( - { - "urn": created_experiment["urn"], - "experimentSetUrn": created_experiment["experimentSetUrn"], - "scoreSetUrns": [created_score_set["urn"]], - } - ) - - return expected_response - - -def create_seq_score_set_with_mapped_variants( - client, db, data_provider, experiment_urn, scores_csv_path, update=None, counts_csv_path=None -): - score_set = create_seq_score_set_with_variants( - client, db, data_provider, experiment_urn, scores_csv_path, update, counts_csv_path - ) - score_set = mock_worker_vrs_mapping(client, db, score_set) - - jsonschema.validate(instance=score_set, schema=ScoreSet.schema()) - return score_set - - -def create_acc_score_set_with_mapped_variants( - client, db, data_provider, experiment_urn, scores_csv_path, update=None, counts_csv_path=None -): - score_set = create_acc_score_set_with_variants( - client, db, data_provider, experiment_urn, scores_csv_path, update, counts_csv_path - ) - score_set = mock_worker_vrs_mapping(client, db, score_set) - - jsonschema.validate(instance=score_set, schema=ScoreSet.schema()) - return score_set - - -def link_clinical_controls_to_mapped_variants(db, score_set): - mapped_variants = db.scalars( - select(MappedVariantDbModel) - .join(VariantDbModel) - .join(ScoreSetDbModel) - .where(ScoreSetDbModel.urn == score_set["urn"]) - ).all() - - # The first mapped variant gets the clinvar control, the second gets the generic control. - mapped_variants[0].clinical_controls.append( - db.scalar(select(ClinicalControlDbModel).where(ClinicalControlDbModel.id == 1)) - ) - mapped_variants[1].clinical_controls.append( - db.scalar(select(ClinicalControlDbModel).where(ClinicalControlDbModel.id == 2)) - ) - - db.add(mapped_variants[0]) - db.add(mapped_variants[1]) - db.commit() From a71b71f1bab8913b04f73ed726cc4de01ab35e5b Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Tue, 6 May 2025 11:33:06 -0700 Subject: [PATCH 117/166] Updated MaveHGVS to version 0.7.0. MaveHGVS 0.7.0 includes a fix to multi-variant validation. 
--- poetry.lock | 1497 ++++++++++++++++++++++++------------------------ pyproject.toml | 2 +- 2 files changed, 742 insertions(+), 757 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0084bab7..b9900552 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. [[package]] name = "alembic" @@ -142,13 +142,13 @@ cryptography = "*" [[package]] name = "beautifulsoup4" -version = "4.13.3" +version = "4.13.4" description = "Screen-scraping library" optional = true python-versions = ">=3.7.0" files = [ - {file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"}, - {file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"}, + {file = "beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b"}, + {file = "beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195"}, ] [package.dependencies] @@ -688,13 +688,13 @@ crt = ["awscrt (==0.21.2)"] [[package]] name = "botocore-stubs" -version = "1.37.16" +version = "1.38.9" description = "Type annotations and code completion for botocore" optional = false python-versions = ">=3.8" files = [ - {file = "botocore_stubs-1.37.16-py3-none-any.whl", hash = "sha256:33973ee0e54ad5bf9f8560b2c36fc532b98540af6b9d4a57ffce5ae62a743a2a"}, - {file = "botocore_stubs-1.37.16.tar.gz", hash = "sha256:532376611ae0c49488b7bdac3674da9ac0de9a6c65198432790b11da41502caf"}, + {file = "botocore_stubs-1.38.9-py3-none-any.whl", hash = "sha256:2960c28500509acbe885b30907c997d96a6bfc492fb5165cebd45353111048d2"}, + {file = "botocore_stubs-1.38.9.tar.gz", hash = "sha256:a9fa4b77aebd463a6e0518961dc662f0e69bb8eb4fe035888fe9a1dbbf179b21"}, ] [package.dependencies] @@ -739,13 +739,13 @@ requests = "*" [[package]] name = "certifi" -version = "2025.1.31" +version = "2025.4.26" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, - {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, + {file = "certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3"}, + {file = "certifi-2025.4.26.tar.gz", hash = "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6"}, ] [[package]] @@ -840,103 +840,103 @@ files = [ [[package]] name = "charset-normalizer" -version = "3.4.1" +version = "3.4.2" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
optional = false python-versions = ">=3.7" files = [ - {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash 
= "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9"}, - {file = 
"charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-win32.whl", hash = "sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-win_amd64.whl", 
hash = "sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-win32.whl", hash = "sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_i686.whl", hash = 
"sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-win32.whl", hash = "sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765"}, - {file = "charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85"}, - {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c48ed483eb946e6c04ccbe02c6b4d1d48e51944b6db70f697e089c193404941"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2d318c11350e10662026ad0eb71bb51c7812fc8590825304ae0bdd4ac283acd"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9cbfacf36cb0ec2897ce0ebc5d08ca44213af24265bd56eca54bee7923c48fd6"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18dd2e350387c87dabe711b86f83c9c78af772c748904d372ade190b5c7c9d4d"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8075c35cd58273fee266c58c0c9b670947c19df5fb98e7b66710e04ad4e9ff86"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5bf4545e3b962767e5c06fe1738f951f77d27967cb2caa64c28be7c4563e162c"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a6ab32f7210554a96cd9e33abe3ddd86732beeafc7a28e9955cdf22ffadbab0"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b33de11b92e9f75a2b545d6e9b6f37e398d86c3e9e9653c4864eb7e89c5773ef"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8755483f3c00d6c9a77f490c17e6ab0c8729e39e6390328e42521ef175380ae6"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:68a328e5f55ec37c57f19ebb1fdc56a248db2e3e9ad769919a58672958e8f366"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:21b2899062867b0e1fde9b724f8aecb1af14f2778d69aacd1a5a1853a597a5db"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-win32.whl", hash = "sha256:e8082b26888e2f8b36a042a58307d5b917ef2b1cacab921ad3323ef91901c71a"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:f69a27e45c43520f5487f27627059b64aaf160415589230992cec34c5e18a509"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:be1e352acbe3c78727a16a455126d9ff83ea2dfdcbc83148d2982305a04714c2"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:aa88ca0b1932e93f2d961bf3addbb2db902198dca337d88c89e1559e066e7645"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d524ba3f1581b35c03cb42beebab4a13e6cdad7b36246bd22541fa585a56cccd"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28a1005facc94196e1fb3e82a3d442a9d9110b8434fc1ded7a24a2983c9888d8"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdb20a30fe1175ecabed17cbf7812f7b804b8a315a25f24678bcdf120a90077f"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f5d9ed7f254402c9e7d35d2f5972c9bbea9040e99cd2861bd77dc68263277c7"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd387a49825780ff861998cd959767800d54f8308936b21025326de4b5a42b9"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f0aa37f3c979cf2546b73e8222bbfa3dc07a641585340179d768068e3455e544"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e70e990b2137b29dc5564715de1e12701815dacc1d056308e2b17e9095372a82"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0c8c57f84ccfc871a48a47321cfa49ae1df56cd1d965a09abe84066f6853b9c0"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6b66f92b17849b85cad91259efc341dce9c1af48e2173bf38a85c6329f1033e5"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-win32.whl", hash = "sha256:daac4765328a919a805fa5e2720f3e94767abd632ae410a9062dff5412bae65a"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:e53efc7c7cee4c1e70661e2e112ca46a575f90ed9ae3fef200f2a25e954f4b28"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = 
"sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cad5f45b3146325bb38d6855642f6fd609c3f7cad4dbaf75549bf3b904d3184"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2680962a4848b3c4f155dc2ee64505a9c57186d0d56b43123b17ca3de18f0fa"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:36b31da18b8890a76ec181c3cf44326bf2c48e36d393ca1b72b3f484113ea344"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f4074c5a429281bf056ddd4c5d3b740ebca4d43ffffe2ef4bf4d2d05114299da"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:c9e36a97bee9b86ef9a1cf7bb96747eb7a15c2f22bdb5b516434b00f2a599f02"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:1b1bde144d98e446b056ef98e59c256e9294f6b74d7af6846bf5ffdafd687a7d"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:915f3849a011c1f593ab99092f3cecfcb4d65d8feb4a64cf1bf2d22074dc0ec4"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:fb707f3e15060adf5b7ada797624a6c6e0138e2a26baa089df64c68ee98e040f"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:25a23ea5c7edc53e0f29bae2c44fcb5a1aa10591aae107f2a2b2583a9c5cbc64"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:770cab594ecf99ae64c236bc9ee3439c3f46be49796e265ce0cc8bc17b10294f"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-win32.whl", hash = "sha256:6a0289e4589e8bdfef02a80478f1dfcb14f0ab696b5a00e1f4b8a14a307a3c58"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6fc1f5b51fa4cecaa18f2bd7a003f3dd039dd615cd69a2afd6d3b19aed6775f2"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:76af085e67e56c8816c3ccf256ebd136def2ed9654525348cfa744b6802b69eb"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e45ba65510e2647721e35323d6ef54c7974959f6081b58d4ef5d87c60c84919a"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:046595208aae0120559a67693ecc65dd75d46f7bf687f159127046628178dc45"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75d10d37a47afee94919c4fab4c22b9bc2a8bf7d4f46f87363bcf0573f3ff4f5"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6333b3aa5a12c26b2a4d4e7335a28f1475e0e5e17d69d55141ee3cab736f66d1"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e8323a9b031aa0393768b87f04b4164a40037fb2a3c11ac06a03ffecd3618027"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:24498ba8ed6c2e0b56d4acbf83f2d989720a93b41d712ebd4f4979660db4417b"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:844da2b5728b5ce0e32d863af26f32b5ce61bc4273a9c720a9f3aa9df73b1455"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:65c981bdbd3f57670af8b59777cbfae75364b483fa8a9f420f08094531d54a01"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:3c21d4fca343c805a52c0c78edc01e3477f6dd1ad7c47653241cf2a206d4fc58"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:dc7039885fa1baf9be153a0626e337aa7ec8bf96b0128605fb0d77788ddc1681"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-win32.whl", hash = "sha256:8272b73e1c5603666618805fe821edba66892e2870058c94c53147602eab29c7"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:70f7172939fdf8790425ba31915bfbe8335030f05b9913d7ae00a87d4395620a"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:005fa3432484527f9732ebd315da8da8001593e2cf46a3d817669f062c3d9ed4"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash 
= "sha256:e92fca20c46e9f5e1bb485887d074918b13543b1c2a1185e69bb8d17ab6236a7"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:50bf98d5e563b83cc29471fa114366e6806bc06bc7a25fd59641e41445327836"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:721c76e84fe669be19c5791da68232ca2e05ba5185575086e384352e2c309597"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82d8fd25b7f4675d0c47cf95b594d4e7b158aca33b76aa63d07186e13c0e0ab7"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3daeac64d5b371dea99714f08ffc2c208522ec6b06fbc7866a450dd446f5c0f"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dccab8d5fa1ef9bfba0590ecf4d46df048d18ffe3eec01eeb73a42e0d9e7a8ba"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:aaf27faa992bfee0264dc1f03f4c75e9fcdda66a519db6b957a3f826e285cf12"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:eb30abc20df9ab0814b5a2524f23d75dcf83cde762c161917a2b4b7b55b1e518"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c72fbbe68c6f32f251bdc08b8611c7b3060612236e960ef848e0a517ddbe76c5"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:982bb1e8b4ffda883b3d0a521e23abcd6fd17418f6d2c4118d257a10199c0ce3"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-win32.whl", hash = "sha256:43e0933a0eff183ee85833f341ec567c0980dae57c464d8a508e1b2ceb336471"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:d11b54acf878eef558599658b0ffca78138c8c3655cf4f3a4a673c437e67732e"}, + {file = "charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0"}, + {file = "charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63"}, ] [[package]] @@ -1002,74 +1002,74 @@ type = ["pytest-mypy"] [[package]] name = "coverage" -version = "7.7.0" +version = "7.8.0" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.9" files = [ - {file = "coverage-7.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a538a23119d1e2e2ce077e902d02ea3d8e0641786ef6e0faf11ce82324743944"}, - {file = "coverage-7.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1586ad158523f4133499a4f322b230e2cfef9cc724820dbd58595a5a236186f4"}, - {file = "coverage-7.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b6c96d69928a3a6767fab8dc1ce8a02cf0156836ccb1e820c7f45a423570d98"}, - {file = "coverage-7.7.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f18d47641282664276977c604b5a261e51fefc2980f5271d547d706b06a837f"}, - {file = "coverage-7.7.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a1e18a85bd066c7c556d85277a7adf4651f259b2579113844835ba1a74aafd"}, - {file = "coverage-7.7.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:70f0925c4e2bfc965369f417e7cc72538fd1ba91639cf1e4ef4b1a6b50439b3b"}, - {file = "coverage-7.7.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b0fac2088ec4aaeb5468b814bd3ff5e5978364bfbce5e567c44c9e2854469f6c"}, 
- {file = "coverage-7.7.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b3e212a894d8ae07fde2ca8b43d666a6d49bbbddb10da0f6a74ca7bd31f20054"}, - {file = "coverage-7.7.0-cp310-cp310-win32.whl", hash = "sha256:f32b165bf6dfea0846a9c9c38b7e1d68f313956d60a15cde5d1709fddcaf3bee"}, - {file = "coverage-7.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:a2454b12a3f12cc4698f3508912e6225ec63682e2ca5a96f80a2b93cef9e63f3"}, - {file = "coverage-7.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a0a207c87a9f743c8072d059b4711f8d13c456eb42dac778a7d2e5d4f3c253a7"}, - {file = "coverage-7.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2d673e3add00048215c2cc507f1228a7523fd8bf34f279ac98334c9b07bd2656"}, - {file = "coverage-7.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f81fe93dc1b8e5673f33443c0786c14b77e36f1025973b85e07c70353e46882b"}, - {file = "coverage-7.7.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8c7524779003d59948c51b4fcbf1ca4e27c26a7d75984f63488f3625c328b9b"}, - {file = "coverage-7.7.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c124025430249118d018dcedc8b7426f39373527c845093132196f2a483b6dd"}, - {file = "coverage-7.7.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e7f559c36d5cdc448ee13e7e56ed7b6b5d44a40a511d584d388a0f5d940977ba"}, - {file = "coverage-7.7.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:37cbc7b0d93dfd133e33c7ec01123fbb90401dce174c3b6661d8d36fb1e30608"}, - {file = "coverage-7.7.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7d2a65876274acf544703e943c010b60bd79404e3623a1e5d52b64a6e2728de5"}, - {file = "coverage-7.7.0-cp311-cp311-win32.whl", hash = "sha256:f5a2f71d6a91238e7628f23538c26aa464d390cbdedf12ee2a7a0fb92a24482a"}, - {file = "coverage-7.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:ae8006772c6b0fa53c33747913473e064985dac4d65f77fd2fdc6474e7cd54e4"}, - {file = "coverage-7.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:056d3017ed67e7ddf266e6f57378ece543755a4c9231e997789ab3bd11392c94"}, - {file = "coverage-7.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:33c1394d8407e2771547583b66a85d07ed441ff8fae5a4adb4237ad39ece60db"}, - {file = "coverage-7.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fbb7a0c3c21908520149d7751cf5b74eb9b38b54d62997b1e9b3ac19a8ee2fe"}, - {file = "coverage-7.7.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bb356e7ae7c2da13f404bf8f75be90f743c6df8d4607022e759f5d7d89fe83f8"}, - {file = "coverage-7.7.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bce730d484038e97f27ea2dbe5d392ec5c2261f28c319a3bb266f6b213650135"}, - {file = "coverage-7.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:aa4dff57fc21a575672176d5ab0ef15a927199e775c5e8a3d75162ab2b0c7705"}, - {file = "coverage-7.7.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b667b91f4f714b17af2a18e220015c941d1cf8b07c17f2160033dbe1e64149f0"}, - {file = "coverage-7.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:693d921621a0c8043bfdc61f7d4df5ea6d22165fe8b807cac21eb80dd94e4bbd"}, - {file = "coverage-7.7.0-cp312-cp312-win32.whl", hash = "sha256:52fc89602cde411a4196c8c6894afb384f2125f34c031774f82a4f2608c59d7d"}, - {file = "coverage-7.7.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:0ce8cf59e09d31a4915ff4c3b94c6514af4c84b22c4cc8ad7c3c546a86150a92"}, - {file = "coverage-7.7.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4545485fef7a8a2d8f30e6f79ce719eb154aab7e44217eb444c1d38239af2072"}, - {file = "coverage-7.7.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1393e5aa9441dafb0162c36c8506c648b89aea9565b31f6bfa351e66c11bcd82"}, - {file = "coverage-7.7.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:316f29cc3392fa3912493ee4c83afa4a0e2db04ff69600711f8c03997c39baaa"}, - {file = "coverage-7.7.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1ffde1d6bc2a92f9c9207d1ad808550873748ac2d4d923c815b866baa343b3f"}, - {file = "coverage-7.7.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:416e2a8845eaff288f97eaf76ab40367deafb9073ffc47bf2a583f26b05e5265"}, - {file = "coverage-7.7.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5efdeff5f353ed3352c04e6b318ab05c6ce9249c25ed3c2090c6e9cadda1e3b2"}, - {file = "coverage-7.7.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:57f3bd0d29bf2bd9325c0ff9cc532a175110c4bf8f412c05b2405fd35745266d"}, - {file = "coverage-7.7.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3ab7090f04b12dc6469882ce81244572779d3a4b67eea1c96fb9ecc8c607ef39"}, - {file = "coverage-7.7.0-cp313-cp313-win32.whl", hash = "sha256:180e3fc68ee4dc5af8b33b6ca4e3bb8aa1abe25eedcb958ba5cff7123071af68"}, - {file = "coverage-7.7.0-cp313-cp313-win_amd64.whl", hash = "sha256:55143aa13c49491f5606f05b49ed88663446dce3a4d3c5d77baa4e36a16d3573"}, - {file = "coverage-7.7.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:cc41374d2f27d81d6558f8a24e5c114580ffefc197fd43eabd7058182f743322"}, - {file = "coverage-7.7.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:89078312f06237417adda7c021c33f80f7a6d2db8572a5f6c330d89b080061ce"}, - {file = "coverage-7.7.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b2f144444879363ea8834cd7b6869d79ac796cb8f864b0cfdde50296cd95816"}, - {file = "coverage-7.7.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:60e6347d1ed882b1159ffea172cb8466ee46c665af4ca397edbf10ff53e9ffaf"}, - {file = "coverage-7.7.0-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb203c0afffaf1a8f5b9659a013f8f16a1b2cad3a80a8733ceedc968c0cf4c57"}, - {file = "coverage-7.7.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ad0edaa97cb983d9f2ff48cadddc3e1fb09f24aa558abeb4dc9a0dbacd12cbb4"}, - {file = "coverage-7.7.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:c5f8a5364fc37b2f172c26a038bc7ec4885f429de4a05fc10fdcb53fb5834c5c"}, - {file = "coverage-7.7.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4e09534037933bf6eb31d804e72c52ec23219b32c1730f9152feabbd7499463"}, - {file = "coverage-7.7.0-cp313-cp313t-win32.whl", hash = "sha256:1b336d06af14f8da5b1f391e8dec03634daf54dfcb4d1c4fb6d04c09d83cef90"}, - {file = "coverage-7.7.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b54a1ee4c6f1905a436cbaa04b26626d27925a41cbc3a337e2d3ff7038187f07"}, - {file = "coverage-7.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1c8fbce80b2b8bf135d105aa8f5b36eae0c57d702a1cc3ebdea2a6f03f6cdde5"}, - {file = "coverage-7.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d9710521f07f526de30ccdead67e6b236fe996d214e1a7fba8b36e2ba2cd8261"}, - {file 
= "coverage-7.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7789e700f33f2b133adae582c9f437523cd5db8de845774988a58c360fc88253"}, - {file = "coverage-7.7.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b8c36093aca722db73633cf2359026ed7782a239eb1c6db2abcff876012dc4cf"}, - {file = "coverage-7.7.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c075d167a6ec99b798c1fdf6e391a1d5a2d054caffe9593ba0f97e3df2c04f0e"}, - {file = "coverage-7.7.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d013c07061751ae81861cae6ec3a4fe04e84781b11fd4b6b4201590234b25c7b"}, - {file = "coverage-7.7.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:104bf640f408f4e115b85110047c7f27377e1a8b7ba86f7db4fa47aa49dc9a8e"}, - {file = "coverage-7.7.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:39abcacd1ed54e2c33c54bdc488b310e8ef6705833f7148b6eb9a547199d375d"}, - {file = "coverage-7.7.0-cp39-cp39-win32.whl", hash = "sha256:8e336b56301774ace6be0017ff85c3566c556d938359b61b840796a0202f805c"}, - {file = "coverage-7.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:8c938c6ae59be67ac19a7204e079efc94b38222cd7d0269f96e45e18cddeaa59"}, - {file = "coverage-7.7.0-pp39.pp310.pp311-none-any.whl", hash = "sha256:3b0e6e54591ae0d7427def8a4d40fca99df6b899d10354bab73cd5609807261c"}, - {file = "coverage-7.7.0-py3-none-any.whl", hash = "sha256:708f0a1105ef2b11c79ed54ed31f17e6325ac936501fc373f24be3e6a578146a"}, - {file = "coverage-7.7.0.tar.gz", hash = "sha256:cd879d4646055a573775a1cec863d00c9ff8c55860f8b17f6d8eee9140c06166"}, + {file = "coverage-7.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2931f66991175369859b5fd58529cd4b73582461877ecfd859b6549869287ffe"}, + {file = "coverage-7.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52a523153c568d2c0ef8826f6cc23031dc86cffb8c6aeab92c4ff776e7951b28"}, + {file = "coverage-7.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c8a5c139aae4c35cbd7cadca1df02ea8cf28a911534fc1b0456acb0b14234f3"}, + {file = "coverage-7.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5a26c0c795c3e0b63ec7da6efded5f0bc856d7c0b24b2ac84b4d1d7bc578d676"}, + {file = "coverage-7.8.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:821f7bcbaa84318287115d54becb1915eece6918136c6f91045bb84e2f88739d"}, + {file = "coverage-7.8.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a321c61477ff8ee705b8a5fed370b5710c56b3a52d17b983d9215861e37b642a"}, + {file = "coverage-7.8.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:ed2144b8a78f9d94d9515963ed273d620e07846acd5d4b0a642d4849e8d91a0c"}, + {file = "coverage-7.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:042e7841a26498fff7a37d6fda770d17519982f5b7d8bf5278d140b67b61095f"}, + {file = "coverage-7.8.0-cp310-cp310-win32.whl", hash = "sha256:f9983d01d7705b2d1f7a95e10bbe4091fabc03a46881a256c2787637b087003f"}, + {file = "coverage-7.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:5a570cd9bd20b85d1a0d7b009aaf6c110b52b5755c17be6962f8ccd65d1dbd23"}, + {file = "coverage-7.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7ac22a0bb2c7c49f441f7a6d46c9c80d96e56f5a8bc6972529ed43c8b694e27"}, + {file = "coverage-7.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bf13d564d310c156d1c8e53877baf2993fb3073b2fc9f69790ca6a732eb4bfea"}, + {file = 
"coverage-7.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5761c70c017c1b0d21b0815a920ffb94a670c8d5d409d9b38857874c21f70d7"}, + {file = "coverage-7.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5ff52d790c7e1628241ffbcaeb33e07d14b007b6eb00a19320c7b8a7024c040"}, + {file = "coverage-7.8.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d39fc4817fd67b3915256af5dda75fd4ee10621a3d484524487e33416c6f3543"}, + {file = "coverage-7.8.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b44674870709017e4b4036e3d0d6c17f06a0e6d4436422e0ad29b882c40697d2"}, + {file = "coverage-7.8.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8f99eb72bf27cbb167b636eb1726f590c00e1ad375002230607a844d9e9a2318"}, + {file = "coverage-7.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b571bf5341ba8c6bc02e0baeaf3b061ab993bf372d982ae509807e7f112554e9"}, + {file = "coverage-7.8.0-cp311-cp311-win32.whl", hash = "sha256:e75a2ad7b647fd8046d58c3132d7eaf31b12d8a53c0e4b21fa9c4d23d6ee6d3c"}, + {file = "coverage-7.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:3043ba1c88b2139126fc72cb48574b90e2e0546d4c78b5299317f61b7f718b78"}, + {file = "coverage-7.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bbb5cc845a0292e0c520656d19d7ce40e18d0e19b22cb3e0409135a575bf79fc"}, + {file = "coverage-7.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4dfd9a93db9e78666d178d4f08a5408aa3f2474ad4d0e0378ed5f2ef71640cb6"}, + {file = "coverage-7.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f017a61399f13aa6d1039f75cd467be388d157cd81f1a119b9d9a68ba6f2830d"}, + {file = "coverage-7.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0915742f4c82208ebf47a2b154a5334155ed9ef9fe6190674b8a46c2fb89cb05"}, + {file = "coverage-7.8.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a40fcf208e021eb14b0fac6bdb045c0e0cab53105f93ba0d03fd934c956143a"}, + {file = "coverage-7.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a1f406a8e0995d654b2ad87c62caf6befa767885301f3b8f6f73e6f3c31ec3a6"}, + {file = "coverage-7.8.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:77af0f6447a582fdc7de5e06fa3757a3ef87769fbb0fdbdeba78c23049140a47"}, + {file = "coverage-7.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f2d32f95922927186c6dbc8bc60df0d186b6edb828d299ab10898ef3f40052fe"}, + {file = "coverage-7.8.0-cp312-cp312-win32.whl", hash = "sha256:769773614e676f9d8e8a0980dd7740f09a6ea386d0f383db6821df07d0f08545"}, + {file = "coverage-7.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:e5d2b9be5b0693cf21eb4ce0ec8d211efb43966f6657807f6859aab3814f946b"}, + {file = "coverage-7.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ac46d0c2dd5820ce93943a501ac5f6548ea81594777ca585bf002aa8854cacd"}, + {file = "coverage-7.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:771eb7587a0563ca5bb6f622b9ed7f9d07bd08900f7589b4febff05f469bea00"}, + {file = "coverage-7.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42421e04069fb2cbcbca5a696c4050b84a43b05392679d4068acbe65449b5c64"}, + {file = "coverage-7.8.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:554fec1199d93ab30adaa751db68acec2b41c5602ac944bb19187cb9a41a8067"}, + {file = 
"coverage-7.8.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5aaeb00761f985007b38cf463b1d160a14a22c34eb3f6a39d9ad6fc27cb73008"}, + {file = "coverage-7.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:581a40c7b94921fffd6457ffe532259813fc68eb2bdda60fa8cc343414ce3733"}, + {file = "coverage-7.8.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f319bae0321bc838e205bf9e5bc28f0a3165f30c203b610f17ab5552cff90323"}, + {file = "coverage-7.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04bfec25a8ef1c5f41f5e7e5c842f6b615599ca8ba8391ec33a9290d9d2db3a3"}, + {file = "coverage-7.8.0-cp313-cp313-win32.whl", hash = "sha256:dd19608788b50eed889e13a5d71d832edc34fc9dfce606f66e8f9f917eef910d"}, + {file = "coverage-7.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:a9abbccd778d98e9c7e85038e35e91e67f5b520776781d9a1e2ee9d400869487"}, + {file = "coverage-7.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:18c5ae6d061ad5b3e7eef4363fb27a0576012a7447af48be6c75b88494c6cf25"}, + {file = "coverage-7.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:95aa6ae391a22bbbce1b77ddac846c98c5473de0372ba5c463480043a07bff42"}, + {file = "coverage-7.8.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e013b07ba1c748dacc2a80e69a46286ff145935f260eb8c72df7185bf048f502"}, + {file = "coverage-7.8.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d766a4f0e5aa1ba056ec3496243150698dc0481902e2b8559314368717be82b1"}, + {file = "coverage-7.8.0-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad80e6b4a0c3cb6f10f29ae4c60e991f424e6b14219d46f1e7d442b938ee68a4"}, + {file = "coverage-7.8.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b87eb6fc9e1bb8f98892a2458781348fa37e6925f35bb6ceb9d4afd54ba36c73"}, + {file = "coverage-7.8.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:d1ba00ae33be84066cfbe7361d4e04dec78445b2b88bdb734d0d1cbab916025a"}, + {file = "coverage-7.8.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f3c38e4e5ccbdc9198aecc766cedbb134b2d89bf64533973678dfcf07effd883"}, + {file = "coverage-7.8.0-cp313-cp313t-win32.whl", hash = "sha256:379fe315e206b14e21db5240f89dc0774bdd3e25c3c58c2c733c99eca96f1ada"}, + {file = "coverage-7.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2e4b6b87bb0c846a9315e3ab4be2d52fac905100565f4b92f02c445c8799e257"}, + {file = "coverage-7.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa260de59dfb143af06dcf30c2be0b200bed2a73737a8a59248fcb9fa601ef0f"}, + {file = "coverage-7.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:96121edfa4c2dfdda409877ea8608dd01de816a4dc4a0523356067b305e4e17a"}, + {file = "coverage-7.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b8af63b9afa1031c0ef05b217faa598f3069148eeee6bb24b79da9012423b82"}, + {file = "coverage-7.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:89b1f4af0d4afe495cd4787a68e00f30f1d15939f550e869de90a86efa7e0814"}, + {file = "coverage-7.8.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94ec0be97723ae72d63d3aa41961a0b9a6f5a53ff599813c324548d18e3b9e8c"}, + {file = "coverage-7.8.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8a1d96e780bdb2d0cbb297325711701f7c0b6f89199a57f2049e90064c29f6bd"}, + {file = 
"coverage-7.8.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:f1d8a2a57b47142b10374902777e798784abf400a004b14f1b0b9eaf1e528ba4"}, + {file = "coverage-7.8.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:cf60dd2696b457b710dd40bf17ad269d5f5457b96442f7f85722bdb16fa6c899"}, + {file = "coverage-7.8.0-cp39-cp39-win32.whl", hash = "sha256:be945402e03de47ba1872cd5236395e0f4ad635526185a930735f66710e1bd3f"}, + {file = "coverage-7.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:90e7fbc6216ecaffa5a880cdc9c77b7418c1dcb166166b78dbc630d07f278cc3"}, + {file = "coverage-7.8.0-pp39.pp310.pp311-none-any.whl", hash = "sha256:b8194fb8e50d556d5849753de991d390c5a1edeeba50f68e3a9253fbd8bf8ccd"}, + {file = "coverage-7.8.0-py3-none-any.whl", hash = "sha256:dbf364b4c5e7bae9250528167dfe40219b62e2d573c854d74be213e1e52069f7"}, + {file = "coverage-7.8.0.tar.gz", hash = "sha256:7a3d62b3b03b4b6fd41a085f3574874cf946cb4604d2b4d3e8dca8cd570ca501"}, ] [package.dependencies] @@ -1340,84 +1340,66 @@ dev = ["black", "flake8", "flake8-pyproject", "mypy", "pre-commit", "pytest"] [[package]] name = "greenlet" -version = "3.1.1" +version = "3.2.1" description = "Lightweight in-process concurrent programming" optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" files = [ - {file = "greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563"}, - {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fde093fb93f35ca72a556cf72c92ea3ebfda3d79fc35bb19fbe685853869a83"}, - {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36b89d13c49216cadb828db8dfa6ce86bbbc476a82d3a6c397f0efae0525bdd0"}, - {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94b6150a85e1b33b40b1464a3f9988dcc5251d6ed06842abff82e42632fac120"}, - {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93147c513fac16385d1036b7e5b102c7fbbdb163d556b791f0f11eada7ba65dc"}, - {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da7a9bff22ce038e19bf62c4dd1ec8391062878710ded0a845bcf47cc0200617"}, - {file = "greenlet-3.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b2795058c23988728eec1f36a4e5e4ebad22f8320c85f3587b539b9ac84128d7"}, - {file = "greenlet-3.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ed10eac5830befbdd0c32f83e8aa6288361597550ba669b04c48f0f9a2c843c6"}, - {file = "greenlet-3.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:77c386de38a60d1dfb8e55b8c1101d68c79dfdd25c7095d51fec2dd800892b80"}, - {file = "greenlet-3.1.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:e4d333e558953648ca09d64f13e6d8f0523fa705f51cae3f03b5983489958c70"}, - {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09fc016b73c94e98e29af67ab7b9a879c307c6731a2c9da0db5a7d9b7edd1159"}, - {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d5e975ca70269d66d17dd995dafc06f1b06e8cb1ec1e9ed54c1d1e4a7c4cf26e"}, - {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b2813dc3de8c1ee3f924e4d4227999285fd335d1bcc0d2be6dc3f1f6a318ec1"}, - {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e347b3bfcf985a05e8c0b7d462ba6f15b1ee1c909e2dcad795e49e91b152c383"}, - {file = 
"greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e8f8c9cb53cdac7ba9793c276acd90168f416b9ce36799b9b885790f8ad6c0a"}, - {file = "greenlet-3.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62ee94988d6b4722ce0028644418d93a52429e977d742ca2ccbe1c4f4a792511"}, - {file = "greenlet-3.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1776fd7f989fc6b8d8c8cb8da1f6b82c5814957264d1f6cf818d475ec2bf6395"}, - {file = "greenlet-3.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:48ca08c771c268a768087b408658e216133aecd835c0ded47ce955381105ba39"}, - {file = "greenlet-3.1.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:4afe7ea89de619adc868e087b4d2359282058479d7cfb94970adf4b55284574d"}, - {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f406b22b7c9a9b4f8aa9d2ab13d6ae0ac3e85c9a809bd590ad53fed2bf70dc79"}, - {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c3a701fe5a9695b238503ce5bbe8218e03c3bcccf7e204e455e7462d770268aa"}, - {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2846930c65b47d70b9d178e89c7e1a69c95c1f68ea5aa0a58646b7a96df12441"}, - {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99cfaa2110534e2cf3ba31a7abcac9d328d1d9f1b95beede58294a60348fba36"}, - {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1443279c19fca463fc33e65ef2a935a5b09bb90f978beab37729e1c3c6c25fe9"}, - {file = "greenlet-3.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b7cede291382a78f7bb5f04a529cb18e068dd29e0fb27376074b6d0317bf4dd0"}, - {file = "greenlet-3.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:23f20bb60ae298d7d8656c6ec6db134bca379ecefadb0b19ce6f19d1f232a942"}, - {file = "greenlet-3.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:7124e16b4c55d417577c2077be379514321916d5790fa287c9ed6f23bd2ffd01"}, - {file = "greenlet-3.1.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:05175c27cb459dcfc05d026c4232f9de8913ed006d42713cb8a5137bd49375f1"}, - {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:935e943ec47c4afab8965954bf49bfa639c05d4ccf9ef6e924188f762145c0ff"}, - {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:667a9706c970cb552ede35aee17339a18e8f2a87a51fba2ed39ceeeb1004798a"}, - {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8a678974d1f3aa55f6cc34dc480169d58f2e6d8958895d68845fa4ab566509e"}, - {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efc0f674aa41b92da8c49e0346318c6075d734994c3c4e4430b1c3f853e498e4"}, - {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0153404a4bb921f0ff1abeb5ce8a5131da56b953eda6e14b88dc6bbc04d2049e"}, - {file = "greenlet-3.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:275f72decf9932639c1c6dd1013a1bc266438eb32710016a1c742df5da6e60a1"}, - {file = "greenlet-3.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c4aab7f6381f38a4b42f269057aee279ab0fc7bf2e929e3d4abfae97b682a12c"}, - {file = "greenlet-3.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:b42703b1cf69f2aa1df7d1030b9d77d3e584a70755674d60e710f0af570f3761"}, - {file = 
"greenlet-3.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1695e76146579f8c06c1509c7ce4dfe0706f49c6831a817ac04eebb2fd02011"}, - {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7876452af029456b3f3549b696bb36a06db7c90747740c5302f74a9e9fa14b13"}, - {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ead44c85f8ab905852d3de8d86f6f8baf77109f9da589cb4fa142bd3b57b475"}, - {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8320f64b777d00dd7ccdade271eaf0cad6636343293a25074cc5566160e4de7b"}, - {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6510bf84a6b643dabba74d3049ead221257603a253d0a9873f55f6a59a65f822"}, - {file = "greenlet-3.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:04b013dc07c96f83134b1e99888e7a79979f1a247e2a9f59697fa14b5862ed01"}, - {file = "greenlet-3.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:411f015496fec93c1c8cd4e5238da364e1da7a124bcb293f085bf2860c32c6f6"}, - {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47da355d8687fd65240c364c90a31569a133b7b60de111c255ef5b606f2ae291"}, - {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98884ecf2ffb7d7fe6bd517e8eb99d31ff7855a840fa6d0d63cd07c037f6a981"}, - {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1d4aeb8891338e60d1ab6127af1fe45def5259def8094b9c7e34690c8858803"}, - {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db32b5348615a04b82240cc67983cb315309e88d444a288934ee6ceaebcad6cc"}, - {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dcc62f31eae24de7f8dce72134c8651c58000d3b1868e01392baea7c32c247de"}, - {file = "greenlet-3.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1d3755bcb2e02de341c55b4fca7a745a24a9e7212ac953f6b3a48d117d7257aa"}, - {file = "greenlet-3.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b8da394b34370874b4572676f36acabac172602abf054cbc4ac910219f3340af"}, - {file = "greenlet-3.1.1-cp37-cp37m-win32.whl", hash = "sha256:a0dfc6c143b519113354e780a50381508139b07d2177cb6ad6a08278ec655798"}, - {file = "greenlet-3.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:54558ea205654b50c438029505def3834e80f0869a70fb15b871c29b4575ddef"}, - {file = "greenlet-3.1.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:346bed03fe47414091be4ad44786d1bd8bef0c3fcad6ed3dee074a032ab408a9"}, - {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfc59d69fc48664bc693842bd57acfdd490acafda1ab52c7836e3fc75c90a111"}, - {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21e10da6ec19b457b82636209cbe2331ff4306b54d06fa04b7c138ba18c8a81"}, - {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:37b9de5a96111fc15418819ab4c4432e4f3c2ede61e660b1e33971eba26ef9ba"}, - {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ef9ea3f137e5711f0dbe5f9263e8c009b7069d8a1acea822bd5e9dae0ae49c8"}, - {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85f3ff71e2e60bd4b4932a043fbbe0f499e263c628390b285cb599154a3b03b1"}, - 
{file = "greenlet-3.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:95ffcf719966dd7c453f908e208e14cde192e09fde6c7186c8f1896ef778d8cd"}, - {file = "greenlet-3.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:03a088b9de532cbfe2ba2034b2b85e82df37874681e8c470d6fb2f8c04d7e4b7"}, - {file = "greenlet-3.1.1-cp38-cp38-win32.whl", hash = "sha256:8b8b36671f10ba80e159378df9c4f15c14098c4fd73a36b9ad715f057272fbef"}, - {file = "greenlet-3.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:7017b2be767b9d43cc31416aba48aab0d2309ee31b4dbf10a1d38fb7972bdf9d"}, - {file = "greenlet-3.1.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:396979749bd95f018296af156201d6211240e7a23090f50a8d5d18c370084dc3"}, - {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca9d0ff5ad43e785350894d97e13633a66e2b50000e8a183a50a88d834752d42"}, - {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f6ff3b14f2df4c41660a7dec01045a045653998784bf8cfcb5a525bdffffbc8f"}, - {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94ebba31df2aa506d7b14866fed00ac141a867e63143fe5bca82a8e503b36437"}, - {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73aaad12ac0ff500f62cebed98d8789198ea0e6f233421059fa68a5aa7220145"}, - {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63e4844797b975b9af3a3fb8f7866ff08775f5426925e1e0bbcfe7932059a12c"}, - {file = "greenlet-3.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7939aa3ca7d2a1593596e7ac6d59391ff30281ef280d8632fa03d81f7c5f955e"}, - {file = "greenlet-3.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d0028e725ee18175c6e422797c407874da24381ce0690d6b9396c204c7f7276e"}, - {file = "greenlet-3.1.1-cp39-cp39-win32.whl", hash = "sha256:5e06afd14cbaf9e00899fae69b24a32f2196c19de08fcb9f4779dd4f004e5e7c"}, - {file = "greenlet-3.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:3319aa75e0e0639bc15ff54ca327e8dc7a6fe404003496e3c6925cd3142e0e22"}, - {file = "greenlet-3.1.1.tar.gz", hash = "sha256:4ce3ac6cdb6adf7946475d7ef31777c26d94bccc377e070a7986bd2d5c515467"}, + {file = "greenlet-3.2.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:777c1281aa7c786738683e302db0f55eb4b0077c20f1dc53db8852ffaea0a6b0"}, + {file = "greenlet-3.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3059c6f286b53ea4711745146ffe5a5c5ff801f62f6c56949446e0f6461f8157"}, + {file = "greenlet-3.2.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e1a40a17e2c7348f5eee5d8e1b4fa6a937f0587eba89411885a36a8e1fc29bd2"}, + {file = "greenlet-3.2.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5193135b3a8d0017cb438de0d49e92bf2f6c1c770331d24aa7500866f4db4017"}, + {file = "greenlet-3.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:639a94d001fe874675b553f28a9d44faed90f9864dc57ba0afef3f8d76a18b04"}, + {file = "greenlet-3.2.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8fe303381e7e909e42fb23e191fc69659910909fdcd056b92f6473f80ef18543"}, + {file = "greenlet-3.2.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:72c9b668454e816b5ece25daac1a42c94d1c116d5401399a11b77ce8d883110c"}, + {file = "greenlet-3.2.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6079ae990bbf944cf66bea64a09dcb56085815630955109ffa98984810d71565"}, + {file = 
"greenlet-3.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:e63cd2035f49376a23611fbb1643f78f8246e9d4dfd607534ec81b175ce582c2"}, + {file = "greenlet-3.2.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:aa30066fd6862e1153eaae9b51b449a6356dcdb505169647f69e6ce315b9468b"}, + {file = "greenlet-3.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b0f3a0a67786facf3b907a25db80efe74310f9d63cc30869e49c79ee3fcef7e"}, + {file = "greenlet-3.2.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:64a4d0052de53ab3ad83ba86de5ada6aeea8f099b4e6c9ccce70fb29bc02c6a2"}, + {file = "greenlet-3.2.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:852ef432919830022f71a040ff7ba3f25ceb9fe8f3ab784befd747856ee58530"}, + {file = "greenlet-3.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4818116e75a0dd52cdcf40ca4b419e8ce5cb6669630cb4f13a6c384307c9543f"}, + {file = "greenlet-3.2.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9afa05fe6557bce1642d8131f87ae9462e2a8e8c46f7ed7929360616088a3975"}, + {file = "greenlet-3.2.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5c12f0d17a88664757e81a6e3fc7c2452568cf460a2f8fb44f90536b2614000b"}, + {file = "greenlet-3.2.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dbb4e1aa2000852937dd8f4357fb73e3911da426df8ca9b8df5db231922da474"}, + {file = "greenlet-3.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:cb5ee928ce5fedf9a4b0ccdc547f7887136c4af6109d8f2fe8e00f90c0db47f5"}, + {file = "greenlet-3.2.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:0ba2811509a30e5f943be048895a983a8daf0b9aa0ac0ead526dfb5d987d80ea"}, + {file = "greenlet-3.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4245246e72352b150a1588d43ddc8ab5e306bef924c26571aafafa5d1aaae4e8"}, + {file = "greenlet-3.2.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7abc0545d8e880779f0c7ce665a1afc3f72f0ca0d5815e2b006cafc4c1cc5840"}, + {file = "greenlet-3.2.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6dcc6d604a6575c6225ac0da39df9335cc0c6ac50725063fa90f104f3dbdb2c9"}, + {file = "greenlet-3.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2273586879affca2d1f414709bb1f61f0770adcabf9eda8ef48fd90b36f15d12"}, + {file = "greenlet-3.2.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ff38c869ed30fff07f1452d9a204ece1ec6d3c0870e0ba6e478ce7c1515acf22"}, + {file = "greenlet-3.2.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e934591a7a4084fa10ee5ef50eb9d2ac8c4075d5c9cf91128116b5dca49d43b1"}, + {file = "greenlet-3.2.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:063bcf7f8ee28eb91e7f7a8148c65a43b73fbdc0064ab693e024b5a940070145"}, + {file = "greenlet-3.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7132e024ebeeeabbe661cf8878aac5d2e643975c4feae833142592ec2f03263d"}, + {file = "greenlet-3.2.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:e1967882f0c42eaf42282a87579685c8673c51153b845fde1ee81be720ae27ac"}, + {file = "greenlet-3.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e77ae69032a95640a5fe8c857ec7bee569a0997e809570f4c92048691ce4b437"}, + {file = "greenlet-3.2.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3227c6ec1149d4520bc99edac3b9bc8358d0034825f3ca7572165cb502d8f29a"}, + {file = 
"greenlet-3.2.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ddda0197c5b46eedb5628d33dad034c455ae77708c7bf192686e760e26d6a0c"}, + {file = "greenlet-3.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de62b542e5dcf0b6116c310dec17b82bb06ef2ceb696156ff7bf74a7a498d982"}, + {file = "greenlet-3.2.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c07a0c01010df42f1f058b3973decc69c4d82e036a951c3deaf89ab114054c07"}, + {file = "greenlet-3.2.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:2530bfb0abcd451ea81068e6d0a1aac6dabf3f4c23c8bd8e2a8f579c2dd60d95"}, + {file = "greenlet-3.2.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:1c472adfca310f849903295c351d297559462067f618944ce2650a1878b84123"}, + {file = "greenlet-3.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:24a496479bc8bd01c39aa6516a43c717b4cee7196573c47b1f8e1011f7c12495"}, + {file = "greenlet-3.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:175d583f7d5ee57845591fc30d852b75b144eb44b05f38b67966ed6df05c8526"}, + {file = "greenlet-3.2.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3ecc9d33ca9428e4536ea53e79d781792cee114d2fa2695b173092bdbd8cd6d5"}, + {file = "greenlet-3.2.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f56382ac4df3860ebed8ed838f268f03ddf4e459b954415534130062b16bc32"}, + {file = "greenlet-3.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc45a7189c91c0f89aaf9d69da428ce8301b0fd66c914a499199cfb0c28420fc"}, + {file = "greenlet-3.2.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51a2f49da08cff79ee42eb22f1658a2aed60c72792f0a0a95f5f0ca6d101b1fb"}, + {file = "greenlet-3.2.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:0c68bbc639359493420282d2f34fa114e992a8724481d700da0b10d10a7611b8"}, + {file = "greenlet-3.2.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:e775176b5c203a1fa4be19f91da00fd3bff536868b77b237da3f4daa5971ae5d"}, + {file = "greenlet-3.2.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d6668caf15f181c1b82fb6406f3911696975cc4c37d782e19cb7ba499e556189"}, + {file = "greenlet-3.2.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:17964c246d4f6e1327edd95e2008988a8995ae3a7732be2f9fc1efed1f1cdf8c"}, + {file = "greenlet-3.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04b4ec7f65f0e4a1500ac475c9343f6cc022b2363ebfb6e94f416085e40dea15"}, + {file = "greenlet-3.2.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b38d53cf268da963869aa25a6e4cc84c1c69afc1ae3391738b2603d110749d01"}, + {file = "greenlet-3.2.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:05a7490f74e8aabc5f29256765a99577ffde979920a2db1f3676d265a3adba41"}, + {file = "greenlet-3.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4339b202ac20a89ccd5bde0663b4d00dc62dd25cb3fb14f7f3034dec1b0d9ece"}, + {file = "greenlet-3.2.1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a750f1046994b9e038b45ae237d68153c29a3a783075211fb1414a180c8324b"}, + {file = "greenlet-3.2.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:374ffebaa5fbd10919cd599e5cf8ee18bae70c11f9d61e73db79826c8c93d6f9"}, + {file = "greenlet-3.2.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b89e5d44f55372efc6072f59ced5ed1efb7b44213dab5ad7e0caba0232c6545"}, + {file = 
"greenlet-3.2.1-cp39-cp39-win32.whl", hash = "sha256:b7503d6b8bbdac6bbacf5a8c094f18eab7553481a1830975799042f26c9e101b"}, + {file = "greenlet-3.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:e98328b8b8f160925d6b1c5b1879d8e64f6bd8cf11472b7127d579da575b77d9"}, + {file = "greenlet-3.2.1.tar.gz", hash = "sha256:9f4dd4b4946b14bb3bf038f81e1d2e535b7d94f1b2a59fdba1293cd9c1a0a4d7"}, ] [package.extras] @@ -1426,13 +1408,13 @@ test = ["objgraph", "psutil"] [[package]] name = "h11" -version = "0.14.0" +version = "0.16.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, - {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, + {file = "h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"}, + {file = "h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1"}, ] [[package]] @@ -1600,18 +1582,18 @@ lxml = ["lxml"] [[package]] name = "httpcore" -version = "1.0.7" +version = "1.0.9" description = "A minimal low-level HTTP client." optional = false python-versions = ">=3.8" files = [ - {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"}, - {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"}, + {file = "httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55"}, + {file = "httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8"}, ] [package.dependencies] certifi = "*" -h11 = ">=0.13,<0.15" +h11 = ">=0.16" [package.extras] asyncio = ["anyio (>=4.0,<5.0)"] @@ -1714,13 +1696,13 @@ pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_ve [[package]] name = "identify" -version = "2.6.9" +version = "2.6.10" description = "File identification library for Python" optional = false python-versions = ">=3.9" files = [ - {file = "identify-2.6.9-py2.py3-none-any.whl", hash = "sha256:c98b4322da415a8e5a70ff6e51fbc2d2932c015532d77e9f8537b4ba7813b150"}, - {file = "identify-2.6.9.tar.gz", hash = "sha256:d40dfe3142a1421d8518e3d3985ef5ac42890683e32306ad614a29490abeb6bf"}, + {file = "identify-2.6.10-py2.py3-none-any.whl", hash = "sha256:5f34248f54136beed1a7ba6a6b5c4b6cf21ff495aac7c359e1ef831ae3b8ab25"}, + {file = "identify-2.6.10.tar.gz", hash = "sha256:45e92fd704f3da71cc3880036633f48b4b7265fd4de2b57627cb157216eb7eb8"}, ] [package.extras] @@ -1760,13 +1742,13 @@ tests = ["pytest-black (>=0.3.0,<0.3.10)", "pytest-cache (>=1.0)", "pytest-inven [[package]] name = "importlib-metadata" -version = "8.6.1" +version = "8.7.0" description = "Read metadata from Python packages" optional = true python-versions = ">=3.9" files = [ - {file = "importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e"}, - {file = "importlib_metadata-8.6.1.tar.gz", hash = "sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580"}, + {file = "importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd"}, + {file = "importlib_metadata-8.7.0.tar.gz", hash = 
"sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000"}, ] [package.dependencies] @@ -1906,13 +1888,13 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- [[package]] name = "jsonschema-specifications" -version = "2024.10.1" +version = "2025.4.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" optional = false python-versions = ">=3.9" files = [ - {file = "jsonschema_specifications-2024.10.1-py3-none-any.whl", hash = "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf"}, - {file = "jsonschema_specifications-2024.10.1.tar.gz", hash = "sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272"}, + {file = "jsonschema_specifications-2025.4.1-py3-none-any.whl", hash = "sha256:4653bffbd6584f7de83a67e0d620ef16900b390ddc7939d56684d6c81e33f1af"}, + {file = "jsonschema_specifications-2025.4.1.tar.gz", hash = "sha256:630159c9f4dbea161a6a2205c3011cc4f18ff381b189fff48bb39b9bf26ae608"}, ] [package.dependencies] @@ -1935,149 +1917,140 @@ mypy = ["mypy"] [[package]] name = "lxml" -version = "5.3.1" +version = "5.4.0" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." optional = false python-versions = ">=3.6" files = [ - {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a4058f16cee694577f7e4dd410263cd0ef75644b43802a689c2b3c2a7e69453b"}, - {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:364de8f57d6eda0c16dcfb999af902da31396949efa0e583e12675d09709881b"}, - {file = "lxml-5.3.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:528f3a0498a8edc69af0559bdcf8a9f5a8bf7c00051a6ef3141fdcf27017bbf5"}, - {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db4743e30d6f5f92b6d2b7c86b3ad250e0bad8dee4b7ad8a0c44bfb276af89a3"}, - {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17b5d7f8acf809465086d498d62a981fa6a56d2718135bb0e4aa48c502055f5c"}, - {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:928e75a7200a4c09e6efc7482a1337919cc61fe1ba289f297827a5b76d8969c2"}, - {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a997b784a639e05b9d4053ef3b20c7e447ea80814a762f25b8ed5a89d261eac"}, - {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7b82e67c5feb682dbb559c3e6b78355f234943053af61606af126df2183b9ef9"}, - {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:f1de541a9893cf8a1b1db9bf0bf670a2decab42e3e82233d36a74eda7822b4c9"}, - {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:de1fc314c3ad6bc2f6bd5b5a5b9357b8c6896333d27fdbb7049aea8bd5af2d79"}, - {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:7c0536bd9178f754b277a3e53f90f9c9454a3bd108b1531ffff720e082d824f2"}, - {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:68018c4c67d7e89951a91fbd371e2e34cd8cfc71f0bb43b5332db38497025d51"}, - {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa826340a609d0c954ba52fd831f0fba2a4165659ab0ee1a15e4aac21f302406"}, - {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:796520afa499732191e39fc95b56a3b07f95256f2d22b1c26e217fb69a9db5b5"}, - {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:3effe081b3135237da6e4c4530ff2a868d3f80be0bda027e118a5971285d42d0"}, - {file = "lxml-5.3.1-cp310-cp310-win32.whl", hash = "sha256:a22f66270bd6d0804b02cd49dae2b33d4341015545d17f8426f2c4e22f557a23"}, - {file = "lxml-5.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:0bcfadea3cdc68e678d2b20cb16a16716887dd00a881e16f7d806c2138b8ff0c"}, - {file = "lxml-5.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e220f7b3e8656ab063d2eb0cd536fafef396829cafe04cb314e734f87649058f"}, - {file = "lxml-5.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f2cfae0688fd01f7056a17367e3b84f37c545fb447d7282cf2c242b16262607"}, - {file = "lxml-5.3.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67d2f8ad9dcc3a9e826bdc7802ed541a44e124c29b7d95a679eeb58c1c14ade8"}, - {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db0c742aad702fd5d0c6611a73f9602f20aec2007c102630c06d7633d9c8f09a"}, - {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:198bb4b4dd888e8390afa4f170d4fa28467a7eaf857f1952589f16cfbb67af27"}, - {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d2a3e412ce1849be34b45922bfef03df32d1410a06d1cdeb793a343c2f1fd666"}, - {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b8969dbc8d09d9cd2ae06362c3bad27d03f433252601ef658a49bd9f2b22d79"}, - {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5be8f5e4044146a69c96077c7e08f0709c13a314aa5315981185c1f00235fe65"}, - {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:133f3493253a00db2c870d3740bc458ebb7d937bd0a6a4f9328373e0db305709"}, - {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:52d82b0d436edd6a1d22d94a344b9a58abd6c68c357ed44f22d4ba8179b37629"}, - {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1b6f92e35e2658a5ed51c6634ceb5ddae32053182851d8cad2a5bc102a359b33"}, - {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:203b1d3eaebd34277be06a3eb880050f18a4e4d60861efba4fb946e31071a295"}, - {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:155e1a5693cf4b55af652f5c0f78ef36596c7f680ff3ec6eb4d7d85367259b2c"}, - {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:22ec2b3c191f43ed21f9545e9df94c37c6b49a5af0a874008ddc9132d49a2d9c"}, - {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7eda194dd46e40ec745bf76795a7cccb02a6a41f445ad49d3cf66518b0bd9cff"}, - {file = "lxml-5.3.1-cp311-cp311-win32.whl", hash = "sha256:fb7c61d4be18e930f75948705e9718618862e6fc2ed0d7159b2262be73f167a2"}, - {file = "lxml-5.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c809eef167bf4a57af4b03007004896f5c60bd38dc3852fcd97a26eae3d4c9e6"}, - {file = "lxml-5.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e69add9b6b7b08c60d7ff0152c7c9a6c45b4a71a919be5abde6f98f1ea16421c"}, - {file = "lxml-5.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4e52e1b148867b01c05e21837586ee307a01e793b94072d7c7b91d2c2da02ffe"}, - {file = "lxml-5.3.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4b382e0e636ed54cd278791d93fe2c4f370772743f02bcbe431a160089025c9"}, - {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:c2e49dc23a10a1296b04ca9db200c44d3eb32c8d8ec532e8c1fd24792276522a"}, - {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4399b4226c4785575fb20998dc571bc48125dc92c367ce2602d0d70e0c455eb0"}, - {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5412500e0dc5481b1ee9cf6b38bb3b473f6e411eb62b83dc9b62699c3b7b79f7"}, - {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c93ed3c998ea8472be98fb55aed65b5198740bfceaec07b2eba551e55b7b9ae"}, - {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:63d57fc94eb0bbb4735e45517afc21ef262991d8758a8f2f05dd6e4174944519"}, - {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:b450d7cabcd49aa7ab46a3c6aa3ac7e1593600a1a0605ba536ec0f1b99a04322"}, - {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:4df0ec814b50275ad6a99bc82a38b59f90e10e47714ac9871e1b223895825468"}, - {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d184f85ad2bb1f261eac55cddfcf62a70dee89982c978e92b9a74a1bfef2e367"}, - {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b725e70d15906d24615201e650d5b0388b08a5187a55f119f25874d0103f90dd"}, - {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a31fa7536ec1fb7155a0cd3a4e3d956c835ad0a43e3610ca32384d01f079ea1c"}, - {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3c3c8b55c7fc7b7e8877b9366568cc73d68b82da7fe33d8b98527b73857a225f"}, - {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d61ec60945d694df806a9aec88e8f29a27293c6e424f8ff91c80416e3c617645"}, - {file = "lxml-5.3.1-cp312-cp312-win32.whl", hash = "sha256:f4eac0584cdc3285ef2e74eee1513a6001681fd9753b259e8159421ed28a72e5"}, - {file = "lxml-5.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:29bfc8d3d88e56ea0a27e7c4897b642706840247f59f4377d81be8f32aa0cfbf"}, - {file = "lxml-5.3.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c093c7088b40d8266f57ed71d93112bd64c6724d31f0794c1e52cc4857c28e0e"}, - {file = "lxml-5.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b0884e3f22d87c30694e625b1e62e6f30d39782c806287450d9dc2fdf07692fd"}, - {file = "lxml-5.3.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1637fa31ec682cd5760092adfabe86d9b718a75d43e65e211d5931809bc111e7"}, - {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a364e8e944d92dcbf33b6b494d4e0fb3499dcc3bd9485beb701aa4b4201fa414"}, - {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:779e851fd0e19795ccc8a9bb4d705d6baa0ef475329fe44a13cf1e962f18ff1e"}, - {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c4393600915c308e546dc7003d74371744234e8444a28622d76fe19b98fa59d1"}, - {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:673b9d8e780f455091200bba8534d5f4f465944cbdd61f31dc832d70e29064a5"}, - {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2e4a570f6a99e96c457f7bec5ad459c9c420ee80b99eb04cbfcfe3fc18ec6423"}, - {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:71f31eda4e370f46af42fc9f264fafa1b09f46ba07bdbee98f25689a04b81c20"}, - {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_s390x.whl", hash = 
"sha256:42978a68d3825eaac55399eb37a4d52012a205c0c6262199b8b44fcc6fd686e8"}, - {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8b1942b3e4ed9ed551ed3083a2e6e0772de1e5e3aca872d955e2e86385fb7ff9"}, - {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:85c4f11be9cf08917ac2a5a8b6e1ef63b2f8e3799cec194417e76826e5f1de9c"}, - {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:231cf4d140b22a923b1d0a0a4e0b4f972e5893efcdec188934cc65888fd0227b"}, - {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5865b270b420eda7b68928d70bb517ccbe045e53b1a428129bb44372bf3d7dd5"}, - {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dbf7bebc2275016cddf3c997bf8a0f7044160714c64a9b83975670a04e6d2252"}, - {file = "lxml-5.3.1-cp313-cp313-win32.whl", hash = "sha256:d0751528b97d2b19a388b302be2a0ee05817097bab46ff0ed76feeec24951f78"}, - {file = "lxml-5.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:91fb6a43d72b4f8863d21f347a9163eecbf36e76e2f51068d59cd004c506f332"}, - {file = "lxml-5.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:016b96c58e9a4528219bb563acf1aaaa8bc5452e7651004894a973f03b84ba81"}, - {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82a4bb10b0beef1434fb23a09f001ab5ca87895596b4581fd53f1e5145a8934a"}, - {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d68eeef7b4d08a25e51897dac29bcb62aba830e9ac6c4e3297ee7c6a0cf6439"}, - {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:f12582b8d3b4c6be1d298c49cb7ae64a3a73efaf4c2ab4e37db182e3545815ac"}, - {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2df7ed5edeb6bd5590914cd61df76eb6cce9d590ed04ec7c183cf5509f73530d"}, - {file = "lxml-5.3.1-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:585c4dc429deebc4307187d2b71ebe914843185ae16a4d582ee030e6cfbb4d8a"}, - {file = "lxml-5.3.1-cp36-cp36m-win32.whl", hash = "sha256:06a20d607a86fccab2fc15a77aa445f2bdef7b49ec0520a842c5c5afd8381576"}, - {file = "lxml-5.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:057e30d0012439bc54ca427a83d458752ccda725c1c161cc283db07bcad43cf9"}, - {file = "lxml-5.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4867361c049761a56bd21de507cab2c2a608c55102311d142ade7dab67b34f32"}, - {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3dddf0fb832486cc1ea71d189cb92eb887826e8deebe128884e15020bb6e3f61"}, - {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bcc211542f7af6f2dfb705f5f8b74e865592778e6cafdfd19c792c244ccce19"}, - {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaca5a812f050ab55426c32177091130b1e49329b3f002a32934cd0245571307"}, - {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:236610b77589faf462337b3305a1be91756c8abc5a45ff7ca8f245a71c5dab70"}, - {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:aed57b541b589fa05ac248f4cb1c46cbb432ab82cbd467d1c4f6a2bdc18aecf9"}, - {file = "lxml-5.3.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:75fa3d6946d317ffc7016a6fcc44f42db6d514b7fdb8b4b28cbe058303cb6e53"}, - {file = "lxml-5.3.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:96eef5b9f336f623ffc555ab47a775495e7e8846dde88de5f941e2906453a1ce"}, - {file = 
"lxml-5.3.1-cp37-cp37m-win32.whl", hash = "sha256:ef45f31aec9be01379fc6c10f1d9c677f032f2bac9383c827d44f620e8a88407"}, - {file = "lxml-5.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0611da6b07dd3720f492db1b463a4d1175b096b49438761cc9f35f0d9eaaef5"}, - {file = "lxml-5.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b2aca14c235c7a08558fe0a4786a1a05873a01e86b474dfa8f6df49101853a4e"}, - {file = "lxml-5.3.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae82fce1d964f065c32c9517309f0c7be588772352d2f40b1574a214bd6e6098"}, - {file = "lxml-5.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7aae7a3d63b935babfdc6864b31196afd5145878ddd22f5200729006366bc4d5"}, - {file = "lxml-5.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8e0d177b1fe251c3b1b914ab64135475c5273c8cfd2857964b2e3bb0fe196a7"}, - {file = "lxml-5.3.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:6c4dd3bfd0c82400060896717dd261137398edb7e524527438c54a8c34f736bf"}, - {file = "lxml-5.3.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f1208c1c67ec9e151d78aa3435aa9b08a488b53d9cfac9b699f15255a3461ef2"}, - {file = "lxml-5.3.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:c6aacf00d05b38a5069826e50ae72751cb5bc27bdc4d5746203988e429b385bb"}, - {file = "lxml-5.3.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5881aaa4bf3a2d086c5f20371d3a5856199a0d8ac72dd8d0dbd7a2ecfc26ab73"}, - {file = "lxml-5.3.1-cp38-cp38-win32.whl", hash = "sha256:45fbb70ccbc8683f2fb58bea89498a7274af1d9ec7995e9f4af5604e028233fc"}, - {file = "lxml-5.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:7512b4d0fc5339d5abbb14d1843f70499cab90d0b864f790e73f780f041615d7"}, - {file = "lxml-5.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5885bc586f1edb48e5d68e7a4b4757b5feb2a496b64f462b4d65950f5af3364f"}, - {file = "lxml-5.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1b92fe86e04f680b848fff594a908edfa72b31bfc3499ef7433790c11d4c8cd8"}, - {file = "lxml-5.3.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a091026c3bf7519ab1e64655a3f52a59ad4a4e019a6f830c24d6430695b1cf6a"}, - {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ffb141361108e864ab5f1813f66e4e1164181227f9b1f105b042729b6c15125"}, - {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3715cdf0dd31b836433af9ee9197af10e3df41d273c19bb249230043667a5dfd"}, - {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88b72eb7222d918c967202024812c2bfb4048deeb69ca328363fb8e15254c549"}, - {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa59974880ab5ad8ef3afaa26f9bda148c5f39e06b11a8ada4660ecc9fb2feb3"}, - {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3bb8149840daf2c3f97cebf00e4ed4a65a0baff888bf2605a8d0135ff5cf764e"}, - {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:0d6b2fa86becfa81f0a0271ccb9eb127ad45fb597733a77b92e8a35e53414914"}, - {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:136bf638d92848a939fd8f0e06fcf92d9f2e4b57969d94faae27c55f3d85c05b"}, - {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:89934f9f791566e54c1d92cdc8f8fd0009447a5ecdb1ec6b810d5f8c4955f6be"}, - {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:a8ade0363f776f87f982572c2860cc43c65ace208db49c76df0a21dde4ddd16e"}, - {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:bfbbab9316330cf81656fed435311386610f78b6c93cc5db4bebbce8dd146675"}, - {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:172d65f7c72a35a6879217bcdb4bb11bc88d55fb4879e7569f55616062d387c2"}, - {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e3c623923967f3e5961d272718655946e5322b8d058e094764180cdee7bab1af"}, - {file = "lxml-5.3.1-cp39-cp39-win32.whl", hash = "sha256:ce0930a963ff593e8bb6fda49a503911accc67dee7e5445eec972668e672a0f0"}, - {file = "lxml-5.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:f7b64fcd670bca8800bc10ced36620c6bbb321e7bc1214b9c0c0df269c1dddc2"}, - {file = "lxml-5.3.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:afa578b6524ff85fb365f454cf61683771d0170470c48ad9d170c48075f86725"}, - {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67f5e80adf0aafc7b5454f2c1cb0cde920c9b1f2cbd0485f07cc1d0497c35c5d"}, - {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dd0b80ac2d8f13ffc906123a6f20b459cb50a99222d0da492360512f3e50f84"}, - {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:422c179022ecdedbe58b0e242607198580804253da220e9454ffe848daa1cfd2"}, - {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:524ccfded8989a6595dbdda80d779fb977dbc9a7bc458864fc9a0c2fc15dc877"}, - {file = "lxml-5.3.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:48fd46bf7155def2e15287c6f2b133a2f78e2d22cdf55647269977b873c65499"}, - {file = "lxml-5.3.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:05123fad495a429f123307ac6d8fd6f977b71e9a0b6d9aeeb8f80c017cb17131"}, - {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a243132767150a44e6a93cd1dde41010036e1cbc63cc3e9fe1712b277d926ce3"}, - {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c92ea6d9dd84a750b2bae72ff5e8cf5fdd13e58dda79c33e057862c29a8d5b50"}, - {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2f1be45d4c15f237209bbf123a0e05b5d630c8717c42f59f31ea9eae2ad89394"}, - {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:a83d3adea1e0ee36dac34627f78ddd7f093bb9cfc0a8e97f1572a949b695cb98"}, - {file = "lxml-5.3.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:3edbb9c9130bac05d8c3fe150c51c337a471cc7fdb6d2a0a7d3a88e88a829314"}, - {file = "lxml-5.3.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2f23cf50eccb3255b6e913188291af0150d89dab44137a69e14e4dcb7be981f1"}, - {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df7e5edac4778127f2bf452e0721a58a1cfa4d1d9eac63bdd650535eb8543615"}, - {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:094b28ed8a8a072b9e9e2113a81fda668d2053f2ca9f2d202c2c8c7c2d6516b1"}, - {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:514fe78fc4b87e7a7601c92492210b20a1b0c6ab20e71e81307d9c2e377c64de"}, - {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8fffc08de02071c37865a155e5ea5fce0282e1546fd5bde7f6149fcaa32558ac"}, - {file = "lxml-5.3.1-pp38-pypy38_pp73-win_amd64.whl", hash = 
"sha256:4b0d5cdba1b655d5b18042ac9c9ff50bda33568eb80feaaca4fc237b9c4fbfde"}, - {file = "lxml-5.3.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3031e4c16b59424e8d78522c69b062d301d951dc55ad8685736c3335a97fc270"}, - {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb659702a45136c743bc130760c6f137870d4df3a9e14386478b8a0511abcfca"}, - {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a11b16a33656ffc43c92a5343a28dc71eefe460bcc2a4923a96f292692709f6"}, - {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c5ae125276f254b01daa73e2c103363d3e99e3e10505686ac7d9d2442dd4627a"}, - {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c76722b5ed4a31ba103e0dc77ab869222ec36efe1a614e42e9bcea88a36186fe"}, - {file = "lxml-5.3.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:33e06717c00c788ab4e79bc4726ecc50c54b9bfb55355eae21473c145d83c2d2"}, - {file = "lxml-5.3.1.tar.gz", hash = "sha256:106b7b5d2977b339f1e97efe2778e2ab20e99994cbb0ec5e55771ed0795920c8"}, + {file = "lxml-5.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e7bc6df34d42322c5289e37e9971d6ed114e3776b45fa879f734bded9d1fea9c"}, + {file = "lxml-5.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6854f8bd8a1536f8a1d9a3655e6354faa6406621cf857dc27b681b69860645c7"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:696ea9e87442467819ac22394ca36cb3d01848dad1be6fac3fb612d3bd5a12cf"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ef80aeac414f33c24b3815ecd560cee272786c3adfa5f31316d8b349bfade28"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b9c2754cef6963f3408ab381ea55f47dabc6f78f4b8ebb0f0b25cf1ac1f7609"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7a62cc23d754bb449d63ff35334acc9f5c02e6dae830d78dab4dd12b78a524f4"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f82125bc7203c5ae8633a7d5d20bcfdff0ba33e436e4ab0abc026a53a8960b7"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b67319b4aef1a6c56576ff544b67a2a6fbd7eaee485b241cabf53115e8908b8f"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:a8ef956fce64c8551221f395ba21d0724fed6b9b6242ca4f2f7beb4ce2f41997"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:0a01ce7d8479dce84fc03324e3b0c9c90b1ece9a9bb6a1b6c9025e7e4520e78c"}, + {file = "lxml-5.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:91505d3ddebf268bb1588eb0f63821f738d20e1e7f05d3c647a5ca900288760b"}, + {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a3bcdde35d82ff385f4ede021df801b5c4a5bcdfb61ea87caabcebfc4945dc1b"}, + {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aea7c06667b987787c7d1f5e1dfcd70419b711cdb47d6b4bb4ad4b76777a0563"}, + {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:a7fb111eef4d05909b82152721a59c1b14d0f365e2be4c742a473c5d7372f4f5"}, + {file = "lxml-5.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:43d549b876ce64aa18b2328faff70f5877f8c6dede415f80a2f799d31644d776"}, + {file = "lxml-5.4.0-cp310-cp310-win32.whl", hash = 
"sha256:75133890e40d229d6c5837b0312abbe5bac1c342452cf0e12523477cd3aa21e7"}, + {file = "lxml-5.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:de5b4e1088523e2b6f730d0509a9a813355b7f5659d70eb4f319c76beea2e250"}, + {file = "lxml-5.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:98a3912194c079ef37e716ed228ae0dcb960992100461b704aea4e93af6b0bb9"}, + {file = "lxml-5.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ea0252b51d296a75f6118ed0d8696888e7403408ad42345d7dfd0d1e93309a7"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b92b69441d1bd39f4940f9eadfa417a25862242ca2c396b406f9272ef09cdcaa"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20e16c08254b9b6466526bc1828d9370ee6c0d60a4b64836bc3ac2917d1e16df"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7605c1c32c3d6e8c990dd28a0970a3cbbf1429d5b92279e37fda05fb0c92190e"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ecf4c4b83f1ab3d5a7ace10bafcb6f11df6156857a3c418244cef41ca9fa3e44"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cef4feae82709eed352cd7e97ae062ef6ae9c7b5dbe3663f104cd2c0e8d94ba"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:df53330a3bff250f10472ce96a9af28628ff1f4efc51ccba351a8820bca2a8ba"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:aefe1a7cb852fa61150fcb21a8c8fcea7b58c4cb11fbe59c97a0a4b31cae3c8c"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:ef5a7178fcc73b7d8c07229e89f8eb45b2908a9238eb90dcfc46571ccf0383b8"}, + {file = "lxml-5.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d2ed1b3cb9ff1c10e6e8b00941bb2e5bb568b307bfc6b17dffbbe8be5eecba86"}, + {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:72ac9762a9f8ce74c9eed4a4e74306f2f18613a6b71fa065495a67ac227b3056"}, + {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f5cb182f6396706dc6cc1896dd02b1c889d644c081b0cdec38747573db88a7d7"}, + {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:3a3178b4873df8ef9457a4875703488eb1622632a9cee6d76464b60e90adbfcd"}, + {file = "lxml-5.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e094ec83694b59d263802ed03a8384594fcce477ce484b0cbcd0008a211ca751"}, + {file = "lxml-5.4.0-cp311-cp311-win32.whl", hash = "sha256:4329422de653cdb2b72afa39b0aa04252fca9071550044904b2e7036d9d97fe4"}, + {file = "lxml-5.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd3be6481ef54b8cfd0e1e953323b7aa9d9789b94842d0e5b142ef4bb7999539"}, + {file = "lxml-5.4.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b5aff6f3e818e6bdbbb38e5967520f174b18f539c2b9de867b1e7fde6f8d95a4"}, + {file = "lxml-5.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942a5d73f739ad7c452bf739a62a0f83e2578afd6b8e5406308731f4ce78b16d"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:460508a4b07364d6abf53acaa0a90b6d370fafde5693ef37602566613a9b0779"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:529024ab3a505fed78fe3cc5ddc079464e709f6c892733e3f5842007cec8ac6e"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:7ca56ebc2c474e8f3d5761debfd9283b8b18c76c4fc0967b74aeafba1f5647f9"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a81e1196f0a5b4167a8dafe3a66aa67c4addac1b22dc47947abd5d5c7a3f24b5"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00b8686694423ddae324cf614e1b9659c2edb754de617703c3d29ff568448df5"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c5681160758d3f6ac5b4fea370495c48aac0989d6a0f01bb9a72ad8ef5ab75c4"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:2dc191e60425ad70e75a68c9fd90ab284df64d9cd410ba8d2b641c0c45bc006e"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:67f779374c6b9753ae0a0195a892a1c234ce8416e4448fe1e9f34746482070a7"}, + {file = "lxml-5.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:79d5bfa9c1b455336f52343130b2067164040604e41f6dc4d8313867ed540079"}, + {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d3c30ba1c9b48c68489dc1829a6eede9873f52edca1dda900066542528d6b20"}, + {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1af80c6316ae68aded77e91cd9d80648f7dd40406cef73df841aa3c36f6907c8"}, + {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4d885698f5019abe0de3d352caf9466d5de2baded00a06ef3f1216c1a58ae78f"}, + {file = "lxml-5.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea53d51859b6c64e7c51d522c03cc2c48b9b5d6172126854cc7f01aa11f52bc"}, + {file = "lxml-5.4.0-cp312-cp312-win32.whl", hash = "sha256:d90b729fd2732df28130c064aac9bb8aff14ba20baa4aee7bd0795ff1187545f"}, + {file = "lxml-5.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1dc4ca99e89c335a7ed47d38964abcb36c5910790f9bd106f2a8fa2ee0b909d2"}, + {file = "lxml-5.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:773e27b62920199c6197130632c18fb7ead3257fce1ffb7d286912e56ddb79e0"}, + {file = "lxml-5.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ce9c671845de9699904b1e9df95acfe8dfc183f2310f163cdaa91a3535af95de"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9454b8d8200ec99a224df8854786262b1bd6461f4280064c807303c642c05e76"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cccd007d5c95279e529c146d095f1d39ac05139de26c098166c4beb9374b0f4d"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0fce1294a0497edb034cb416ad3e77ecc89b313cff7adbee5334e4dc0d11f422"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24974f774f3a78ac12b95e3a20ef0931795ff04dbb16db81a90c37f589819551"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:497cab4d8254c2a90bf988f162ace2ddbfdd806fce3bda3f581b9d24c852e03c"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e794f698ae4c5084414efea0f5cc9f4ac562ec02d66e1484ff822ef97c2cadff"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:2c62891b1ea3094bb12097822b3d44b93fc6c325f2043c4d2736a8ff09e65f60"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:142accb3e4d1edae4b392bd165a9abdee8a3c432a2cca193df995bc3886249c8"}, + {file = "lxml-5.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = 
"sha256:1a42b3a19346e5601d1b8296ff6ef3d76038058f311902edd574461e9c036982"}, + {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4291d3c409a17febf817259cb37bc62cb7eb398bcc95c1356947e2871911ae61"}, + {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4f5322cf38fe0e21c2d73901abf68e6329dc02a4994e483adbcf92b568a09a54"}, + {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:0be91891bdb06ebe65122aa6bf3fc94489960cf7e03033c6f83a90863b23c58b"}, + {file = "lxml-5.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:15a665ad90054a3d4f397bc40f73948d48e36e4c09f9bcffc7d90c87410e478a"}, + {file = "lxml-5.4.0-cp313-cp313-win32.whl", hash = "sha256:d5663bc1b471c79f5c833cffbc9b87d7bf13f87e055a5c86c363ccd2348d7e82"}, + {file = "lxml-5.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:bcb7a1096b4b6b24ce1ac24d4942ad98f983cd3810f9711bcd0293f43a9d8b9f"}, + {file = "lxml-5.4.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:7be701c24e7f843e6788353c055d806e8bd8466b52907bafe5d13ec6a6dbaecd"}, + {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb54f7c6bafaa808f27166569b1511fc42701a7713858dddc08afdde9746849e"}, + {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97dac543661e84a284502e0cf8a67b5c711b0ad5fb661d1bd505c02f8cf716d7"}, + {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:c70e93fba207106cb16bf852e421c37bbded92acd5964390aad07cb50d60f5cf"}, + {file = "lxml-5.4.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9c886b481aefdf818ad44846145f6eaf373a20d200b5ce1a5c8e1bc2d8745410"}, + {file = "lxml-5.4.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:fa0e294046de09acd6146be0ed6727d1f42ded4ce3ea1e9a19c11b6774eea27c"}, + {file = "lxml-5.4.0-cp36-cp36m-win32.whl", hash = "sha256:61c7bbf432f09ee44b1ccaa24896d21075e533cd01477966a5ff5a71d88b2f56"}, + {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"}, + {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"}, + {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"}, + {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"}, + {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"}, + {file = "lxml-5.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eaf24066ad0b30917186420d51e2e3edf4b0e2ea68d8cd885b14dc8afdcf6556"}, + {file = "lxml-5.4.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b31a3a77501d86d8ade128abb01082724c0dfd9524f542f2f07d693c9f1175f"}, + {file = 
"lxml-5.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e108352e203c7afd0eb91d782582f00a0b16a948d204d4dec8565024fafeea5"}, + {file = "lxml-5.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a11a96c3b3f7551c8a8109aa65e8594e551d5a84c76bf950da33d0fb6dfafab7"}, + {file = "lxml-5.4.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:ca755eebf0d9e62d6cb013f1261e510317a41bf4650f22963474a663fdfe02aa"}, + {file = "lxml-5.4.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:4cd915c0fb1bed47b5e6d6edd424ac25856252f09120e3e8ba5154b6b921860e"}, + {file = "lxml-5.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:226046e386556a45ebc787871d6d2467b32c37ce76c2680f5c608e25823ffc84"}, + {file = "lxml-5.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:b108134b9667bcd71236c5a02aad5ddd073e372fb5d48ea74853e009fe38acb6"}, + {file = "lxml-5.4.0-cp38-cp38-win32.whl", hash = "sha256:1320091caa89805df7dcb9e908add28166113dcd062590668514dbd510798c88"}, + {file = "lxml-5.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:073eb6dcdf1f587d9b88c8c93528b57eccda40209cf9be549d469b942b41d70b"}, + {file = "lxml-5.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bda3ea44c39eb74e2488297bb39d47186ed01342f0022c8ff407c250ac3f498e"}, + {file = "lxml-5.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9ceaf423b50ecfc23ca00b7f50b64baba85fb3fb91c53e2c9d00bc86150c7e40"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:664cdc733bc87449fe781dbb1f309090966c11cc0c0cd7b84af956a02a8a4729"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67ed8a40665b84d161bae3181aa2763beea3747f748bca5874b4af4d75998f87"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b4a3bd174cc9cdaa1afbc4620c049038b441d6ba07629d89a83b408e54c35cd"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:b0989737a3ba6cf2a16efb857fb0dfa20bc5c542737fddb6d893fde48be45433"}, + {file = "lxml-5.4.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:dc0af80267edc68adf85f2a5d9be1cdf062f973db6790c1d065e45025fa26140"}, + {file = "lxml-5.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:639978bccb04c42677db43c79bdaa23785dc7f9b83bfd87570da8207872f1ce5"}, + {file = "lxml-5.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5a99d86351f9c15e4a901fc56404b485b1462039db59288b203f8c629260a142"}, + {file = "lxml-5.4.0-cp39-cp39-win32.whl", hash = "sha256:3e6d5557989cdc3ebb5302bbdc42b439733a841891762ded9514e74f60319ad6"}, + {file = "lxml-5.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:a8c9b7f16b63e65bbba889acb436a1034a82d34fa09752d754f88d708eca80e1"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1b717b00a71b901b4667226bba282dd462c42ccf618ade12f9ba3674e1fabc55"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27a9ded0f0b52098ff89dd4c418325b987feed2ea5cc86e8860b0f844285d740"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b7ce10634113651d6f383aa712a194179dcd496bd8c41e191cec2099fa09de5"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:53370c26500d22b45182f98847243efb518d268374a9570409d2e2276232fd37"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = 
"sha256:c6364038c519dffdbe07e3cf42e6a7f8b90c275d4d1617a69bb59734c1a2d571"}, + {file = "lxml-5.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b12cb6527599808ada9eb2cd6e0e7d3d8f13fe7bbb01c6311255a15ded4c7ab4"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5f11a1526ebd0dee85e7b1e39e39a0cc0d9d03fb527f56d8457f6df48a10dc0c"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48b4afaf38bf79109bb060d9016fad014a9a48fb244e11b94f74ae366a64d252"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de6f6bb8a7840c7bf216fb83eec4e2f79f7325eca8858167b68708b929ab2172"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5cca36a194a4eb4e2ed6be36923d3cffd03dcdf477515dea687185506583d4c9"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b7c86884ad23d61b025989d99bfdd92a7351de956e01c61307cb87035960bcb1"}, + {file = "lxml-5.4.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:53d9469ab5460402c19553b56c3648746774ecd0681b1b27ea74d5d8a3ef5590"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:56dbdbab0551532bb26c19c914848d7251d73edb507c3079d6805fa8bba5b706"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14479c2ad1cb08b62bb941ba8e0e05938524ee3c3114644df905d2331c76cd57"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32697d2ea994e0db19c1df9e40275ffe84973e4232b5c274f47e7c1ec9763cdd"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:24f6df5f24fc3385f622c0c9d63fe34604893bc1a5bdbb2dbf5870f85f9a404a"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:151d6c40bc9db11e960619d2bf2ec5829f0aaffb10b41dcf6ad2ce0f3c0b2325"}, + {file = "lxml-5.4.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4025bf2884ac4370a3243c5aa8d66d3cb9e15d3ddd0af2d796eccc5f0244390e"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:9459e6892f59ecea2e2584ee1058f5d8f629446eab52ba2305ae13a32a059530"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47fb24cc0f052f0576ea382872b3fc7e1f7e3028e53299ea751839418ade92a6"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50441c9de951a153c698b9b99992e806b71c1f36d14b154592580ff4a9d0d877"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:ab339536aa798b1e17750733663d272038bf28069761d5be57cb4a9b0137b4f8"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9776af1aad5a4b4a1317242ee2bea51da54b2a7b7b48674be736d463c999f37d"}, + {file = "lxml-5.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:63e7968ff83da2eb6fdda967483a7a023aa497d85ad8f05c3ad9b1f2e8c84987"}, + {file = "lxml-5.4.0.tar.gz", hash = "sha256:d12832e1dbea4be280b22fd0ea7c9b87f0d8fc51ba06e92dc62d52f804f78ebd"}, ] [package.extras] @@ -2089,13 +2062,13 @@ source = ["Cython (>=3.0.11,<3.1.0)"] [[package]] name = "mako" -version = "1.3.9" +version = "1.3.10" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." 
optional = true python-versions = ">=3.8" files = [ - {file = "Mako-1.3.9-py3-none-any.whl", hash = "sha256:95920acccb578427a9aa38e37a186b1e43156c87260d7ba18ca63aa4c7cbd3a1"}, - {file = "mako-1.3.9.tar.gz", hash = "sha256:b5d65ff3462870feec922dbccf38f6efb44e5714d7b593a656be86663d8600ac"}, + {file = "mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59"}, + {file = "mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28"}, ] [package.dependencies] @@ -2192,13 +2165,13 @@ traitlets = "*" [[package]] name = "mavehgvs" -version = "0.6.2" +version = "0.7.0" description = "Regular expression-based validation of HGVS-style variant strings for Multiplexed Assays of Variant Effect." optional = false python-versions = ">=3.6" files = [ - {file = "mavehgvs-0.6.2-py3-none-any.whl", hash = "sha256:f2c330372feb5f6b9ebae3b133842f7b7d6b436cac2e8996d6618cca7f576dac"}, - {file = "mavehgvs-0.6.2.tar.gz", hash = "sha256:876c6313f986eb64e4c49ae1a94f059ffc43c2d6724f4bec5fbbe1aad97e3f70"}, + {file = "mavehgvs-0.7.0-py3-none-any.whl", hash = "sha256:a89d2ee16cf18a6a6ecfc2b6f5e280c3c699ddfe106b4389540fb0423f98e922"}, + {file = "mavehgvs-0.7.0.tar.gz", hash = "sha256:09cc3311b6ccf53a3ce3e474611af9e28b87fa02b8e690343f99a85534f25eae"}, ] [package.dependencies] @@ -2270,13 +2243,13 @@ reports = ["lxml"] [[package]] name = "mypy-extensions" -version = "1.0.0" +version = "1.1.0" description = "Type system extensions for programs checked with the mypy type checker." optional = false -python-versions = ">=3.5" +python-versions = ">=3.8" files = [ - {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, - {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, + {file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"}, + {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, ] [[package]] @@ -2354,13 +2327,13 @@ simplejson = "*" [[package]] name = "packaging" -version = "24.2" +version = "25.0" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, - {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, + {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, + {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, ] [[package]] @@ -2534,13 +2507,13 @@ virtualenv = ">=20.10.0" [[package]] name = "prompt-toolkit" -version = "3.0.50" +version = "3.0.51" description = "Library for building powerful interactive command lines in Python" optional = true -python-versions = ">=3.8.0" +python-versions = ">=3.8" files = [ - {file = "prompt_toolkit-3.0.50-py3-none-any.whl", hash = "sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198"}, - {file = "prompt_toolkit-3.0.50.tar.gz", hash = "sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab"}, + {file = "prompt_toolkit-3.0.51-py3-none-any.whl", hash = 
"sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07"}, + {file = "prompt_toolkit-3.0.51.tar.gz", hash = "sha256:931a162e3b27fc90c86f1b48bb1fb2c528c2761475e57c9c06de13311c7b54ed"}, ] [package.dependencies] @@ -2571,13 +2544,13 @@ test = ["pytest", "pytest-xdist", "setuptools"] [[package]] name = "psycopg" -version = "3.2.6" +version = "3.2.7" description = "PostgreSQL database adapter for Python" optional = false python-versions = ">=3.8" files = [ - {file = "psycopg-3.2.6-py3-none-any.whl", hash = "sha256:f3ff5488525890abb0566c429146add66b329e20d6d4835662b920cbbf90ac58"}, - {file = "psycopg-3.2.6.tar.gz", hash = "sha256:16fa094efa2698f260f2af74f3710f781e4a6f226efe9d1fd0c37f384639ed8a"}, + {file = "psycopg-3.2.7-py3-none-any.whl", hash = "sha256:d39747d2d5b9658b69fa462ad21d31f1ba4a5722ad1d0cb952552bc0b4125451"}, + {file = "psycopg-3.2.7.tar.gz", hash = "sha256:9afa609c7ebf139827a38c0bf61be9c024a3ed743f56443de9d38e1efc260bf3"}, ] [package.dependencies] @@ -2585,9 +2558,9 @@ typing-extensions = {version = ">=4.6", markers = "python_version < \"3.13\""} tzdata = {version = "*", markers = "sys_platform == \"win32\""} [package.extras] -binary = ["psycopg-binary (==3.2.6)"] -c = ["psycopg-c (==3.2.6)"] -dev = ["ast-comments (>=1.1.2)", "black (>=24.1.0)", "codespell (>=2.2)", "dnspython (>=2.1)", "flake8 (>=4.0)", "isort-psycopg", "isort[colors] (>=6.0)", "mypy (>=1.14)", "pre-commit (>=4.0.1)", "types-setuptools (>=57.4)", "wheel (>=0.37)"] +binary = ["psycopg-binary (==3.2.7)"] +c = ["psycopg-c (==3.2.7)"] +dev = ["ast-comments (>=1.1.2)", "black (>=24.1.0)", "codespell (>=2.2)", "dnspython (>=2.1)", "flake8 (>=4.0)", "isort-psycopg", "isort[colors] (>=6.0)", "mypy (>=1.14)", "pre-commit (>=4.0.1)", "types-setuptools (>=57.4)", "types-shapely (>=2.0)", "wheel (>=0.37)"] docs = ["Sphinx (>=5.0)", "furo (==2022.6.21)", "sphinx-autobuild (>=2021.3.14)", "sphinx-autodoc-typehints (>=1.12)"] pool = ["psycopg-pool"] test = ["anyio (>=4.0)", "mypy (>=1.14)", "pproxy (>=2.7)", "pytest (>=6.2.5)", "pytest-cov (>=3.0)", "pytest-randomly (>=3.5)"] @@ -2605,6 +2578,7 @@ files = [ {file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"}, {file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"}, {file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"}, + {file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"}, {file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"}, {file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"}, {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"}, @@ -2659,61 +2633,61 @@ files = [ [[package]] name = "pydantic" -version = "1.10.21" +version = "1.10.22" description = "Data validation and settings management using python type hints" optional = false python-versions = ">=3.7" files = [ - {file = "pydantic-1.10.21-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:245e486e0fec53ec2366df9cf1cba36e0bbf066af7cd9c974bbbd9ba10e1e586"}, - {file = "pydantic-1.10.21-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:6c54f8d4c151c1de784c5b93dfbb872067e3414619e10e21e695f7bb84d1d1fd"}, - {file = "pydantic-1.10.21-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b64708009cfabd9c2211295144ff455ec7ceb4c4fb45a07a804309598f36187"}, - {file = "pydantic-1.10.21-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a148410fa0e971ba333358d11a6dea7b48e063de127c2b09ece9d1c1137dde4"}, - {file = "pydantic-1.10.21-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:36ceadef055af06e7756eb4b871cdc9e5a27bdc06a45c820cd94b443de019bbf"}, - {file = "pydantic-1.10.21-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c0501e1d12df6ab1211b8cad52d2f7b2cd81f8e8e776d39aa5e71e2998d0379f"}, - {file = "pydantic-1.10.21-cp310-cp310-win_amd64.whl", hash = "sha256:c261127c275d7bce50b26b26c7d8427dcb5c4803e840e913f8d9df3f99dca55f"}, - {file = "pydantic-1.10.21-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8b6350b68566bb6b164fb06a3772e878887f3c857c46c0c534788081cb48adf4"}, - {file = "pydantic-1.10.21-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:935b19fdcde236f4fbf691959fa5c3e2b6951fff132964e869e57c70f2ad1ba3"}, - {file = "pydantic-1.10.21-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b6a04efdcd25486b27f24c1648d5adc1633ad8b4506d0e96e5367f075ed2e0b"}, - {file = "pydantic-1.10.21-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c1ba253eb5af8d89864073e6ce8e6c8dec5f49920cff61f38f5c3383e38b1c9f"}, - {file = "pydantic-1.10.21-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:57f0101e6c97b411f287a0b7cf5ebc4e5d3b18254bf926f45a11615d29475793"}, - {file = "pydantic-1.10.21-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:90e85834f0370d737c77a386ce505c21b06bfe7086c1c568b70e15a568d9670d"}, - {file = "pydantic-1.10.21-cp311-cp311-win_amd64.whl", hash = "sha256:6a497bc66b3374b7d105763d1d3de76d949287bf28969bff4656206ab8a53aa9"}, - {file = "pydantic-1.10.21-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2ed4a5f13cf160d64aa331ab9017af81f3481cd9fd0e49f1d707b57fe1b9f3ae"}, - {file = "pydantic-1.10.21-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3b7693bb6ed3fbe250e222f9415abb73111bb09b73ab90d2d4d53f6390e0ccc1"}, - {file = "pydantic-1.10.21-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:185d5f1dff1fead51766da9b2de4f3dc3b8fca39e59383c273f34a6ae254e3e2"}, - {file = "pydantic-1.10.21-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38e6d35cf7cd1727822c79e324fa0677e1a08c88a34f56695101f5ad4d5e20e5"}, - {file = "pydantic-1.10.21-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:1d7c332685eafacb64a1a7645b409a166eb7537f23142d26895746f628a3149b"}, - {file = "pydantic-1.10.21-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c9b782db6f993a36092480eeaab8ba0609f786041b01f39c7c52252bda6d85f"}, - {file = "pydantic-1.10.21-cp312-cp312-win_amd64.whl", hash = "sha256:7ce64d23d4e71d9698492479505674c5c5b92cda02b07c91dfc13633b2eef805"}, - {file = "pydantic-1.10.21-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0067935d35044950be781933ab91b9a708eaff124bf860fa2f70aeb1c4be7212"}, - {file = "pydantic-1.10.21-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5e8148c2ce4894ce7e5a4925d9d3fdce429fb0e821b5a8783573f3611933a251"}, - {file = "pydantic-1.10.21-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a4973232c98b9b44c78b1233693e5e1938add5af18042f031737e1214455f9b8"}, - {file = "pydantic-1.10.21-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:662bf5ce3c9b1cef32a32a2f4debe00d2f4839fefbebe1d6956e681122a9c839"}, - {file = "pydantic-1.10.21-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:98737c3ab5a2f8a85f2326eebcd214510f898881a290a7939a45ec294743c875"}, - {file = "pydantic-1.10.21-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0bb58bbe65a43483d49f66b6c8474424d551a3fbe8a7796c42da314bac712738"}, - {file = "pydantic-1.10.21-cp313-cp313-win_amd64.whl", hash = "sha256:e622314542fb48542c09c7bd1ac51d71c5632dd3c92dc82ede6da233f55f4848"}, - {file = "pydantic-1.10.21-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d356aa5b18ef5a24d8081f5c5beb67c0a2a6ff2a953ee38d65a2aa96526b274f"}, - {file = "pydantic-1.10.21-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08caa8c0468172d27c669abfe9e7d96a8b1655ec0833753e117061febaaadef5"}, - {file = "pydantic-1.10.21-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c677aa39ec737fec932feb68e4a2abe142682f2885558402602cd9746a1c92e8"}, - {file = "pydantic-1.10.21-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:79577cc045d3442c4e845df53df9f9202546e2ba54954c057d253fc17cd16cb1"}, - {file = "pydantic-1.10.21-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:b6b73ab347284719f818acb14f7cd80696c6fdf1bd34feee1955d7a72d2e64ce"}, - {file = "pydantic-1.10.21-cp37-cp37m-win_amd64.whl", hash = "sha256:46cffa24891b06269e12f7e1ec50b73f0c9ab4ce71c2caa4ccf1fb36845e1ff7"}, - {file = "pydantic-1.10.21-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:298d6f765e3c9825dfa78f24c1efd29af91c3ab1b763e1fd26ae4d9e1749e5c8"}, - {file = "pydantic-1.10.21-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f2f4a2305f15eff68f874766d982114ac89468f1c2c0b97640e719cf1a078374"}, - {file = "pydantic-1.10.21-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35b263b60c519354afb3a60107d20470dd5250b3ce54c08753f6975c406d949b"}, - {file = "pydantic-1.10.21-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e23a97a6c2f2db88995496db9387cd1727acdacc85835ba8619dce826c0b11a6"}, - {file = "pydantic-1.10.21-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:3c96fed246ccc1acb2df032ff642459e4ae18b315ecbab4d95c95cfa292e8517"}, - {file = "pydantic-1.10.21-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:b92893ebefc0151474f682e7debb6ab38552ce56a90e39a8834734c81f37c8a9"}, - {file = "pydantic-1.10.21-cp38-cp38-win_amd64.whl", hash = "sha256:b8460bc256bf0de821839aea6794bb38a4c0fbd48f949ea51093f6edce0be459"}, - {file = "pydantic-1.10.21-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5d387940f0f1a0adb3c44481aa379122d06df8486cc8f652a7b3b0caf08435f7"}, - {file = "pydantic-1.10.21-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:266ecfc384861d7b0b9c214788ddff75a2ea123aa756bcca6b2a1175edeca0fe"}, - {file = "pydantic-1.10.21-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61da798c05a06a362a2f8c5e3ff0341743e2818d0f530eaac0d6898f1b187f1f"}, - {file = "pydantic-1.10.21-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a621742da75ce272d64ea57bd7651ee2a115fa67c0f11d66d9dcfc18c2f1b106"}, - {file = "pydantic-1.10.21-cp39-cp39-musllinux_1_2_i686.whl", hash = 
"sha256:9e3e4000cd54ef455694b8be9111ea20f66a686fc155feda1ecacf2322b115da"}, - {file = "pydantic-1.10.21-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f198c8206640f4c0ef5a76b779241efb1380a300d88b1bce9bfe95a6362e674d"}, - {file = "pydantic-1.10.21-cp39-cp39-win_amd64.whl", hash = "sha256:e7f0cda108b36a30c8fc882e4fc5b7eec8ef584aa43aa43694c6a7b274fb2b56"}, - {file = "pydantic-1.10.21-py3-none-any.whl", hash = "sha256:db70c920cba9d05c69ad4a9e7f8e9e83011abb2c6490e561de9ae24aee44925c"}, - {file = "pydantic-1.10.21.tar.gz", hash = "sha256:64b48e2b609a6c22178a56c408ee1215a7206077ecb8a193e2fda31858b2362a"}, + {file = "pydantic-1.10.22-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:57889565ccc1e5b7b73343329bbe6198ebc472e3ee874af2fa1865cfe7048228"}, + {file = "pydantic-1.10.22-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90729e22426de79bc6a3526b4c45ec4400caf0d4f10d7181ba7f12c01bb3897d"}, + {file = "pydantic-1.10.22-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8684d347f351554ec94fdcb507983d3116dc4577fb8799fed63c65869a2d10"}, + {file = "pydantic-1.10.22-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c8dad498ceff2d9ef1d2e2bc6608f5b59b8e1ba2031759b22dfb8c16608e1802"}, + {file = "pydantic-1.10.22-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fac529cc654d4575cf8de191cce354b12ba705f528a0a5c654de6d01f76cd818"}, + {file = "pydantic-1.10.22-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4148232aded8dd1dd13cf910a01b32a763c34bd79a0ab4d1ee66164fcb0b7b9d"}, + {file = "pydantic-1.10.22-cp310-cp310-win_amd64.whl", hash = "sha256:ece68105d9e436db45d8650dc375c760cc85a6793ae019c08769052902dca7db"}, + {file = "pydantic-1.10.22-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8e530a8da353f791ad89e701c35787418605d35085f4bdda51b416946070e938"}, + {file = "pydantic-1.10.22-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:654322b85642e9439d7de4c83cb4084ddd513df7ff8706005dada43b34544946"}, + {file = "pydantic-1.10.22-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8bece75bd1b9fc1c32b57a32831517943b1159ba18b4ba32c0d431d76a120ae"}, + {file = "pydantic-1.10.22-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eccb58767f13c6963dcf96d02cb8723ebb98b16692030803ac075d2439c07b0f"}, + {file = "pydantic-1.10.22-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7778e6200ff8ed5f7052c1516617423d22517ad36cc7a3aedd51428168e3e5e8"}, + {file = "pydantic-1.10.22-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bffe02767d27c39af9ca7dc7cd479c00dda6346bb62ffc89e306f665108317a2"}, + {file = "pydantic-1.10.22-cp311-cp311-win_amd64.whl", hash = "sha256:23bc19c55427091b8e589bc08f635ab90005f2dc99518f1233386f46462c550a"}, + {file = "pydantic-1.10.22-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:92d0f97828a075a71d9efc65cf75db5f149b4d79a38c89648a63d2932894d8c9"}, + {file = "pydantic-1.10.22-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6af5a2811b6b95b58b829aeac5996d465a5f0c7ed84bd871d603cf8646edf6ff"}, + {file = "pydantic-1.10.22-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6cf06d8d40993e79af0ab2102ef5da77b9ddba51248e4cb27f9f3f591fbb096e"}, + {file = "pydantic-1.10.22-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:184b7865b171a6057ad97f4a17fbac81cec29bd103e996e7add3d16b0d95f609"}, + {file = 
"pydantic-1.10.22-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:923ad861677ab09d89be35d36111156063a7ebb44322cdb7b49266e1adaba4bb"}, + {file = "pydantic-1.10.22-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:82d9a3da1686443fb854c8d2ab9a473251f8f4cdd11b125522efb4d7c646e7bc"}, + {file = "pydantic-1.10.22-cp312-cp312-win_amd64.whl", hash = "sha256:1612604929af4c602694a7f3338b18039d402eb5ddfbf0db44f1ebfaf07f93e7"}, + {file = "pydantic-1.10.22-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b259dc89c9abcd24bf42f31951fb46c62e904ccf4316393f317abeeecda39978"}, + {file = "pydantic-1.10.22-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9238aa0964d80c0908d2f385e981add58faead4412ca80ef0fa352094c24e46d"}, + {file = "pydantic-1.10.22-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f8029f05b04080e3f1a550575a1bca747c0ea4be48e2d551473d47fd768fc1b"}, + {file = "pydantic-1.10.22-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5c06918894f119e0431a36c9393bc7cceeb34d1feeb66670ef9b9ca48c073937"}, + {file = "pydantic-1.10.22-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e205311649622ee8fc1ec9089bd2076823797f5cd2c1e3182dc0e12aab835b35"}, + {file = "pydantic-1.10.22-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:815f0a73d5688d6dd0796a7edb9eca7071bfef961a7b33f91e618822ae7345b7"}, + {file = "pydantic-1.10.22-cp313-cp313-win_amd64.whl", hash = "sha256:9dfce71d42a5cde10e78a469e3d986f656afc245ab1b97c7106036f088dd91f8"}, + {file = "pydantic-1.10.22-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3ecaf8177b06aac5d1f442db1288e3b46d9f05f34fd17fdca3ad34105328b61a"}, + {file = "pydantic-1.10.22-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb36c2de9ea74bd7f66b5481dea8032d399affd1cbfbb9bb7ce539437f1fce62"}, + {file = "pydantic-1.10.22-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6b8d14a256be3b8fff9286d76c532f1a7573fbba5f189305b22471c6679854d"}, + {file = "pydantic-1.10.22-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:1c33269e815db4324e71577174c29c7aa30d1bba51340ce6be976f6f3053a4c6"}, + {file = "pydantic-1.10.22-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:8661b3ab2735b2a9ccca2634738534a795f4a10bae3ab28ec0a10c96baa20182"}, + {file = "pydantic-1.10.22-cp37-cp37m-win_amd64.whl", hash = "sha256:22bdd5fe70d4549995981c55b970f59de5c502d5656b2abdfcd0a25be6f3763e"}, + {file = "pydantic-1.10.22-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e3f33d1358aa4bc2795208cc29ff3118aeaad0ea36f0946788cf7cadeccc166b"}, + {file = "pydantic-1.10.22-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:813f079f9cd136cac621f3f9128a4406eb8abd2ad9fdf916a0731d91c6590017"}, + {file = "pydantic-1.10.22-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab618ab8dca6eac7f0755db25f6aba3c22c40e3463f85a1c08dc93092d917704"}, + {file = "pydantic-1.10.22-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d128e1aaa38db88caca920d5822c98fc06516a09a58b6d3d60fa5ea9099b32cc"}, + {file = "pydantic-1.10.22-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:cc97bbc25def7025e55fc9016080773167cda2aad7294e06a37dda04c7d69ece"}, + {file = "pydantic-1.10.22-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0dda5d7157d543b1fa565038cae6e952549d0f90071c839b3740fb77c820fab8"}, + {file = "pydantic-1.10.22-cp38-cp38-win_amd64.whl", hash = 
"sha256:a093fe44fe518cb445d23119511a71f756f8503139d02fcdd1173f7b76c95ffe"}, + {file = "pydantic-1.10.22-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ec54c89b2568b258bb30d7348ac4d82bec1b58b377fb56a00441e2ac66b24587"}, + {file = "pydantic-1.10.22-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d8f1d1a1532e4f3bcab4e34e8d2197a7def4b67072acd26cfa60e92d75803a48"}, + {file = "pydantic-1.10.22-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ad83ca35508c27eae1005b6b61f369f78aae6d27ead2135ec156a2599910121"}, + {file = "pydantic-1.10.22-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53cdb44b78c420f570ff16b071ea8cd5a477635c6b0efc343c8a91e3029bbf1a"}, + {file = "pydantic-1.10.22-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:16d0a5ae9d98264186ce31acdd7686ec05fd331fab9d68ed777d5cb2d1514e5e"}, + {file = "pydantic-1.10.22-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8aee040e25843f036192b1a1af62117504a209a043aa8db12e190bb86ad7e611"}, + {file = "pydantic-1.10.22-cp39-cp39-win_amd64.whl", hash = "sha256:7f691eec68dbbfca497d3c11b92a3e5987393174cbedf03ec7a4184c35c2def6"}, + {file = "pydantic-1.10.22-py3-none-any.whl", hash = "sha256:343037d608bcbd34df937ac259708bfc83664dadf88afe8516c4f282d7d471a9"}, + {file = "pydantic-1.10.22.tar.gz", hash = "sha256:ee1006cebd43a8e7158fb7190bb8f4e2da9649719bff65d0c287282ec38dec6d"}, ] [package.dependencies] @@ -2968,13 +2942,13 @@ files = [ [[package]] name = "pytz" -version = "2025.1" +version = "2025.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" files = [ - {file = "pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57"}, - {file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"}, + {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, + {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, ] [[package]] @@ -3116,125 +3090,136 @@ test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "tes [[package]] name = "rpds-py" -version = "0.23.1" +version = "0.24.0" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.9" files = [ - {file = "rpds_py-0.23.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2a54027554ce9b129fc3d633c92fa33b30de9f08bc61b32c053dc9b537266fed"}, - {file = "rpds_py-0.23.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b5ef909a37e9738d146519657a1aab4584018746a18f71c692f2f22168ece40c"}, - {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ee9d6f0b38efb22ad94c3b68ffebe4c47865cdf4b17f6806d6c674e1feb4246"}, - {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f7356a6da0562190558c4fcc14f0281db191cdf4cb96e7604c06acfcee96df15"}, - {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9441af1d25aed96901f97ad83d5c3e35e6cd21a25ca5e4916c82d7dd0490a4fa"}, - {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d8abf7896a91fb97e7977d1aadfcc2c80415d6dc2f1d0fca5b8d0df247248f3"}, - {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:1b08027489ba8fedde72ddd233a5ea411b85a6ed78175f40285bd401bde7466d"}, - {file = "rpds_py-0.23.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fee513135b5a58f3bb6d89e48326cd5aa308e4bcdf2f7d59f67c861ada482bf8"}, - {file = "rpds_py-0.23.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:35d5631ce0af26318dba0ae0ac941c534453e42f569011585cb323b7774502a5"}, - {file = "rpds_py-0.23.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:a20cb698c4a59c534c6701b1c24a968ff2768b18ea2991f886bd8985ce17a89f"}, - {file = "rpds_py-0.23.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e9c206a1abc27e0588cf8b7c8246e51f1a16a103734f7750830a1ccb63f557a"}, - {file = "rpds_py-0.23.1-cp310-cp310-win32.whl", hash = "sha256:d9f75a06ecc68f159d5d7603b734e1ff6daa9497a929150f794013aa9f6e3f12"}, - {file = "rpds_py-0.23.1-cp310-cp310-win_amd64.whl", hash = "sha256:f35eff113ad430b5272bbfc18ba111c66ff525828f24898b4e146eb479a2cdda"}, - {file = "rpds_py-0.23.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:b79f5ced71efd70414a9a80bbbfaa7160da307723166f09b69773153bf17c590"}, - {file = "rpds_py-0.23.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c9e799dac1ffbe7b10c1fd42fe4cd51371a549c6e108249bde9cd1200e8f59b4"}, - {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721f9c4011b443b6e84505fc00cc7aadc9d1743f1c988e4c89353e19c4a968ee"}, - {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f88626e3f5e57432e6191cd0c5d6d6b319b635e70b40be2ffba713053e5147dd"}, - {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:285019078537949cecd0190f3690a0b0125ff743d6a53dfeb7a4e6787af154f5"}, - {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b92f5654157de1379c509b15acec9d12ecf6e3bc1996571b6cb82a4302060447"}, - {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e768267cbe051dd8d1c5305ba690bb153204a09bf2e3de3ae530de955f5b5580"}, - {file = "rpds_py-0.23.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c5334a71f7dc1160382d45997e29f2637c02f8a26af41073189d79b95d3321f1"}, - {file = "rpds_py-0.23.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d6adb81564af0cd428910f83fa7da46ce9ad47c56c0b22b50872bc4515d91966"}, - {file = "rpds_py-0.23.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:cafa48f2133d4daa028473ede7d81cd1b9f9e6925e9e4003ebdf77010ee02f35"}, - {file = "rpds_py-0.23.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0fced9fd4a07a1ded1bac7e961ddd9753dd5d8b755ba8e05acba54a21f5f1522"}, - {file = "rpds_py-0.23.1-cp311-cp311-win32.whl", hash = "sha256:243241c95174b5fb7204c04595852fe3943cc41f47aa14c3828bc18cd9d3b2d6"}, - {file = "rpds_py-0.23.1-cp311-cp311-win_amd64.whl", hash = "sha256:11dd60b2ffddba85715d8a66bb39b95ddbe389ad2cfcf42c833f1bcde0878eaf"}, - {file = "rpds_py-0.23.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3902df19540e9af4cc0c3ae75974c65d2c156b9257e91f5101a51f99136d834c"}, - {file = "rpds_py-0.23.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:66f8d2a17e5838dd6fb9be6baaba8e75ae2f5fa6b6b755d597184bfcd3cb0eba"}, - {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:112b8774b0b4ee22368fec42749b94366bd9b536f8f74c3d4175d4395f5cbd31"}, - {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:e0df046f2266e8586cf09d00588302a32923eb6386ced0ca5c9deade6af9a149"}, - {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f3288930b947cbebe767f84cf618d2cbe0b13be476e749da0e6a009f986248c"}, - {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ce473a2351c018b06dd8d30d5da8ab5a0831056cc53b2006e2a8028172c37ce5"}, - {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d550d7e9e7d8676b183b37d65b5cd8de13676a738973d330b59dc8312df9c5dc"}, - {file = "rpds_py-0.23.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e14f86b871ea74c3fddc9a40e947d6a5d09def5adc2076ee61fb910a9014fb35"}, - {file = "rpds_py-0.23.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1bf5be5ba34e19be579ae873da515a2836a2166d8d7ee43be6ff909eda42b72b"}, - {file = "rpds_py-0.23.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d7031d493c4465dbc8d40bd6cafefef4bd472b17db0ab94c53e7909ee781b9ef"}, - {file = "rpds_py-0.23.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:55ff4151cfd4bc635e51cfb1c59ac9f7196b256b12e3a57deb9e5742e65941ad"}, - {file = "rpds_py-0.23.1-cp312-cp312-win32.whl", hash = "sha256:a9d3b728f5a5873d84cba997b9d617c6090ca5721caaa691f3b1a78c60adc057"}, - {file = "rpds_py-0.23.1-cp312-cp312-win_amd64.whl", hash = "sha256:b03a8d50b137ee758e4c73638b10747b7c39988eb8e6cd11abb7084266455165"}, - {file = "rpds_py-0.23.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:4caafd1a22e5eaa3732acb7672a497123354bef79a9d7ceed43387d25025e935"}, - {file = "rpds_py-0.23.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:178f8a60fc24511c0eb756af741c476b87b610dba83270fce1e5a430204566a4"}, - {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c632419c3870507ca20a37c8f8f5352317aca097639e524ad129f58c125c61c6"}, - {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:698a79d295626ee292d1730bc2ef6e70a3ab135b1d79ada8fde3ed0047b65a10"}, - {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:271fa2184cf28bdded86bb6217c8e08d3a169fe0bbe9be5e8d96e8476b707122"}, - {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b91cceb5add79ee563bd1f70b30896bd63bc5f78a11c1f00a1e931729ca4f1f4"}, - {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a6cb95074777f1ecda2ca4fa7717caa9ee6e534f42b7575a8f0d4cb0c24013"}, - {file = "rpds_py-0.23.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:50fb62f8d8364978478b12d5f03bf028c6bc2af04082479299139dc26edf4c64"}, - {file = "rpds_py-0.23.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c8f7e90b948dc9dcfff8003f1ea3af08b29c062f681c05fd798e36daa3f7e3e8"}, - {file = "rpds_py-0.23.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5b98b6c953e5c2bda51ab4d5b4f172617d462eebc7f4bfdc7c7e6b423f6da957"}, - {file = "rpds_py-0.23.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2893d778d4671ee627bac4037a075168b2673c57186fb1a57e993465dbd79a93"}, - {file = "rpds_py-0.23.1-cp313-cp313-win32.whl", hash = "sha256:2cfa07c346a7ad07019c33fb9a63cf3acb1f5363c33bc73014e20d9fe8b01cdd"}, - {file = "rpds_py-0.23.1-cp313-cp313-win_amd64.whl", hash = "sha256:3aaf141d39f45322e44fc2c742e4b8b4098ead5317e5f884770c8df0c332da70"}, - {file = "rpds_py-0.23.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = 
"sha256:759462b2d0aa5a04be5b3e37fb8183615f47014ae6b116e17036b131985cb731"}, - {file = "rpds_py-0.23.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3e9212f52074fc9d72cf242a84063787ab8e21e0950d4d6709886fb62bcb91d5"}, - {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9e9f3a3ac919406bc0414bbbd76c6af99253c507150191ea79fab42fdb35982a"}, - {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c04ca91dda8a61584165825907f5c967ca09e9c65fe8966ee753a3f2b019fe1e"}, - {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ab923167cfd945abb9b51a407407cf19f5bee35001221f2911dc85ffd35ff4f"}, - {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ed6f011bedca8585787e5082cce081bac3d30f54520097b2411351b3574e1219"}, - {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6959bb9928c5c999aba4a3f5a6799d571ddc2c59ff49917ecf55be2bbb4e3722"}, - {file = "rpds_py-0.23.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1ed7de3c86721b4e83ac440751329ec6a1102229aa18163f84c75b06b525ad7e"}, - {file = "rpds_py-0.23.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5fb89edee2fa237584e532fbf78f0ddd1e49a47c7c8cfa153ab4849dc72a35e6"}, - {file = "rpds_py-0.23.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7e5413d2e2d86025e73f05510ad23dad5950ab8417b7fc6beaad99be8077138b"}, - {file = "rpds_py-0.23.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d31ed4987d72aabdf521eddfb6a72988703c091cfc0064330b9e5f8d6a042ff5"}, - {file = "rpds_py-0.23.1-cp313-cp313t-win32.whl", hash = "sha256:f3429fb8e15b20961efca8c8b21432623d85db2228cc73fe22756c6637aa39e7"}, - {file = "rpds_py-0.23.1-cp313-cp313t-win_amd64.whl", hash = "sha256:d6f6512a90bd5cd9030a6237f5346f046c6f0e40af98657568fa45695d4de59d"}, - {file = "rpds_py-0.23.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:09cd7dbcb673eb60518231e02874df66ec1296c01a4fcd733875755c02014b19"}, - {file = "rpds_py-0.23.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c6760211eee3a76316cf328f5a8bd695b47b1626d21c8a27fb3b2473a884d597"}, - {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72e680c1518733b73c994361e4b06441b92e973ef7d9449feec72e8ee4f713da"}, - {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ae28144c1daa61366205d32abd8c90372790ff79fc60c1a8ad7fd3c8553a600e"}, - {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c698d123ce5d8f2d0cd17f73336615f6a2e3bdcedac07a1291bb4d8e7d82a05a"}, - {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98b257ae1e83f81fb947a363a274c4eb66640212516becaff7bef09a5dceacaa"}, - {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c9ff044eb07c8468594d12602291c635da292308c8c619244e30698e7fc455a"}, - {file = "rpds_py-0.23.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7938c7b0599a05246d704b3f5e01be91a93b411d0d6cc62275f025293b8a11ce"}, - {file = "rpds_py-0.23.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:e9cb79ecedfc156c0692257ac7ed415243b6c35dd969baa461a6888fc79f2f07"}, - {file = "rpds_py-0.23.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:7b77e07233925bd33fc0022b8537774423e4c6680b6436316c5075e79b6384f4"}, - {file = 
"rpds_py-0.23.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a970bfaf130c29a679b1d0a6e0f867483cea455ab1535fb427566a475078f27f"}, - {file = "rpds_py-0.23.1-cp39-cp39-win32.whl", hash = "sha256:4233df01a250b3984465faed12ad472f035b7cd5240ea3f7c76b7a7016084495"}, - {file = "rpds_py-0.23.1-cp39-cp39-win_amd64.whl", hash = "sha256:c617d7453a80e29d9973b926983b1e700a9377dbe021faa36041c78537d7b08c"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c1f8afa346ccd59e4e5630d5abb67aba6a9812fddf764fd7eb11f382a345f8cc"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fad784a31869747df4ac968a351e070c06ca377549e4ace94775aaa3ab33ee06"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5a96fcac2f18e5a0a23a75cd27ce2656c66c11c127b0318e508aab436b77428"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3e77febf227a1dc3220159355dba68faa13f8dca9335d97504abf428469fb18b"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26bb3e8de93443d55e2e748e9fd87deb5f8075ca7bc0502cfc8be8687d69a2ec"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:db7707dde9143a67b8812c7e66aeb2d843fe33cc8e374170f4d2c50bd8f2472d"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1eedaaccc9bb66581d4ae7c50e15856e335e57ef2734dbc5fd8ba3e2a4ab3cb6"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28358c54fffadf0ae893f6c1050e8f8853e45df22483b7fff2f6ab6152f5d8bf"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:633462ef7e61d839171bf206551d5ab42b30b71cac8f10a64a662536e057fdef"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:a98f510d86f689fcb486dc59e6e363af04151e5260ad1bdddb5625c10f1e95f8"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:e0397dd0b3955c61ef9b22838144aa4bef6f0796ba5cc8edfc64d468b93798b4"}, - {file = "rpds_py-0.23.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:75307599f0d25bf6937248e5ac4e3bde5ea72ae6618623b86146ccc7845ed00b"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3614d280bf7aab0d3721b5ce0e73434acb90a2c993121b6e81a1c15c665298ac"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e5963ea87f88bddf7edd59644a35a0feecf75f8985430124c253612d4f7d27ae"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad76f44f70aac3a54ceb1813ca630c53415da3a24fd93c570b2dfb4856591017"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2c6ae11e6e93728d86aafc51ced98b1658a0080a7dd9417d24bfb955bb09c3c2"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc869af5cba24d45fb0399b0cfdbcefcf6910bf4dee5d74036a57cf5264b3ff4"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c76b32eb2ab650a29e423525e84eb197c45504b1c1e6e17b6cc91fcfeb1a4b1d"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4263320ed887ed843f85beba67f8b2d1483b5947f2dc73a8b068924558bfeace"}, - 
{file = "rpds_py-0.23.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7f9682a8f71acdf59fd554b82b1c12f517118ee72c0f3944eda461606dfe7eb9"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:754fba3084b70162a6b91efceee8a3f06b19e43dac3f71841662053c0584209a"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:a1c66e71ecfd2a4acf0e4bd75e7a3605afa8f9b28a3b497e4ba962719df2be57"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:8d67beb6002441faef8251c45e24994de32c4c8686f7356a1f601ad7c466f7c3"}, - {file = "rpds_py-0.23.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a1e17d8dc8e57d8e0fd21f8f0f0a5211b3fa258b2e444c2053471ef93fe25a00"}, - {file = "rpds_py-0.23.1.tar.gz", hash = "sha256:7f3240dcfa14d198dba24b8b9cb3b108c06b68d45b7babd9eefc1038fdf7e707"}, + {file = "rpds_py-0.24.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:006f4342fe729a368c6df36578d7a348c7c716be1da0a1a0f86e3021f8e98724"}, + {file = "rpds_py-0.24.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2d53747da70a4e4b17f559569d5f9506420966083a31c5fbd84e764461c4444b"}, + {file = "rpds_py-0.24.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8acd55bd5b071156bae57b555f5d33697998752673b9de554dd82f5b5352727"}, + {file = "rpds_py-0.24.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7e80d375134ddb04231a53800503752093dbb65dad8dabacce2c84cccc78e964"}, + {file = "rpds_py-0.24.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60748789e028d2a46fc1c70750454f83c6bdd0d05db50f5ae83e2db500b34da5"}, + {file = "rpds_py-0.24.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6e1daf5bf6c2be39654beae83ee6b9a12347cb5aced9a29eecf12a2d25fff664"}, + {file = "rpds_py-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b221c2457d92a1fb3c97bee9095c874144d196f47c038462ae6e4a14436f7bc"}, + {file = "rpds_py-0.24.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:66420986c9afff67ef0c5d1e4cdc2d0e5262f53ad11e4f90e5e22448df485bf0"}, + {file = "rpds_py-0.24.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:43dba99f00f1d37b2a0265a259592d05fcc8e7c19d140fe51c6e6f16faabeb1f"}, + {file = "rpds_py-0.24.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:a88c0d17d039333a41d9bf4616bd062f0bd7aa0edeb6cafe00a2fc2a804e944f"}, + {file = "rpds_py-0.24.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc31e13ce212e14a539d430428cd365e74f8b2d534f8bc22dd4c9c55b277b875"}, + {file = "rpds_py-0.24.0-cp310-cp310-win32.whl", hash = "sha256:fc2c1e1b00f88317d9de6b2c2b39b012ebbfe35fe5e7bef980fd2a91f6100a07"}, + {file = "rpds_py-0.24.0-cp310-cp310-win_amd64.whl", hash = "sha256:c0145295ca415668420ad142ee42189f78d27af806fcf1f32a18e51d47dd2052"}, + {file = "rpds_py-0.24.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:2d3ee4615df36ab8eb16c2507b11e764dcc11fd350bbf4da16d09cda11fcedef"}, + {file = "rpds_py-0.24.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e13ae74a8a3a0c2f22f450f773e35f893484fcfacb00bb4344a7e0f4f48e1f97"}, + {file = "rpds_py-0.24.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf86f72d705fc2ef776bb7dd9e5fbba79d7e1f3e258bf9377f8204ad0fc1c51e"}, + {file = "rpds_py-0.24.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c43583ea8517ed2e780a345dd9960896afc1327e8cf3ac8239c167530397440d"}, + {file = 
"rpds_py-0.24.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4cd031e63bc5f05bdcda120646a0d32f6d729486d0067f09d79c8db5368f4586"}, + {file = "rpds_py-0.24.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:34d90ad8c045df9a4259c47d2e16a3f21fdb396665c94520dbfe8766e62187a4"}, + {file = "rpds_py-0.24.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e838bf2bb0b91ee67bf2b889a1a841e5ecac06dd7a2b1ef4e6151e2ce155c7ae"}, + {file = "rpds_py-0.24.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04ecf5c1ff4d589987b4d9882872f80ba13da7d42427234fce8f22efb43133bc"}, + {file = "rpds_py-0.24.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:630d3d8ea77eabd6cbcd2ea712e1c5cecb5b558d39547ac988351195db433f6c"}, + {file = "rpds_py-0.24.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ebcb786b9ff30b994d5969213a8430cbb984cdd7ea9fd6df06663194bd3c450c"}, + {file = "rpds_py-0.24.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:174e46569968ddbbeb8a806d9922f17cd2b524aa753b468f35b97ff9c19cb718"}, + {file = "rpds_py-0.24.0-cp311-cp311-win32.whl", hash = "sha256:5ef877fa3bbfb40b388a5ae1cb00636a624690dcb9a29a65267054c9ea86d88a"}, + {file = "rpds_py-0.24.0-cp311-cp311-win_amd64.whl", hash = "sha256:e274f62cbd274359eff63e5c7e7274c913e8e09620f6a57aae66744b3df046d6"}, + {file = "rpds_py-0.24.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:d8551e733626afec514b5d15befabea0dd70a343a9f23322860c4f16a9430205"}, + {file = "rpds_py-0.24.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e374c0ce0ca82e5b67cd61fb964077d40ec177dd2c4eda67dba130de09085c7"}, + {file = "rpds_py-0.24.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d69d003296df4840bd445a5d15fa5b6ff6ac40496f956a221c4d1f6f7b4bc4d9"}, + {file = "rpds_py-0.24.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8212ff58ac6dfde49946bea57474a386cca3f7706fc72c25b772b9ca4af6b79e"}, + {file = "rpds_py-0.24.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:528927e63a70b4d5f3f5ccc1fa988a35456eb5d15f804d276709c33fc2f19bda"}, + {file = "rpds_py-0.24.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a824d2c7a703ba6daaca848f9c3d5cb93af0505be505de70e7e66829affd676e"}, + {file = "rpds_py-0.24.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44d51febb7a114293ffd56c6cf4736cb31cd68c0fddd6aa303ed09ea5a48e029"}, + {file = "rpds_py-0.24.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3fab5f4a2c64a8fb64fc13b3d139848817a64d467dd6ed60dcdd6b479e7febc9"}, + {file = "rpds_py-0.24.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9be4f99bee42ac107870c61dfdb294d912bf81c3c6d45538aad7aecab468b6b7"}, + {file = "rpds_py-0.24.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:564c96b6076a98215af52f55efa90d8419cc2ef45d99e314fddefe816bc24f91"}, + {file = "rpds_py-0.24.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:75a810b7664c17f24bf2ffd7f92416c00ec84b49bb68e6a0d93e542406336b56"}, + {file = "rpds_py-0.24.0-cp312-cp312-win32.whl", hash = "sha256:f6016bd950be4dcd047b7475fdf55fb1e1f59fc7403f387be0e8123e4a576d30"}, + {file = "rpds_py-0.24.0-cp312-cp312-win_amd64.whl", hash = "sha256:998c01b8e71cf051c28f5d6f1187abbdf5cf45fc0efce5da6c06447cba997034"}, + {file = "rpds_py-0.24.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:3d2d8e4508e15fc05b31285c4b00ddf2e0eb94259c2dc896771966a163122a0c"}, + {file = 
"rpds_py-0.24.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0f00c16e089282ad68a3820fd0c831c35d3194b7cdc31d6e469511d9bffc535c"}, + {file = "rpds_py-0.24.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:951cc481c0c395c4a08639a469d53b7d4afa252529a085418b82a6b43c45c240"}, + {file = "rpds_py-0.24.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9ca89938dff18828a328af41ffdf3902405a19f4131c88e22e776a8e228c5a8"}, + {file = "rpds_py-0.24.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ed0ef550042a8dbcd657dfb284a8ee00f0ba269d3f2286b0493b15a5694f9fe8"}, + {file = "rpds_py-0.24.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b2356688e5d958c4d5cb964af865bea84db29971d3e563fb78e46e20fe1848b"}, + {file = "rpds_py-0.24.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78884d155fd15d9f64f5d6124b486f3d3f7fd7cd71a78e9670a0f6f6ca06fb2d"}, + {file = "rpds_py-0.24.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6a4a535013aeeef13c5532f802708cecae8d66c282babb5cd916379b72110cf7"}, + {file = "rpds_py-0.24.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:84e0566f15cf4d769dade9b366b7b87c959be472c92dffb70462dd0844d7cbad"}, + {file = "rpds_py-0.24.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:823e74ab6fbaa028ec89615ff6acb409e90ff45580c45920d4dfdddb069f2120"}, + {file = "rpds_py-0.24.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c61a2cb0085c8783906b2f8b1f16a7e65777823c7f4d0a6aaffe26dc0d358dd9"}, + {file = "rpds_py-0.24.0-cp313-cp313-win32.whl", hash = "sha256:60d9b630c8025b9458a9d114e3af579a2c54bd32df601c4581bd054e85258143"}, + {file = "rpds_py-0.24.0-cp313-cp313-win_amd64.whl", hash = "sha256:6eea559077d29486c68218178ea946263b87f1c41ae7f996b1f30a983c476a5a"}, + {file = "rpds_py-0.24.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:d09dc82af2d3c17e7dd17120b202a79b578d79f2b5424bda209d9966efeed114"}, + {file = "rpds_py-0.24.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5fc13b44de6419d1e7a7e592a4885b323fbc2f46e1f22151e3a8ed3b8b920405"}, + {file = "rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c347a20d79cedc0a7bd51c4d4b7dbc613ca4e65a756b5c3e57ec84bd43505b47"}, + {file = "rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:20f2712bd1cc26a3cc16c5a1bfee9ed1abc33d4cdf1aabd297fe0eb724df4272"}, + {file = "rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aad911555286884be1e427ef0dc0ba3929e6821cbeca2194b13dc415a462c7fd"}, + {file = "rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0aeb3329c1721c43c58cae274d7d2ca85c1690d89485d9c63a006cb79a85771a"}, + {file = "rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a0f156e9509cee987283abd2296ec816225145a13ed0391df8f71bf1d789e2d"}, + {file = "rpds_py-0.24.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aa6800adc8204ce898c8a424303969b7aa6a5e4ad2789c13f8648739830323b7"}, + {file = "rpds_py-0.24.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a18fc371e900a21d7392517c6f60fe859e802547309e94313cd8181ad9db004d"}, + {file = "rpds_py-0.24.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:9168764133fd919f8dcca2ead66de0105f4ef5659cbb4fa044f7014bed9a1797"}, + {file = "rpds_py-0.24.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:5f6e3cec44ba05ee5cbdebe92d052f69b63ae792e7d05f1020ac5e964394080c"}, + {file = "rpds_py-0.24.0-cp313-cp313t-win32.whl", hash = "sha256:8ebc7e65ca4b111d928b669713865f021b7773350eeac4a31d3e70144297baba"}, + {file = "rpds_py-0.24.0-cp313-cp313t-win_amd64.whl", hash = "sha256:675269d407a257b8c00a6b58205b72eec8231656506c56fd429d924ca00bb350"}, + {file = "rpds_py-0.24.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a36b452abbf29f68527cf52e181fced56685731c86b52e852053e38d8b60bc8d"}, + {file = "rpds_py-0.24.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8b3b397eefecec8e8e39fa65c630ef70a24b09141a6f9fc17b3c3a50bed6b50e"}, + {file = "rpds_py-0.24.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdabcd3beb2a6dca7027007473d8ef1c3b053347c76f685f5f060a00327b8b65"}, + {file = "rpds_py-0.24.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5db385bacd0c43f24be92b60c857cf760b7f10d8234f4bd4be67b5b20a7c0b6b"}, + {file = "rpds_py-0.24.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8097b3422d020ff1c44effc40ae58e67d93e60d540a65649d2cdaf9466030791"}, + {file = "rpds_py-0.24.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:493fe54318bed7d124ce272fc36adbf59d46729659b2c792e87c3b95649cdee9"}, + {file = "rpds_py-0.24.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8aa362811ccdc1f8dadcc916c6d47e554169ab79559319ae9fae7d7752d0d60c"}, + {file = "rpds_py-0.24.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d8f9a6e7fd5434817526815f09ea27f2746c4a51ee11bb3439065f5fc754db58"}, + {file = "rpds_py-0.24.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8205ee14463248d3349131bb8099efe15cd3ce83b8ef3ace63c7e976998e7124"}, + {file = "rpds_py-0.24.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:921ae54f9ecba3b6325df425cf72c074cd469dea843fb5743a26ca7fb2ccb149"}, + {file = "rpds_py-0.24.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:32bab0a56eac685828e00cc2f5d1200c548f8bc11f2e44abf311d6b548ce2e45"}, + {file = "rpds_py-0.24.0-cp39-cp39-win32.whl", hash = "sha256:f5c0ed12926dec1dfe7d645333ea59cf93f4d07750986a586f511c0bc61fe103"}, + {file = "rpds_py-0.24.0-cp39-cp39-win_amd64.whl", hash = "sha256:afc6e35f344490faa8276b5f2f7cbf71f88bc2cda4328e00553bd451728c571f"}, + {file = "rpds_py-0.24.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:619ca56a5468f933d940e1bf431c6f4e13bef8e688698b067ae68eb4f9b30e3a"}, + {file = "rpds_py-0.24.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:4b28e5122829181de1898c2c97f81c0b3246d49f585f22743a1246420bb8d399"}, + {file = "rpds_py-0.24.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8e5ab32cf9eb3647450bc74eb201b27c185d3857276162c101c0f8c6374e098"}, + {file = "rpds_py-0.24.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:208b3a70a98cf3710e97cabdc308a51cd4f28aa6e7bb11de3d56cd8b74bab98d"}, + {file = "rpds_py-0.24.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bbc4362e06f950c62cad3d4abf1191021b2ffaf0b31ac230fbf0526453eee75e"}, + {file = "rpds_py-0.24.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ebea2821cdb5f9fef44933617be76185b80150632736f3d76e54829ab4a3b4d1"}, + {file = "rpds_py-0.24.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9a4df06c35465ef4d81799999bba810c68d29972bf1c31db61bfdb81dd9d5bb"}, + {file = 
"rpds_py-0.24.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d3aa13bdf38630da298f2e0d77aca967b200b8cc1473ea05248f6c5e9c9bdb44"}, + {file = "rpds_py-0.24.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:041f00419e1da7a03c46042453598479f45be3d787eb837af382bfc169c0db33"}, + {file = "rpds_py-0.24.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:d8754d872a5dfc3c5bf9c0e059e8107451364a30d9fd50f1f1a85c4fb9481164"}, + {file = "rpds_py-0.24.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:896c41007931217a343eff197c34513c154267636c8056fb409eafd494c3dcdc"}, + {file = "rpds_py-0.24.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:92558d37d872e808944c3c96d0423b8604879a3d1c86fdad508d7ed91ea547d5"}, + {file = "rpds_py-0.24.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f9e0057a509e096e47c87f753136c9b10d7a91842d8042c2ee6866899a717c0d"}, + {file = "rpds_py-0.24.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d6e109a454412ab82979c5b1b3aee0604eca4bbf9a02693bb9df027af2bfa91a"}, + {file = "rpds_py-0.24.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc1c892b1ec1f8cbd5da8de287577b455e388d9c328ad592eabbdcb6fc93bee5"}, + {file = "rpds_py-0.24.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9c39438c55983d48f4bb3487734d040e22dad200dab22c41e331cee145e7a50d"}, + {file = "rpds_py-0.24.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d7e8ce990ae17dda686f7e82fd41a055c668e13ddcf058e7fb5e9da20b57793"}, + {file = "rpds_py-0.24.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9ea7f4174d2e4194289cb0c4e172d83e79a6404297ff95f2875cf9ac9bced8ba"}, + {file = "rpds_py-0.24.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb2954155bb8f63bb19d56d80e5e5320b61d71084617ed89efedb861a684baea"}, + {file = "rpds_py-0.24.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04f2b712a2206e13800a8136b07aaedc23af3facab84918e7aa89e4be0260032"}, + {file = "rpds_py-0.24.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:eda5c1e2a715a4cbbca2d6d304988460942551e4e5e3b7457b50943cd741626d"}, + {file = "rpds_py-0.24.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:9abc80fe8c1f87218db116016de575a7998ab1629078c90840e8d11ab423ee25"}, + {file = "rpds_py-0.24.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:6a727fd083009bc83eb83d6950f0c32b3c94c8b80a9b667c87f4bd1274ca30ba"}, + {file = "rpds_py-0.24.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e0f3ef95795efcd3b2ec3fe0a5bcfb5dadf5e3996ea2117427e524d4fbf309c6"}, + {file = "rpds_py-0.24.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:2c13777ecdbbba2077670285dd1fe50828c8742f6a4119dbef6f83ea13ad10fb"}, + {file = "rpds_py-0.24.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79e8d804c2ccd618417e96720ad5cd076a86fa3f8cb310ea386a3e6229bae7d1"}, + {file = "rpds_py-0.24.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fd822f019ccccd75c832deb7aa040bb02d70a92eb15a2f16c7987b7ad4ee8d83"}, + {file = "rpds_py-0.24.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0047638c3aa0dbcd0ab99ed1e549bbf0e142c9ecc173b6492868432d8989a046"}, + {file = "rpds_py-0.24.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:a5b66d1b201cc71bc3081bc2f1fc36b0c1f268b773e03bbc39066651b9e18391"}, + {file = "rpds_py-0.24.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbcbb6db5582ea33ce46a5d20a5793134b5365110d84df4e30b9d37c6fd40ad3"}, + {file = "rpds_py-0.24.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:63981feca3f110ed132fd217bf7768ee8ed738a55549883628ee3da75bb9cb78"}, + {file = "rpds_py-0.24.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:3a55fc10fdcbf1a4bd3c018eea422c52cf08700cf99c28b5cb10fe97ab77a0d3"}, + {file = "rpds_py-0.24.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:c30ff468163a48535ee7e9bf21bd14c7a81147c0e58a36c1078289a8ca7af0bd"}, + {file = "rpds_py-0.24.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:369d9c6d4c714e36d4a03957b4783217a3ccd1e222cdd67d464a3a479fc17796"}, + {file = "rpds_py-0.24.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:24795c099453e3721fda5d8ddd45f5dfcc8e5a547ce7b8e9da06fecc3832e26f"}, + {file = "rpds_py-0.24.0.tar.gz", hash = "sha256:772cc1b2cd963e7e17e6cc55fe0371fb9c704d63e44cacec7b9b7f523b78919e"}, ] [[package]] name = "rsa" -version = "4.9" +version = "4.9.1" description = "Pure-Python RSA implementation" optional = true -python-versions = ">=3.6,<4" +python-versions = "<4,>=3.6" files = [ - {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, - {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, + {file = "rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762"}, + {file = "rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75"}, ] [package.dependencies] @@ -3286,18 +3271,18 @@ crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] [[package]] name = "setuptools" -version = "77.0.1" +version = "80.3.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.9" files = [ - {file = "setuptools-77.0.1-py3-none-any.whl", hash = "sha256:81a234dff81a82bb52e522c8aef145d0dd4de1fd6de4d3b196d0f77dc2fded26"}, - {file = "setuptools-77.0.1.tar.gz", hash = "sha256:a1246a1b4178c66d7cf50c9fc6d530fac3f89bc284cf803c7fa878c41b1a03b2"}, + {file = "setuptools-80.3.1-py3-none-any.whl", hash = "sha256:ea8e00d7992054c4c592aeb892f6ad51fe1b4d90cc6947cc45c45717c40ec537"}, + {file = "setuptools-80.3.1.tar.gz", hash = "sha256:31e2c58dbb67c99c289f51c16d899afedae292b978f8051efaf6262d8212f927"}, ] [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.8.0)"] -core = ["importlib_metadata (>=6)", "jaraco.collections", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +core = ["importlib_metadata (>=6)", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = 
["pytest-enabler (>=2.2)"] @@ -3473,92 +3458,92 @@ files = [ [[package]] name = "soupsieve" -version = "2.6" +version = "2.7" description = "A modern CSS selector implementation for Beautiful Soup." optional = true python-versions = ">=3.8" files = [ - {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, - {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, + {file = "soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4"}, + {file = "soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a"}, ] [[package]] name = "sqlalchemy" -version = "2.0.39" +version = "2.0.40" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.39-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:66a40003bc244e4ad86b72abb9965d304726d05a939e8c09ce844d27af9e6d37"}, - {file = "SQLAlchemy-2.0.39-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67de057fbcb04a066171bd9ee6bcb58738d89378ee3cabff0bffbf343ae1c787"}, - {file = "SQLAlchemy-2.0.39-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:533e0f66c32093a987a30df3ad6ed21170db9d581d0b38e71396c49718fbb1ca"}, - {file = "SQLAlchemy-2.0.39-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:7399d45b62d755e9ebba94eb89437f80512c08edde8c63716552a3aade61eb42"}, - {file = "SQLAlchemy-2.0.39-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:788b6ff6728072b313802be13e88113c33696a9a1f2f6d634a97c20f7ef5ccce"}, - {file = "SQLAlchemy-2.0.39-cp37-cp37m-win32.whl", hash = "sha256:01da15490c9df352fbc29859d3c7ba9cd1377791faeeb47c100832004c99472c"}, - {file = "SQLAlchemy-2.0.39-cp37-cp37m-win_amd64.whl", hash = "sha256:f2bcb085faffcacf9319b1b1445a7e1cfdc6fb46c03f2dce7bc2d9a4b3c1cdc5"}, - {file = "SQLAlchemy-2.0.39-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b761a6847f96fdc2d002e29e9e9ac2439c13b919adfd64e8ef49e75f6355c548"}, - {file = "SQLAlchemy-2.0.39-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0d7e3866eb52d914aea50c9be74184a0feb86f9af8aaaa4daefe52b69378db0b"}, - {file = "SQLAlchemy-2.0.39-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:995c2bacdddcb640c2ca558e6760383dcdd68830160af92b5c6e6928ffd259b4"}, - {file = "SQLAlchemy-2.0.39-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:344cd1ec2b3c6bdd5dfde7ba7e3b879e0f8dd44181f16b895940be9b842fd2b6"}, - {file = "SQLAlchemy-2.0.39-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:5dfbc543578058c340360f851ddcecd7a1e26b0d9b5b69259b526da9edfa8875"}, - {file = "SQLAlchemy-2.0.39-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:3395e7ed89c6d264d38bea3bfb22ffe868f906a7985d03546ec7dc30221ea980"}, - {file = "SQLAlchemy-2.0.39-cp38-cp38-win32.whl", hash = "sha256:bf555f3e25ac3a70c67807b2949bfe15f377a40df84b71ab2c58d8593a1e036e"}, - {file = "SQLAlchemy-2.0.39-cp38-cp38-win_amd64.whl", hash = "sha256:463ecfb907b256e94bfe7bcb31a6d8c7bc96eca7cbe39803e448a58bb9fcad02"}, - {file = "sqlalchemy-2.0.39-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6827f8c1b2f13f1420545bd6d5b3f9e0b85fe750388425be53d23c760dcf176b"}, - {file = "sqlalchemy-2.0.39-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d9f119e7736967c0ea03aff91ac7d04555ee038caf89bb855d93bbd04ae85b41"}, - {file = 
"sqlalchemy-2.0.39-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4600c7a659d381146e1160235918826c50c80994e07c5b26946a3e7ec6c99249"}, - {file = "sqlalchemy-2.0.39-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a06e6c8e31c98ddc770734c63903e39f1947c9e3e5e4bef515c5491b7737dde"}, - {file = "sqlalchemy-2.0.39-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c4c433f78c2908ae352848f56589c02b982d0e741b7905228fad628999799de4"}, - {file = "sqlalchemy-2.0.39-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7bd5c5ee1448b6408734eaa29c0d820d061ae18cb17232ce37848376dcfa3e92"}, - {file = "sqlalchemy-2.0.39-cp310-cp310-win32.whl", hash = "sha256:87a1ce1f5e5dc4b6f4e0aac34e7bb535cb23bd4f5d9c799ed1633b65c2bcad8c"}, - {file = "sqlalchemy-2.0.39-cp310-cp310-win_amd64.whl", hash = "sha256:871f55e478b5a648c08dd24af44345406d0e636ffe021d64c9b57a4a11518304"}, - {file = "sqlalchemy-2.0.39-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a28f9c238f1e143ff42ab3ba27990dfb964e5d413c0eb001b88794c5c4a528a9"}, - {file = "sqlalchemy-2.0.39-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:08cf721bbd4391a0e765fe0fe8816e81d9f43cece54fdb5ac465c56efafecb3d"}, - {file = "sqlalchemy-2.0.39-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a8517b6d4005facdbd7eb4e8cf54797dbca100a7df459fdaff4c5123265c1cd"}, - {file = "sqlalchemy-2.0.39-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b2de1523d46e7016afc7e42db239bd41f2163316935de7c84d0e19af7e69538"}, - {file = "sqlalchemy-2.0.39-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:412c6c126369ddae171c13987b38df5122cb92015cba6f9ee1193b867f3f1530"}, - {file = "sqlalchemy-2.0.39-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6b35e07f1d57b79b86a7de8ecdcefb78485dab9851b9638c2c793c50203b2ae8"}, - {file = "sqlalchemy-2.0.39-cp311-cp311-win32.whl", hash = "sha256:3eb14ba1a9d07c88669b7faf8f589be67871d6409305e73e036321d89f1d904e"}, - {file = "sqlalchemy-2.0.39-cp311-cp311-win_amd64.whl", hash = "sha256:78f1b79132a69fe8bd6b5d91ef433c8eb40688ba782b26f8c9f3d2d9ca23626f"}, - {file = "sqlalchemy-2.0.39-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c457a38351fb6234781d054260c60e531047e4d07beca1889b558ff73dc2014b"}, - {file = "sqlalchemy-2.0.39-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:018ee97c558b499b58935c5a152aeabf6d36b3d55d91656abeb6d93d663c0c4c"}, - {file = "sqlalchemy-2.0.39-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5493a8120d6fc185f60e7254fc056a6742f1db68c0f849cfc9ab46163c21df47"}, - {file = "sqlalchemy-2.0.39-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2cf5b5ddb69142511d5559c427ff00ec8c0919a1e6c09486e9c32636ea2b9dd"}, - {file = "sqlalchemy-2.0.39-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9f03143f8f851dd8de6b0c10784363712058f38209e926723c80654c1b40327a"}, - {file = "sqlalchemy-2.0.39-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:06205eb98cb3dd52133ca6818bf5542397f1dd1b69f7ea28aa84413897380b06"}, - {file = "sqlalchemy-2.0.39-cp312-cp312-win32.whl", hash = "sha256:7f5243357e6da9a90c56282f64b50d29cba2ee1f745381174caacc50d501b109"}, - {file = "sqlalchemy-2.0.39-cp312-cp312-win_amd64.whl", hash = "sha256:2ed107331d188a286611cea9022de0afc437dd2d3c168e368169f27aa0f61338"}, - {file = "sqlalchemy-2.0.39-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fe193d3ae297c423e0e567e240b4324d6b6c280a048e64c77a3ea6886cc2aa87"}, - {file = 
"sqlalchemy-2.0.39-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:79f4f502125a41b1b3b34449e747a6abfd52a709d539ea7769101696bdca6716"}, - {file = "sqlalchemy-2.0.39-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a10ca7f8a1ea0fd5630f02feb055b0f5cdfcd07bb3715fc1b6f8cb72bf114e4"}, - {file = "sqlalchemy-2.0.39-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6b0a1c7ed54a5361aaebb910c1fa864bae34273662bb4ff788a527eafd6e14d"}, - {file = "sqlalchemy-2.0.39-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:52607d0ebea43cf214e2ee84a6a76bc774176f97c5a774ce33277514875a718e"}, - {file = "sqlalchemy-2.0.39-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c08a972cbac2a14810463aec3a47ff218bb00c1a607e6689b531a7c589c50723"}, - {file = "sqlalchemy-2.0.39-cp313-cp313-win32.whl", hash = "sha256:23c5aa33c01bd898f879db158537d7e7568b503b15aad60ea0c8da8109adf3e7"}, - {file = "sqlalchemy-2.0.39-cp313-cp313-win_amd64.whl", hash = "sha256:4dabd775fd66cf17f31f8625fc0e4cfc5765f7982f94dc09b9e5868182cb71c0"}, - {file = "sqlalchemy-2.0.39-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2600a50d590c22d99c424c394236899ba72f849a02b10e65b4c70149606408b5"}, - {file = "sqlalchemy-2.0.39-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4eff9c270afd23e2746e921e80182872058a7a592017b2713f33f96cc5f82e32"}, - {file = "sqlalchemy-2.0.39-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d7332868ce891eda48896131991f7f2be572d65b41a4050957242f8e935d5d7"}, - {file = "sqlalchemy-2.0.39-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:125a7763b263218a80759ad9ae2f3610aaf2c2fbbd78fff088d584edf81f3782"}, - {file = "sqlalchemy-2.0.39-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:04545042969833cb92e13b0a3019549d284fd2423f318b6ba10e7aa687690a3c"}, - {file = "sqlalchemy-2.0.39-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:805cb481474e111ee3687c9047c5f3286e62496f09c0e82e8853338aaaa348f8"}, - {file = "sqlalchemy-2.0.39-cp39-cp39-win32.whl", hash = "sha256:34d5c49f18778a3665d707e6286545a30339ad545950773d43977e504815fa70"}, - {file = "sqlalchemy-2.0.39-cp39-cp39-win_amd64.whl", hash = "sha256:35e72518615aa5384ef4fae828e3af1b43102458b74a8c481f69af8abf7e802a"}, - {file = "sqlalchemy-2.0.39-py3-none-any.whl", hash = "sha256:a1c6b0a5e3e326a466d809b651c63f278b1256146a377a528b6938a279da334f"}, - {file = "sqlalchemy-2.0.39.tar.gz", hash = "sha256:5d2d1fe548def3267b4c70a8568f108d1fed7cbbeccb9cc166e05af2abc25c22"}, + {file = "SQLAlchemy-2.0.40-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ae9597cab738e7cc823f04a704fb754a9249f0b6695a6aeb63b74055cd417a96"}, + {file = "SQLAlchemy-2.0.40-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37a5c21ab099a83d669ebb251fddf8f5cee4d75ea40a5a1653d9c43d60e20867"}, + {file = "SQLAlchemy-2.0.40-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bece9527f5a98466d67fb5d34dc560c4da964240d8b09024bb21c1246545e04e"}, + {file = "SQLAlchemy-2.0.40-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:8bb131ffd2165fae48162c7bbd0d97c84ab961deea9b8bab16366543deeab625"}, + {file = "SQLAlchemy-2.0.40-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:9408fd453d5f8990405cc9def9af46bfbe3183e6110401b407c2d073c3388f47"}, + {file = "SQLAlchemy-2.0.40-cp37-cp37m-win32.whl", hash = "sha256:00a494ea6f42a44c326477b5bee4e0fc75f6a80c01570a32b57e89cf0fbef85a"}, + {file = "SQLAlchemy-2.0.40-cp37-cp37m-win_amd64.whl", hash = 
"sha256:c7b927155112ac858357ccf9d255dd8c044fd9ad2dc6ce4c4149527c901fa4c3"}, + {file = "sqlalchemy-2.0.40-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f1ea21bef99c703f44444ad29c2c1b6bd55d202750b6de8e06a955380f4725d7"}, + {file = "sqlalchemy-2.0.40-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:afe63b208153f3a7a2d1a5b9df452b0673082588933e54e7c8aac457cf35e758"}, + {file = "sqlalchemy-2.0.40-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8aae085ea549a1eddbc9298b113cffb75e514eadbb542133dd2b99b5fb3b6af"}, + {file = "sqlalchemy-2.0.40-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ea9181284754d37db15156eb7be09c86e16e50fbe77610e9e7bee09291771a1"}, + {file = "sqlalchemy-2.0.40-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5434223b795be5c5ef8244e5ac98056e290d3a99bdcc539b916e282b160dda00"}, + {file = "sqlalchemy-2.0.40-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:15d08d5ef1b779af6a0909b97be6c1fd4298057504eb6461be88bd1696cb438e"}, + {file = "sqlalchemy-2.0.40-cp310-cp310-win32.whl", hash = "sha256:cd2f75598ae70bcfca9117d9e51a3b06fe29edd972fdd7fd57cc97b4dbf3b08a"}, + {file = "sqlalchemy-2.0.40-cp310-cp310-win_amd64.whl", hash = "sha256:2cbafc8d39ff1abdfdda96435f38fab141892dc759a2165947d1a8fffa7ef596"}, + {file = "sqlalchemy-2.0.40-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f6bacab7514de6146a1976bc56e1545bee247242fab030b89e5f70336fc0003e"}, + {file = "sqlalchemy-2.0.40-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5654d1ac34e922b6c5711631f2da497d3a7bffd6f9f87ac23b35feea56098011"}, + {file = "sqlalchemy-2.0.40-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35904d63412db21088739510216e9349e335f142ce4a04b69e2528020ee19ed4"}, + {file = "sqlalchemy-2.0.40-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c7a80ed86d6aaacb8160a1caef6680d4ddd03c944d985aecee940d168c411d1"}, + {file = "sqlalchemy-2.0.40-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:519624685a51525ddaa7d8ba8265a1540442a2ec71476f0e75241eb8263d6f51"}, + {file = "sqlalchemy-2.0.40-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2ee5f9999a5b0e9689bed96e60ee53c3384f1a05c2dd8068cc2e8361b0df5b7a"}, + {file = "sqlalchemy-2.0.40-cp311-cp311-win32.whl", hash = "sha256:c0cae71e20e3c02c52f6b9e9722bca70e4a90a466d59477822739dc31ac18b4b"}, + {file = "sqlalchemy-2.0.40-cp311-cp311-win_amd64.whl", hash = "sha256:574aea2c54d8f1dd1699449f332c7d9b71c339e04ae50163a3eb5ce4c4325ee4"}, + {file = "sqlalchemy-2.0.40-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9d3b31d0a1c44b74d3ae27a3de422dfccd2b8f0b75e51ecb2faa2bf65ab1ba0d"}, + {file = "sqlalchemy-2.0.40-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:37f7a0f506cf78c80450ed1e816978643d3969f99c4ac6b01104a6fe95c5490a"}, + {file = "sqlalchemy-2.0.40-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bb933a650323e476a2e4fbef8997a10d0003d4da996aad3fd7873e962fdde4d"}, + {file = "sqlalchemy-2.0.40-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6959738971b4745eea16f818a2cd086fb35081383b078272c35ece2b07012716"}, + {file = "sqlalchemy-2.0.40-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:110179728e442dae85dd39591beb74072ae4ad55a44eda2acc6ec98ead80d5f2"}, + {file = "sqlalchemy-2.0.40-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8040680eaacdce4d635f12c55c714f3d4c7f57da2bc47a01229d115bd319191"}, + {file = "sqlalchemy-2.0.40-cp312-cp312-win32.whl", hash = 
"sha256:650490653b110905c10adac69408380688cefc1f536a137d0d69aca1069dc1d1"}, + {file = "sqlalchemy-2.0.40-cp312-cp312-win_amd64.whl", hash = "sha256:2be94d75ee06548d2fc591a3513422b873490efb124048f50556369a834853b0"}, + {file = "sqlalchemy-2.0.40-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:915866fd50dd868fdcc18d61d8258db1bf9ed7fbd6dfec960ba43365952f3b01"}, + {file = "sqlalchemy-2.0.40-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a4c5a2905a9ccdc67a8963e24abd2f7afcd4348829412483695c59e0af9a705"}, + {file = "sqlalchemy-2.0.40-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55028d7a3ebdf7ace492fab9895cbc5270153f75442a0472d8516e03159ab364"}, + {file = "sqlalchemy-2.0.40-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6cfedff6878b0e0d1d0a50666a817ecd85051d12d56b43d9d425455e608b5ba0"}, + {file = "sqlalchemy-2.0.40-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bb19e30fdae77d357ce92192a3504579abe48a66877f476880238a962e5b96db"}, + {file = "sqlalchemy-2.0.40-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:16d325ea898f74b26ffcd1cf8c593b0beed8714f0317df2bed0d8d1de05a8f26"}, + {file = "sqlalchemy-2.0.40-cp313-cp313-win32.whl", hash = "sha256:a669cbe5be3c63f75bcbee0b266779706f1a54bcb1000f302685b87d1b8c1500"}, + {file = "sqlalchemy-2.0.40-cp313-cp313-win_amd64.whl", hash = "sha256:641ee2e0834812d657862f3a7de95e0048bdcb6c55496f39c6fa3d435f6ac6ad"}, + {file = "sqlalchemy-2.0.40-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:50f5885bbed261fc97e2e66c5156244f9704083a674b8d17f24c72217d29baf5"}, + {file = "sqlalchemy-2.0.40-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cf0e99cdb600eabcd1d65cdba0d3c91418fee21c4aa1d28db47d095b1064a7d8"}, + {file = "sqlalchemy-2.0.40-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe147fcd85aaed53ce90645c91ed5fca0cc88a797314c70dfd9d35925bd5d106"}, + {file = "sqlalchemy-2.0.40-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baf7cee56bd552385c1ee39af360772fbfc2f43be005c78d1140204ad6148438"}, + {file = "sqlalchemy-2.0.40-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:4aeb939bcac234b88e2d25d5381655e8353fe06b4e50b1c55ecffe56951d18c2"}, + {file = "sqlalchemy-2.0.40-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c268b5100cfeaa222c40f55e169d484efa1384b44bf9ca415eae6d556f02cb08"}, + {file = "sqlalchemy-2.0.40-cp38-cp38-win32.whl", hash = "sha256:46628ebcec4f23a1584fb52f2abe12ddb00f3bb3b7b337618b80fc1b51177aff"}, + {file = "sqlalchemy-2.0.40-cp38-cp38-win_amd64.whl", hash = "sha256:7e0505719939e52a7b0c65d20e84a6044eb3712bb6f239c6b1db77ba8e173a37"}, + {file = "sqlalchemy-2.0.40-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c884de19528e0fcd9dc34ee94c810581dd6e74aef75437ff17e696c2bfefae3e"}, + {file = "sqlalchemy-2.0.40-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1abb387710283fc5983d8a1209d9696a4eae9db8d7ac94b402981fe2fe2e39ad"}, + {file = "sqlalchemy-2.0.40-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cfa124eda500ba4b0d3afc3e91ea27ed4754e727c7f025f293a22f512bcd4c9"}, + {file = "sqlalchemy-2.0.40-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b6b28d303b9d57c17a5164eb1fd2d5119bb6ff4413d5894e74873280483eeb5"}, + {file = "sqlalchemy-2.0.40-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:b5a5bbe29c10c5bfd63893747a1bf6f8049df607638c786252cb9243b86b6706"}, + {file = "sqlalchemy-2.0.40-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:f0fda83e113bb0fb27dc003685f32a5dcb99c9c4f41f4fa0838ac35265c23b5c"}, + {file = "sqlalchemy-2.0.40-cp39-cp39-win32.whl", hash = "sha256:957f8d85d5e834397ef78a6109550aeb0d27a53b5032f7a57f2451e1adc37e98"}, + {file = "sqlalchemy-2.0.40-cp39-cp39-win_amd64.whl", hash = "sha256:1ffdf9c91428e59744f8e6f98190516f8e1d05eec90e936eb08b257332c5e870"}, + {file = "sqlalchemy-2.0.40-py3-none-any.whl", hash = "sha256:32587e2e1e359276957e6fe5dad089758bc042a971a8a09ae8ecf7a8fe23d07a"}, + {file = "sqlalchemy-2.0.40.tar.gz", hash = "sha256:d827099289c64589418ebbcaead0145cd19f4e3e8a93919a0100247af245fa00"}, ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +greenlet = {version = ">=1", markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} mypy = {version = ">=0.910", optional = true, markers = "extra == \"mypy\""} typing-extensions = ">=4.6.0" [package.extras] -aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] -aioodbc = ["aioodbc", "greenlet (!=0.4.17)"] -aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"] -asyncio = ["greenlet (!=0.4.17)"] -asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (>=1)"] +aioodbc = ["aioodbc", "greenlet (>=1)"] +aiosqlite = ["aiosqlite", "greenlet (>=1)", "typing_extensions (!=3.10.0.1)"] +asyncio = ["greenlet (>=1)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (>=1)"] mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5,!=1.1.10)"] mssql = ["pyodbc"] mssql-pymssql = ["pymssql"] @@ -3569,7 +3554,7 @@ mysql-connector = ["mysql-connector-python"] oracle = ["cx_oracle (>=8)"] oracle-oracledb = ["oracledb (>=1.0.1)"] postgresql = ["psycopg2 (>=2.7)"] -postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] +postgresql-asyncpg = ["asyncpg", "greenlet (>=1)"] postgresql-pg8000 = ["pg8000 (>=1.29.1)"] postgresql-psycopg = ["psycopg (>=3.0.7)"] postgresql-psycopg2binary = ["psycopg2-binary"] @@ -3737,13 +3722,13 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "types-awscrt" -version = "0.24.2" +version = "0.26.1" description = "Type annotations and code completion for awscrt" optional = false python-versions = ">=3.8" files = [ - {file = "types_awscrt-0.24.2-py3-none-any.whl", hash = "sha256:345ab84a4f75b26bfb816b249657855824a4f2d1ce5b58268c549f81fce6eccc"}, - {file = "types_awscrt-0.24.2.tar.gz", hash = "sha256:5826baf69ad5d29c76be49fc7df00222281fa31b14f99e9fb4492d71ec98fea5"}, + {file = "types_awscrt-0.26.1-py3-none-any.whl", hash = "sha256:176d320a26990efc057d4bf71396e05be027c142252ac48cc0d87aaea0704280"}, + {file = "types_awscrt-0.26.1.tar.gz", hash = "sha256:aca96f889b3745c0e74f42f08f277fed3bf6e9baa2cf9b06a36f78d77720e504"}, ] [[package]] @@ -3773,24 +3758,24 @@ types-pyasn1 = "*" [[package]] name = "types-pytz" -version = "2025.1.0.20250318" +version = "2025.2.0.20250326" description = "Typing stubs for pytz" optional = false python-versions = ">=3.9" files = [ - {file = 
"types_pytz-2025.1.0.20250318-py3-none-any.whl", hash = "sha256:04dba4907c5415777083f9548693c6d9f80ec53adcaff55a38526a3f8ddcae04"}, - {file = "types_pytz-2025.1.0.20250318.tar.gz", hash = "sha256:97e0e35184c6fe14e3a5014512057f2c57bb0c6582d63c1cfcc4809f82180449"}, + {file = "types_pytz-2025.2.0.20250326-py3-none-any.whl", hash = "sha256:3c397fd1b845cd2b3adc9398607764ced9e578a98a5d1fbb4a9bc9253edfb162"}, + {file = "types_pytz-2025.2.0.20250326.tar.gz", hash = "sha256:deda02de24f527066fc8d6a19e284ab3f3ae716a42b4adb6b40e75e408c08d36"}, ] [[package]] name = "types-pyyaml" -version = "6.0.12.20241230" +version = "6.0.12.20250402" description = "Typing stubs for PyYAML" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "types_PyYAML-6.0.12.20241230-py3-none-any.whl", hash = "sha256:fa4d32565219b68e6dee5f67534c722e53c00d1cfc09c435ef04d7353e1e96e6"}, - {file = "types_pyyaml-6.0.12.20241230.tar.gz", hash = "sha256:7f07622dbd34bb9c8b264fe860a17e0efcad00d50b5f27e93984909d9363498c"}, + {file = "types_pyyaml-6.0.12.20250402-py3-none-any.whl", hash = "sha256:652348fa9e7a203d4b0d21066dfb00760d3cbd5a15ebb7cf8d33c88a49546681"}, + {file = "types_pyyaml-6.0.12.20250402.tar.gz", hash = "sha256:d7c13c3e6d335b6af4b0122a01ff1d270aba84ab96d1a1a1063ecba3e13ec075"}, ] [[package]] @@ -3823,13 +3808,13 @@ urllib3 = ">=2" [[package]] name = "types-s3transfer" -version = "0.11.4" +version = "0.12.0" description = "Type annotations and code completion for s3transfer" optional = false python-versions = ">=3.8" files = [ - {file = "types_s3transfer-0.11.4-py3-none-any.whl", hash = "sha256:2a76d92c07d4a3cb469e5343b2e7560e0b8078b2e03696a65407b8c44c861b61"}, - {file = "types_s3transfer-0.11.4.tar.gz", hash = "sha256:05fde593c84270f19fd053f0b1e08f5a057d7c5f036b9884e68fb8cd3041ac30"}, + {file = "types_s3transfer-0.12.0-py3-none-any.whl", hash = "sha256:101bbc5b7f00b71512374df881f480fc6bf63c948b5098ab024bf3370fbfb0e8"}, + {file = "types_s3transfer-0.12.0.tar.gz", hash = "sha256:f8f59201481e904362873bf0be3267f259d60ad946ebdfcb847d092a1fa26f98"}, ] [[package]] @@ -3845,24 +3830,24 @@ files = [ [[package]] name = "typing-extensions" -version = "4.12.2" +version = "4.13.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, - {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, + {file = "typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"}, + {file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"}, ] [[package]] name = "tzdata" -version = "2025.1" +version = "2025.2" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" files = [ - {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, - {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, + {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, + {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, ] [[package]] @@ 
-3883,13 +3868,13 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] name = "urllib3" -version = "2.3.0" +version = "2.4.0" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" files = [ - {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, - {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, + {file = "urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813"}, + {file = "urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466"}, ] [package.extras] @@ -3900,13 +3885,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "uvicorn" -version = "0.34.0" +version = "0.34.2" description = "The lightning-fast ASGI server." optional = true python-versions = ">=3.9" files = [ - {file = "uvicorn-0.34.0-py3-none-any.whl", hash = "sha256:023dc038422502fa28a09c7a30bf2b6991512da7dcdb8fd35fe57cfc154126f4"}, - {file = "uvicorn-0.34.0.tar.gz", hash = "sha256:404051050cd7e905de2c9a7e61790943440b3416f49cb409f965d9dcd0fa73e9"}, + {file = "uvicorn-0.34.2-py3-none-any.whl", hash = "sha256:deb49af569084536d269fe0a6d67e3754f104cf03aba7c11c40f01aadf33c403"}, + {file = "uvicorn-0.34.2.tar.gz", hash = "sha256:0e929828f6186353a80b58ea719861d2629d766293b6d19baf086ba31d4f3328"}, ] [package.dependencies] @@ -3977,13 +3962,13 @@ test = ["aiohttp (>=3.10.5)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", [[package]] name = "virtualenv" -version = "20.29.3" +version = "20.31.1" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" files = [ - {file = "virtualenv-20.29.3-py3-none-any.whl", hash = "sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170"}, - {file = "virtualenv-20.29.3.tar.gz", hash = "sha256:95e39403fcf3940ac45bc717597dba16110b74506131845d9b687d5e73d947ac"}, + {file = "virtualenv-20.31.1-py3-none-any.whl", hash = "sha256:f448cd2f1604c831afb9ea238021060be2c0edbcad8eb0a4e8b4e14ff11a5482"}, + {file = "virtualenv-20.31.1.tar.gz", hash = "sha256:65442939608aeebb9284cd30baca5865fcd9f12b58bb740a24b220030df46d26"}, ] [package.dependencies] @@ -3997,82 +3982,82 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [[package]] name = "watchfiles" -version = "1.0.4" +version = "1.0.5" description = "Simple, modern and high performance file watching and code reload in python." 
optional = true python-versions = ">=3.9" files = [ - {file = "watchfiles-1.0.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:ba5bb3073d9db37c64520681dd2650f8bd40902d991e7b4cfaeece3e32561d08"}, - {file = "watchfiles-1.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9f25d0ba0fe2b6d2c921cf587b2bf4c451860086534f40c384329fb96e2044d1"}, - {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47eb32ef8c729dbc4f4273baece89398a4d4b5d21a1493efea77a17059f4df8a"}, - {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:076f293100db3b0b634514aa0d294b941daa85fc777f9c698adb1009e5aca0b1"}, - {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1eacd91daeb5158c598fe22d7ce66d60878b6294a86477a4715154990394c9b3"}, - {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:13c2ce7b72026cfbca120d652f02c7750f33b4c9395d79c9790b27f014c8a5a2"}, - {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:90192cdc15ab7254caa7765a98132a5a41471cf739513cc9bcf7d2ffcc0ec7b2"}, - {file = "watchfiles-1.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:278aaa395f405972e9f523bd786ed59dfb61e4b827856be46a42130605fd0899"}, - {file = "watchfiles-1.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a462490e75e466edbb9fc4cd679b62187153b3ba804868452ef0577ec958f5ff"}, - {file = "watchfiles-1.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8d0d0630930f5cd5af929040e0778cf676a46775753e442a3f60511f2409f48f"}, - {file = "watchfiles-1.0.4-cp310-cp310-win32.whl", hash = "sha256:cc27a65069bcabac4552f34fd2dce923ce3fcde0721a16e4fb1b466d63ec831f"}, - {file = "watchfiles-1.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:8b1f135238e75d075359cf506b27bf3f4ca12029c47d3e769d8593a2024ce161"}, - {file = "watchfiles-1.0.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:2a9f93f8439639dc244c4d2902abe35b0279102bca7bbcf119af964f51d53c19"}, - {file = "watchfiles-1.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9eea33ad8c418847dd296e61eb683cae1c63329b6d854aefcd412e12d94ee235"}, - {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31f1a379c9dcbb3f09cf6be1b7e83b67c0e9faabed0471556d9438a4a4e14202"}, - {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ab594e75644421ae0a2484554832ca5895f8cab5ab62de30a1a57db460ce06c6"}, - {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc2eb5d14a8e0d5df7b36288979176fbb39672d45184fc4b1c004d7c3ce29317"}, - {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f68d8e9d5a321163ddacebe97091000955a1b74cd43724e346056030b0bacee"}, - {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9ce064e81fe79faa925ff03b9f4c1a98b0bbb4a1b8c1b015afa93030cb21a49"}, - {file = "watchfiles-1.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b77d5622ac5cc91d21ae9c2b284b5d5c51085a0bdb7b518dba263d0af006132c"}, - {file = "watchfiles-1.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1941b4e39de9b38b868a69b911df5e89dc43767feeda667b40ae032522b9b5f1"}, - {file = "watchfiles-1.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:4f8c4998506241dedf59613082d1c18b836e26ef2a4caecad0ec41e2a15e4226"}, - {file = "watchfiles-1.0.4-cp311-cp311-win32.whl", hash = "sha256:4ebbeca9360c830766b9f0df3640b791be569d988f4be6c06d6fae41f187f105"}, - {file = "watchfiles-1.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:05d341c71f3d7098920f8551d4df47f7b57ac5b8dad56558064c3431bdfc0b74"}, - {file = "watchfiles-1.0.4-cp311-cp311-win_arm64.whl", hash = "sha256:32b026a6ab64245b584acf4931fe21842374da82372d5c039cba6bf99ef722f3"}, - {file = "watchfiles-1.0.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:229e6ec880eca20e0ba2f7e2249c85bae1999d330161f45c78d160832e026ee2"}, - {file = "watchfiles-1.0.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5717021b199e8353782dce03bd8a8f64438832b84e2885c4a645f9723bf656d9"}, - {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0799ae68dfa95136dde7c472525700bd48777875a4abb2ee454e3ab18e9fc712"}, - {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:43b168bba889886b62edb0397cab5b6490ffb656ee2fcb22dec8bfeb371a9e12"}, - {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb2c46e275fbb9f0c92e7654b231543c7bbfa1df07cdc4b99fa73bedfde5c844"}, - {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:857f5fc3aa027ff5e57047da93f96e908a35fe602d24f5e5d8ce64bf1f2fc733"}, - {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55ccfd27c497b228581e2838d4386301227fc0cb47f5a12923ec2fe4f97b95af"}, - {file = "watchfiles-1.0.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c11ea22304d17d4385067588123658e9f23159225a27b983f343fcffc3e796a"}, - {file = "watchfiles-1.0.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:74cb3ca19a740be4caa18f238298b9d472c850f7b2ed89f396c00a4c97e2d9ff"}, - {file = "watchfiles-1.0.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c7cce76c138a91e720d1df54014a047e680b652336e1b73b8e3ff3158e05061e"}, - {file = "watchfiles-1.0.4-cp312-cp312-win32.whl", hash = "sha256:b045c800d55bc7e2cadd47f45a97c7b29f70f08a7c2fa13241905010a5493f94"}, - {file = "watchfiles-1.0.4-cp312-cp312-win_amd64.whl", hash = "sha256:c2acfa49dd0ad0bf2a9c0bb9a985af02e89345a7189be1efc6baa085e0f72d7c"}, - {file = "watchfiles-1.0.4-cp312-cp312-win_arm64.whl", hash = "sha256:22bb55a7c9e564e763ea06c7acea24fc5d2ee5dfc5dafc5cfbedfe58505e9f90"}, - {file = "watchfiles-1.0.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:8012bd820c380c3d3db8435e8cf7592260257b378b649154a7948a663b5f84e9"}, - {file = "watchfiles-1.0.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa216f87594f951c17511efe5912808dfcc4befa464ab17c98d387830ce07b60"}, - {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c9953cf85529c05b24705639ffa390f78c26449e15ec34d5339e8108c7c407"}, - {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7cf684aa9bba4cd95ecb62c822a56de54e3ae0598c1a7f2065d51e24637a3c5d"}, - {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f44a39aee3cbb9b825285ff979ab887a25c5d336e5ec3574f1506a4671556a8d"}, - {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38320582736922be8c865d46520c043bff350956dfc9fbaee3b2df4e1740a4b"}, - {file = 
"watchfiles-1.0.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39f4914548b818540ef21fd22447a63e7be6e24b43a70f7642d21f1e73371590"}, - {file = "watchfiles-1.0.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f12969a3765909cf5dc1e50b2436eb2c0e676a3c75773ab8cc3aa6175c16e902"}, - {file = "watchfiles-1.0.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:0986902677a1a5e6212d0c49b319aad9cc48da4bd967f86a11bde96ad9676ca1"}, - {file = "watchfiles-1.0.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:308ac265c56f936636e3b0e3f59e059a40003c655228c131e1ad439957592303"}, - {file = "watchfiles-1.0.4-cp313-cp313-win32.whl", hash = "sha256:aee397456a29b492c20fda2d8961e1ffb266223625346ace14e4b6d861ba9c80"}, - {file = "watchfiles-1.0.4-cp313-cp313-win_amd64.whl", hash = "sha256:d6097538b0ae5c1b88c3b55afa245a66793a8fec7ada6755322e465fb1a0e8cc"}, - {file = "watchfiles-1.0.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:d3452c1ec703aa1c61e15dfe9d482543e4145e7c45a6b8566978fbb044265a21"}, - {file = "watchfiles-1.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7b75fee5a16826cf5c46fe1c63116e4a156924d668c38b013e6276f2582230f0"}, - {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e997802d78cdb02623b5941830ab06f8860038faf344f0d288d325cc9c5d2ff"}, - {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e0611d244ce94d83f5b9aff441ad196c6e21b55f77f3c47608dcf651efe54c4a"}, - {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9745a4210b59e218ce64c91deb599ae8775c8a9da4e95fb2ee6fe745fc87d01a"}, - {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4810ea2ae622add560f4aa50c92fef975e475f7ac4900ce5ff5547b2434642d8"}, - {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:740d103cd01458f22462dedeb5a3382b7f2c57d07ff033fbc9465919e5e1d0f3"}, - {file = "watchfiles-1.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdbd912a61543a36aef85e34f212e5d2486e7c53ebfdb70d1e0b060cc50dd0bf"}, - {file = "watchfiles-1.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0bc80d91ddaf95f70258cf78c471246846c1986bcc5fd33ccc4a1a67fcb40f9a"}, - {file = "watchfiles-1.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ab0311bb2ffcd9f74b6c9de2dda1612c13c84b996d032cd74799adb656af4e8b"}, - {file = "watchfiles-1.0.4-cp39-cp39-win32.whl", hash = "sha256:02a526ee5b5a09e8168314c905fc545c9bc46509896ed282aeb5a8ba9bd6ca27"}, - {file = "watchfiles-1.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:a5ae5706058b27c74bac987d615105da17724172d5aaacc6c362a40599b6de43"}, - {file = "watchfiles-1.0.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cdcc92daeae268de1acf5b7befcd6cfffd9a047098199056c72e4623f531de18"}, - {file = "watchfiles-1.0.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d8d3d9203705b5797f0af7e7e5baa17c8588030aaadb7f6a86107b7247303817"}, - {file = "watchfiles-1.0.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdef5a1be32d0b07dcea3318a0be95d42c98ece24177820226b56276e06b63b0"}, - {file = "watchfiles-1.0.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:342622287b5604ddf0ed2d085f3a589099c9ae8b7331df3ae9845571586c4f3d"}, - {file = "watchfiles-1.0.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:9fe37a2de80aa785d340f2980276b17ef697ab8db6019b07ee4fd28a8359d2f3"}, - {file = "watchfiles-1.0.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:9d1ef56b56ed7e8f312c934436dea93bfa3e7368adfcf3df4c0da6d4de959a1e"}, - {file = "watchfiles-1.0.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95b42cac65beae3a362629950c444077d1b44f1790ea2772beaea95451c086bb"}, - {file = "watchfiles-1.0.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e0227b8ed9074c6172cf55d85b5670199c99ab11fd27d2c473aa30aec67ee42"}, - {file = "watchfiles-1.0.4.tar.gz", hash = "sha256:6ba473efd11062d73e4f00c2b730255f9c1bdd73cd5f9fe5b5da8dbd4a717205"}, + {file = "watchfiles-1.0.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:5c40fe7dd9e5f81e0847b1ea64e1f5dd79dd61afbedb57759df06767ac719b40"}, + {file = "watchfiles-1.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c0db396e6003d99bb2d7232c957b5f0b5634bbd1b24e381a5afcc880f7373fb"}, + {file = "watchfiles-1.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b551d4fb482fc57d852b4541f911ba28957d051c8776e79c3b4a51eb5e2a1b11"}, + {file = "watchfiles-1.0.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:830aa432ba5c491d52a15b51526c29e4a4b92bf4f92253787f9726fe01519487"}, + {file = "watchfiles-1.0.5-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a16512051a822a416b0d477d5f8c0e67b67c1a20d9acecb0aafa3aa4d6e7d256"}, + {file = "watchfiles-1.0.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfe0cbc787770e52a96c6fda6726ace75be7f840cb327e1b08d7d54eadc3bc85"}, + {file = "watchfiles-1.0.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d363152c5e16b29d66cbde8fa614f9e313e6f94a8204eaab268db52231fe5358"}, + {file = "watchfiles-1.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ee32c9a9bee4d0b7bd7cbeb53cb185cf0b622ac761efaa2eba84006c3b3a614"}, + {file = "watchfiles-1.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:29c7fd632ccaf5517c16a5188e36f6612d6472ccf55382db6c7fe3fcccb7f59f"}, + {file = "watchfiles-1.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e637810586e6fe380c8bc1b3910accd7f1d3a9a7262c8a78d4c8fb3ba6a2b3d"}, + {file = "watchfiles-1.0.5-cp310-cp310-win32.whl", hash = "sha256:cd47d063fbeabd4c6cae1d4bcaa38f0902f8dc5ed168072874ea11d0c7afc1ff"}, + {file = "watchfiles-1.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:86c0df05b47a79d80351cd179893f2f9c1b1cae49d96e8b3290c7f4bd0ca0a92"}, + {file = "watchfiles-1.0.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:237f9be419e977a0f8f6b2e7b0475ababe78ff1ab06822df95d914a945eac827"}, + {file = "watchfiles-1.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0da39ff917af8b27a4bdc5a97ac577552a38aac0d260a859c1517ea3dc1a7c4"}, + {file = "watchfiles-1.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cfcb3952350e95603f232a7a15f6c5f86c5375e46f0bd4ae70d43e3e063c13d"}, + {file = "watchfiles-1.0.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:68b2dddba7a4e6151384e252a5632efcaa9bc5d1c4b567f3cb621306b2ca9f63"}, + {file = "watchfiles-1.0.5-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:95cf944fcfc394c5f9de794ce581914900f82ff1f855326f25ebcf24d5397418"}, + {file = "watchfiles-1.0.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ecf6cd9f83d7c023b1aba15d13f705ca7b7d38675c121f3cc4a6e25bd0857ee9"}, + {file = "watchfiles-1.0.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:852de68acd6212cd6d33edf21e6f9e56e5d98c6add46f48244bd479d97c967c6"}, + {file = "watchfiles-1.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5730f3aa35e646103b53389d5bc77edfbf578ab6dab2e005142b5b80a35ef25"}, + {file = "watchfiles-1.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:18b3bd29954bc4abeeb4e9d9cf0b30227f0f206c86657674f544cb032296acd5"}, + {file = "watchfiles-1.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ba5552a1b07c8edbf197055bc9d518b8f0d98a1c6a73a293bc0726dce068ed01"}, + {file = "watchfiles-1.0.5-cp311-cp311-win32.whl", hash = "sha256:2f1fefb2e90e89959447bc0420fddd1e76f625784340d64a2f7d5983ef9ad246"}, + {file = "watchfiles-1.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:b6e76ceb1dd18c8e29c73f47d41866972e891fc4cc7ba014f487def72c1cf096"}, + {file = "watchfiles-1.0.5-cp311-cp311-win_arm64.whl", hash = "sha256:266710eb6fddc1f5e51843c70e3bebfb0f5e77cf4f27129278c70554104d19ed"}, + {file = "watchfiles-1.0.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b5eb568c2aa6018e26da9e6c86f3ec3fd958cee7f0311b35c2630fa4217d17f2"}, + {file = "watchfiles-1.0.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0a04059f4923ce4e856b4b4e5e783a70f49d9663d22a4c3b3298165996d1377f"}, + {file = "watchfiles-1.0.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e380c89983ce6e6fe2dd1e1921b9952fb4e6da882931abd1824c092ed495dec"}, + {file = "watchfiles-1.0.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fe43139b2c0fdc4a14d4f8d5b5d967f7a2777fd3d38ecf5b1ec669b0d7e43c21"}, + {file = "watchfiles-1.0.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee0822ce1b8a14fe5a066f93edd20aada932acfe348bede8aa2149f1a4489512"}, + {file = "watchfiles-1.0.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a0dbcb1c2d8f2ab6e0a81c6699b236932bd264d4cef1ac475858d16c403de74d"}, + {file = "watchfiles-1.0.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a2014a2b18ad3ca53b1f6c23f8cd94a18ce930c1837bd891262c182640eb40a6"}, + {file = "watchfiles-1.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10f6ae86d5cb647bf58f9f655fcf577f713915a5d69057a0371bc257e2553234"}, + {file = "watchfiles-1.0.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1a7bac2bde1d661fb31f4d4e8e539e178774b76db3c2c17c4bb3e960a5de07a2"}, + {file = "watchfiles-1.0.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ab626da2fc1ac277bbf752446470b367f84b50295264d2d313e28dc4405d663"}, + {file = "watchfiles-1.0.5-cp312-cp312-win32.whl", hash = "sha256:9f4571a783914feda92018ef3901dab8caf5b029325b5fe4558c074582815249"}, + {file = "watchfiles-1.0.5-cp312-cp312-win_amd64.whl", hash = "sha256:360a398c3a19672cf93527f7e8d8b60d8275119c5d900f2e184d32483117a705"}, + {file = "watchfiles-1.0.5-cp312-cp312-win_arm64.whl", hash = "sha256:1a2902ede862969077b97523987c38db28abbe09fb19866e711485d9fbf0d417"}, + {file = "watchfiles-1.0.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:0b289572c33a0deae62daa57e44a25b99b783e5f7aed81b314232b3d3c81a11d"}, + {file = "watchfiles-1.0.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a056c2f692d65bf1e99c41045e3bdcaea3cb9e6b5a53dcaf60a5f3bd95fc9763"}, + {file = 
"watchfiles-1.0.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9dca99744991fc9850d18015c4f0438865414e50069670f5f7eee08340d8b40"}, + {file = "watchfiles-1.0.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:894342d61d355446d02cd3988a7326af344143eb33a2fd5d38482a92072d9563"}, + {file = "watchfiles-1.0.5-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ab44e1580924d1ffd7b3938e02716d5ad190441965138b4aa1d1f31ea0877f04"}, + {file = "watchfiles-1.0.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d6f9367b132078b2ceb8d066ff6c93a970a18c3029cea37bfd7b2d3dd2e5db8f"}, + {file = "watchfiles-1.0.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2e55a9b162e06e3f862fb61e399fe9f05d908d019d87bf5b496a04ef18a970a"}, + {file = "watchfiles-1.0.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0125f91f70e0732a9f8ee01e49515c35d38ba48db507a50c5bdcad9503af5827"}, + {file = "watchfiles-1.0.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:13bb21f8ba3248386337c9fa51c528868e6c34a707f729ab041c846d52a0c69a"}, + {file = "watchfiles-1.0.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:839ebd0df4a18c5b3c1b890145b5a3f5f64063c2a0d02b13c76d78fe5de34936"}, + {file = "watchfiles-1.0.5-cp313-cp313-win32.whl", hash = "sha256:4a8ec1e4e16e2d5bafc9ba82f7aaecfeec990ca7cd27e84fb6f191804ed2fcfc"}, + {file = "watchfiles-1.0.5-cp313-cp313-win_amd64.whl", hash = "sha256:f436601594f15bf406518af922a89dcaab416568edb6f65c4e5bbbad1ea45c11"}, + {file = "watchfiles-1.0.5-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:2cfb371be97d4db374cba381b9f911dd35bb5f4c58faa7b8b7106c8853e5d225"}, + {file = "watchfiles-1.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a3904d88955fda461ea2531fcf6ef73584ca921415d5cfa44457a225f4a42bc1"}, + {file = "watchfiles-1.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b7a21715fb12274a71d335cff6c71fe7f676b293d322722fe708a9ec81d91f5"}, + {file = "watchfiles-1.0.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dfd6ae1c385ab481766b3c61c44aca2b3cd775f6f7c0fa93d979ddec853d29d5"}, + {file = "watchfiles-1.0.5-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b659576b950865fdad31fa491d31d37cf78b27113a7671d39f919828587b429b"}, + {file = "watchfiles-1.0.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1909e0a9cd95251b15bff4261de5dd7550885bd172e3536824bf1cf6b121e200"}, + {file = "watchfiles-1.0.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:832ccc221927c860e7286c55c9b6ebcc0265d5e072f49c7f6456c7798d2b39aa"}, + {file = "watchfiles-1.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85fbb6102b3296926d0c62cfc9347f6237fb9400aecd0ba6bbda94cae15f2b3b"}, + {file = "watchfiles-1.0.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:15ac96dd567ad6c71c71f7b2c658cb22b7734901546cd50a475128ab557593ca"}, + {file = "watchfiles-1.0.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4b6227351e11c57ae997d222e13f5b6f1f0700d84b8c52304e8675d33a808382"}, + {file = "watchfiles-1.0.5-cp39-cp39-win32.whl", hash = "sha256:974866e0db748ebf1eccab17862bc0f0303807ed9cda465d1324625b81293a18"}, + {file = "watchfiles-1.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:9848b21ae152fe79c10dd0197304ada8f7b586d3ebc3f27f43c506e5a52a863c"}, + {file = "watchfiles-1.0.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:f59b870db1f1ae5a9ac28245707d955c8721dd6565e7f411024fa374b5362d1d"}, + {file = "watchfiles-1.0.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9475b0093767e1475095f2aeb1d219fb9664081d403d1dff81342df8cd707034"}, + {file = "watchfiles-1.0.5-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc533aa50664ebd6c628b2f30591956519462f5d27f951ed03d6c82b2dfd9965"}, + {file = "watchfiles-1.0.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fed1cd825158dcaae36acce7b2db33dcbfd12b30c34317a88b8ed80f0541cc57"}, + {file = "watchfiles-1.0.5-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:554389562c29c2c182e3908b149095051f81d28c2fec79ad6c8997d7d63e0009"}, + {file = "watchfiles-1.0.5-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a74add8d7727e6404d5dc4dcd7fac65d4d82f95928bbee0cf5414c900e86773e"}, + {file = "watchfiles-1.0.5-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb1489f25b051a89fae574505cc26360c8e95e227a9500182a7fe0afcc500ce0"}, + {file = "watchfiles-1.0.5-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0901429650652d3f0da90bad42bdafc1f9143ff3605633c455c999a2d786cac"}, + {file = "watchfiles-1.0.5.tar.gz", hash = "sha256:b7529b5dcc114679d43827d8c35a07c493ad6f083633d573d81c660abc5979e9"}, ] [package.dependencies] @@ -4241,4 +4226,4 @@ server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", " [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "85d3bb6397635a718f38d11a0a02d0594ade1c30dcfe02e74a9507952ec6e5f1" +content-hash = "47c32224ef18a3d012cb690d45dff501ba1a413d2281c079c4a68761fcf229f0" diff --git a/pyproject.toml b/pyproject.toml index e9681321..7c3a1bff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ fqfa = "~1.3.0" pyhumps = "~3.8.0" pyyaml = "~6.0.1" IDUtils = "~1.2.0" -mavehgvs = "~0.6.0" +mavehgvs = "~0.7.0" eutils = "~0.6.0" email_validator = "~2.1.1" numpy = "~1.26" From 219e1e0f3b84cf80079f37a538f2804b74924a67 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Tue, 6 May 2025 11:34:18 -0700 Subject: [PATCH 118/166] Fixed: MaveHGVS parsing error handling --- src/mavedb/lib/validation/dataframe/variant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/lib/validation/dataframe/variant.py b/src/mavedb/lib/validation/dataframe/variant.py index 20aa04fe..edb190b1 100644 --- a/src/mavedb/lib/validation/dataframe/variant.py +++ b/src/mavedb/lib/validation/dataframe/variant.py @@ -238,7 +238,7 @@ def _validate_allelic_variation(variant: Variant) -> bool: validator.validate(parser.parse(str(variant_obj)), strict=False) except MaveHgvsParseError as e: logger.error("err", exc_info=e) - return False, f"Failed to parse variant string '{str(variant_obj)}' at row {idx}." + return False, f"Failed to parse variant string '{variant}' at row {idx}." except hgvs.exceptions.HGVSError as e: return False, f"Failed to parse row {idx} with HGVS exception: {e}." From 38a0d7b37b0bc810591af1eecc6f5859ab5c3f8d Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Tue, 6 May 2025 13:29:04 -0700 Subject: [PATCH 119/166] Fixed rebasing error. 
--- tests/worker/test_jobs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index f148be78..0cd66413 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -1789,7 +1789,7 @@ async def dummy_submission_job(): return_value=dummy_submission_job(), ), patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(ArqRedis, "enqueue_job", side_effect=Exception()), + patch.object(arq.ArqRedis, "enqueue_job", side_effect=Exception()), ): result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) @@ -1822,7 +1822,7 @@ async def dummy_submission_job(): return_value=dummy_submission_job(), ), patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), - patch.object(ArqRedis, "enqueue_job", return_value=None), + patch.object(arq.ArqRedis, "enqueue_job", return_value=None), ): result = await submit_score_set_mappings_to_ldh(standalone_worker_context, uuid4().hex, score_set.id) @@ -2089,7 +2089,7 @@ async def dummy_linking_job(): "mavedb.worker.jobs.LINKED_DATA_RETRY_THRESHOLD", 1, ), - patch.object(ArqRedis, "enqueue_job", return_value=awaitable_exception()), + patch.object(arq.ArqRedis, "enqueue_job", return_value=awaitable_exception()), ): result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) From a5835a5393fc49b4606f528e2ded4e6d90f6cfae Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Tue, 6 May 2025 13:51:04 -0700 Subject: [PATCH 120/166] Fixed rebasing error. --- tests/conftest_optional.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/conftest_optional.py b/tests/conftest_optional.py index 722e8dc6..32e580e8 100644 --- a/tests/conftest_optional.py +++ b/tests/conftest_optional.py @@ -16,7 +16,13 @@ from mavedb.server_main import app from mavedb.deps import get_db, get_worker, hgvs_data_provider from arq.worker import Worker -from mavedb.worker.jobs import create_variants_for_score_set, map_variants_for_score_set, variant_mapper_manager +from mavedb.worker.jobs import ( + create_variants_for_score_set, + map_variants_for_score_set, + link_clingen_variants, + submit_score_set_mappings_to_ldh, + variant_mapper_manager, +) from tests.helpers.constants import ADMIN_USER, EXTRA_USER, TEST_USER @@ -100,7 +106,13 @@ async def on_job(ctx): ctx["pool"] = futures.ProcessPoolExecutor() worker_ = Worker( - functions=[create_variants_for_score_set, map_variants_for_score_set, variant_mapper_manager], + functions=[ + create_variants_for_score_set, + map_variants_for_score_set, + variant_mapper_manager, + submit_score_set_mappings_to_ldh, + link_clingen_variants, + ], redis_pool=arq_redis, burst=True, poll_delay=0, From 68b1a428036d5913e78b338eef7addcac823a178 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Wed, 7 May 2025 14:41:41 -0700 Subject: [PATCH 121/166] Ignore problematic MyPy error. 
--- src/mavedb/lib/validation/dataframe/variant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/lib/validation/dataframe/variant.py b/src/mavedb/lib/validation/dataframe/variant.py index edb190b1..2e1817b1 100644 --- a/src/mavedb/lib/validation/dataframe/variant.py +++ b/src/mavedb/lib/validation/dataframe/variant.py @@ -217,7 +217,7 @@ def _validate_allelic_variation(variant: Variant) -> bool: If the variant is not a valid HGVS string (for reasons of transcript/variant inconsistency). """ - for variant_sub_string in variant.components(): + for variant_sub_string in variant.components(): # type: ignore validator.validate(parser.parse(variant_sub_string), strict=False) return True From b4e6a1545f641269c71e5df20114031886c25564 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 8 May 2025 13:26:16 -0700 Subject: [PATCH 122/166] Run 3.9 tests on ubuntu-latest --- .github/workflows/run-tests-on-push.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run-tests-on-push.yml b/.github/workflows/run-tests-on-push.yml index 803541af..ab61065a 100644 --- a/.github/workflows/run-tests-on-push.yml +++ b/.github/workflows/run-tests-on-push.yml @@ -7,8 +7,8 @@ env: jobs: run-tests-3_9-core-dependencies: - runs-on: ubuntu-20.04 - name: Pytest on Core Dependencies-- Python 3.9 / Ubuntu 20.04 + runs-on: ubuntu-latest + name: Pytest on Core Dependencies-- Python 3.9 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -21,8 +21,8 @@ jobs: - run: poetry run pytest tests/ run-tests-3_9: - runs-on: ubuntu-20.04 - name: Pytest on Optional Dependencies-- Python 3.9 / Ubuntu 20.04 + runs-on: ubuntu-latest + name: Pytest on Optional Dependencies-- Python 3.9 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 From 9c296493ff6d240ec8cc2b56d75d1e62d52fd0af Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 8 May 2025 13:46:58 -0700 Subject: [PATCH 123/166] Add Import Skipper to ClinGen LDH Service Tests --- tests/lib/clingen/test_linked_data_hub.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/lib/clingen/test_linked_data_hub.py b/tests/lib/clingen/test_linked_data_hub.py index 43dd80fd..6e34328d 100644 --- a/tests/lib/clingen/test_linked_data_hub.py +++ b/tests/lib/clingen/test_linked_data_hub.py @@ -1,9 +1,15 @@ +# ruff: noqa: E402 + import os -from urllib import parse import pytest import requests from datetime import datetime from unittest.mock import patch, MagicMock +from urllib import parse + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") from mavedb.lib.clingen.constants import LDH_LINKED_DATA_URL, GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD from mavedb.lib.utils import batched From e7ca7b1c1a5f3a74a06d175d85b505e127a0ef07 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 8 May 2025 14:00:18 -0700 Subject: [PATCH 124/166] Import all SA models up-front for testing SA complains about unitialized names for models when some are not imported. This is usually done in the server_main file, but becomes an issue when testing on only core dependencies since not all models are loaded and that file is no longer imported by the test suite. Do this step up front in the top level conftest file instead to ensure we load all model objects into the SA namespace for testing. 
--- tests/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/conftest.py b/tests/conftest.py index c5a2ef9e..3e4a3d96 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,6 +9,7 @@ from sqlalchemy.pool import NullPool from mavedb.db.base import Base +from mavedb.models import * # noqa: F403 sys.path.append(".") From 7dff91419c0df2d26a1b4fc7ca9608b9899dea8a Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 13 May 2025 11:31:26 -0700 Subject: [PATCH 125/166] Add ClinGen Submission EnvVar to Control Submission Adds a environment variable `CLIN_GEN_SUBMISSION_ENABLED` that controls whether ClinGen submission jobs are enqueued by the worker. Tests are edited to control the value of this environment variable. --- src/mavedb/lib/clingen/constants.py | 2 + src/mavedb/worker/jobs.py | 27 ++++--- tests/worker/test_jobs.py | 113 ++++++++++++++++++++++------ 3 files changed, 109 insertions(+), 33 deletions(-) diff --git a/src/mavedb/lib/clingen/constants.py b/src/mavedb/lib/clingen/constants.py index 7e21f77b..add9bdc0 100644 --- a/src/mavedb/lib/clingen/constants.py +++ b/src/mavedb/lib/clingen/constants.py @@ -1,5 +1,7 @@ import os +CLIN_GEN_SUBMISSION_ENABLED = os.getenv("SUBMISSION_ENABLED", "false").lower() == "true" + GENBOREE_ACCOUNT_NAME = os.getenv("GENBOREE_ACCOUNT_NAME") GENBOREE_ACCOUNT_PASSWORD = os.getenv("GENBOREE_ACCOUNT_PASSWORD") diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index 2b694268..59df8060 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -19,6 +19,7 @@ DEFAULT_LDH_SUBMISSION_BATCH_SIZE, LDH_SUBMISSION_URL, LINKED_DATA_RETRY_THRESHOLD, + CLIN_GEN_SUBMISSION_ENABLED, ) from mavedb.lib.clingen.content_constructors import construct_ldh_submission from mavedb.lib.clingen.linked_data_hub import ( @@ -560,20 +561,26 @@ async def map_variants_for_score_set( new_job_id = None try: - new_job = await redis.enqueue_job( - "submit_score_set_mappings_to_ldh", - correlation_id, - score_set.id, - ) + if CLIN_GEN_SUBMISSION_ENABLED: + new_job = await redis.enqueue_job( + "submit_score_set_mappings_to_ldh", + correlation_id, + score_set.id, + ) - if new_job: - new_job_id = new_job.job_id + if new_job: + new_job_id = new_job.job_id - logging_context["submit_clingen_variants_job_id"] = new_job_id - logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) + logging_context["submit_clingen_variants_job_id"] = new_job_id + logger.info(msg="Queued a new ClinGen submission job.", extra=logging_context) + else: + raise SubmissionEnqueueError() else: - raise SubmissionEnqueueError() + logger.warning( + msg="ClinGen submission is disabled, skipped submission of mapped variants to LDH.", + extra=logging_context, + ) except Exception as e: send_slack_error(e) diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index 0cd66413..acd23ec0 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -129,16 +129,19 @@ async def setup_records_files_and_variants_with_mapping( async def dummy_mapping_job(): return await setup_mapping_output(async_client, session, score_set) - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", False), ): result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) assert result["success"] assert not 
result["retried"] - assert result["enqueued_job"] is not None + assert result["enqueued_job"] is None return session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() @@ -501,6 +504,7 @@ async def dummy_linking_job(): patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), ): await arq_redis.enqueue_job("create_variants_for_score_set", uuid4().hex, score_set.id, 1, scores, counts) await arq_worker.async_run() @@ -595,10 +599,13 @@ async def dummy_mapping_job(): # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), ): result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) @@ -638,10 +645,13 @@ async def dummy_mapping_job(): # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), ): existing_variant = session.scalars(select(Variant)).first() @@ -1061,10 +1071,13 @@ async def dummy_mapping_job(): # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine # object that sets up test mappingn output. - with patch.object( - _UnixSelectorEventLoop, - "run_in_executor", - return_value=dummy_mapping_job(), + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_mapping_job(), + ), + patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), ): result = await map_variants_for_score_set(standalone_worker_context, uuid4().hex, score_set.id, 1) @@ -1434,10 +1447,59 @@ async def dummy_linking_job(): patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), ): - await arq_redis.enqueue_job("variant_mapper_manager", uuid4().hex, 1) await arq_worker.async_run() - await arq_worker.run_check() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed all jobs exactly once. 
+ assert num_completed_jobs == 4 + + score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() + mapped_variants_for_score_set = session.scalars( + select(MappedVariant).join(Variant).join(ScoreSetDbModel).filter(ScoreSetDbModel.urn == score_set.urn) + ).all() + assert (await arq_redis.llen(MAPPING_QUEUE_NAME)) == 0 + assert (await arq_redis.get(MAPPING_CURRENT_ID_NAME)).decode("utf-8") == "" + assert len(mapped_variants_for_score_set) == score_set.num_variants + assert score_set.mapping_state == MappingState.complete + assert score_set.mapping_errors is None + + +@pytest.mark.asyncio +async def test_mapping_manager_enqueues_mapping_process_with_successful_mapping_linking_disabled( + setup_worker_db, standalone_worker_context, session, async_client, data_files, arq_worker, arq_redis +): + score_set = await setup_records_files_and_variants( + session, + async_client, + data_files, + TEST_MINIMAL_SEQ_SCORESET, + standalone_worker_context, + ) + + async def dummy_mapping_job(): + return await setup_mapping_output(async_client, session, score_set) + + # We seem unable to mock requests via requests_mock that occur inside another event loop. Workaround + # this limitation by instead patching the _UnixSelectorEventLoop 's executor function, with a coroutine + # object that sets up test mappingn output. + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + side_effect=[dummy_mapping_job()], + ), + patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), + patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", False), + ): + await arq_worker.async_run() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed the manager and mapping jobs, but not the submission or linking jobs. + assert num_completed_jobs == 2 score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() mapped_variants_for_score_set = session.scalars( @@ -1491,10 +1553,13 @@ async def dummy_linking_job(): patch.object(ClinGenLdhService, "_existing_jwt", return_value="test_jwt"), patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0), + patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True), ): - await arq_redis.enqueue_job("variant_mapper_manager", uuid4().hex, 1) await arq_worker.async_run() - await arq_worker.run_check() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed the mapping manager job twice, the mapping job twice, the submission job, and the linking job. + assert num_completed_jobs == 6 score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() mapped_variants_for_score_set = session.scalars( @@ -1533,9 +1598,11 @@ async def failed_mapping_job(): ), patch("mavedb.worker.jobs.MAPPING_BACKOFF_IN_SECONDS", 0), ): - await arq_redis.enqueue_job("variant_mapper_manager", uuid4().hex, 1) await arq_worker.async_run() - await arq_worker.run_check() + num_completed_jobs = await arq_worker.run_check() + + # We should have completed 6 mapping jobs and 6 management jobs. 
+ assert num_completed_jobs == 12 score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn)).one() mapped_variants_for_score_set = session.scalars( From f3c4a7713a8bee45dd1f1481a1b2e483c9d63ff9 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 13 May 2025 12:05:16 -0700 Subject: [PATCH 126/166] Reorder Base Editor Alembic Migration --- .../f69b4049bc3b_add_is_base_editor_column_to_target_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alembic/versions/f69b4049bc3b_add_is_base_editor_column_to_target_.py b/alembic/versions/f69b4049bc3b_add_is_base_editor_column_to_target_.py index 041edda4..bd51702a 100644 --- a/alembic/versions/f69b4049bc3b_add_is_base_editor_column_to_target_.py +++ b/alembic/versions/f69b4049bc3b_add_is_base_editor_column_to_target_.py @@ -1,7 +1,7 @@ """Add is_base_editor column to target_accessions Revision ID: f69b4049bc3b -Revises: c404b6719110 +Revises: d6e5a9fde3c9 Create Date: 2025-03-02 14:06:52.217554 """ @@ -12,7 +12,7 @@ # revision identifiers, used by Alembic. revision = "f69b4049bc3b" -down_revision = "c404b6719110" +down_revision = "d6e5a9fde3c9" branch_labels = None depends_on = None From ce029bee5ad0b6d97afda7681496da365401b991 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Tue, 13 May 2025 15:09:33 -0700 Subject: [PATCH 127/166] Rework ClinGen LDH Access Endpoint Environment Variable --- settings/.env.template | 9 +++++++++ src/mavedb/lib/clingen/constants.py | 8 ++++---- src/mavedb/lib/clingen/linked_data_hub.py | 4 ++-- src/mavedb/scripts/clingen_ldh_submission.py | 4 ++-- src/mavedb/worker/jobs.py | 4 ++-- tests/lib/clingen/test_linked_data_hub.py | 6 +++--- tests/worker/test_jobs.py | 21 +++++++++++++++++--- 7 files changed, 40 insertions(+), 16 deletions(-) diff --git a/settings/.env.template b/settings/.env.template index 5d4af067..0c79b881 100644 --- a/settings/.env.template +++ b/settings/.env.template @@ -60,3 +60,12 @@ HGVS_SEQREPO_DIR=/usr/local/share/seqrepo/2024-12-20 MAVEDB_BASE_URL=http://app:8000 MAVEDB_API_KEY=secret DCD_MAPPING_URL=http://dcd-mapping:8000 + +#################################################################################################### +# Environment variables for ClinGen +#################################################################################################### + +GENBOREE_ACCOUNT_NAME= +GENBOREE_ACCOUNT_PASSWORD= +CLIN_GEN_TENANT= +CLIN_GEN_SUBMISSION_ENABLED= diff --git a/src/mavedb/lib/clingen/constants.py b/src/mavedb/lib/clingen/constants.py index add9bdc0..0ee67b3f 100644 --- a/src/mavedb/lib/clingen/constants.py +++ b/src/mavedb/lib/clingen/constants.py @@ -1,19 +1,19 @@ import os -CLIN_GEN_SUBMISSION_ENABLED = os.getenv("SUBMISSION_ENABLED", "false").lower() == "true" +CLIN_GEN_SUBMISSION_ENABLED = os.getenv("CLIN_GEN_SUBMISSION_ENABLED", "false").lower() == "true" GENBOREE_ACCOUNT_NAME = os.getenv("GENBOREE_ACCOUNT_NAME") GENBOREE_ACCOUNT_PASSWORD = os.getenv("GENBOREE_ACCOUNT_PASSWORD") CLIN_GEN_TENANT = os.getenv("CLIN_GEN_TENANT") -LDH_TENANT = os.getenv("LDH_TENANT") LDH_SUBMISSION_TYPE = "cg-ldh-ld-submission" LDH_ENTITY_NAME = "MaveDBMapping" LDH_ENTITY_ENDPOINT = "maveDb" # for some reason, not the same :/ DEFAULT_LDH_SUBMISSION_BATCH_SIZE = 100 -LDH_SUBMISSION_URL = f"https://genboree.org/mq/brdg/pulsar/{CLIN_GEN_TENANT}/ldh/submissions/{LDH_ENTITY_ENDPOINT}" -LDH_LINKED_DATA_URL = f"https://genboree.org/{LDH_TENANT}/{LDH_ENTITY_NAME}/id" +LDH_SUBMISSION_ENDPOINT = 
f"https://genboree.org/mq/brdg/pulsar/{CLIN_GEN_TENANT}/ldh/submissions/{LDH_ENTITY_ENDPOINT}" +LDH_ACCESS_ENDPOINT = os.getenv("LDH_ACCESS_ENDPOINT", "https://genboree.org/ldh") +LDH_MAVE_ACCESS_ENDPOINT = f"{LDH_ACCESS_ENDPOINT}/{LDH_ENTITY_NAME}/id" LINKED_DATA_RETRY_THRESHOLD = 0.95 diff --git a/src/mavedb/lib/clingen/linked_data_hub.py b/src/mavedb/lib/clingen/linked_data_hub.py index 34718d40..f7d23c8c 100644 --- a/src/mavedb/lib/clingen/linked_data_hub.py +++ b/src/mavedb/lib/clingen/linked_data_hub.py @@ -9,7 +9,7 @@ from jose import jwt from mavedb.lib.logging.context import logging_context, save_to_logging_context, format_raised_exception_info_as_dict -from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_LINKED_DATA_URL +from mavedb.lib.clingen.constants import GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD, LDH_MAVE_ACCESS_ENDPOINT from mavedb.lib.types.clingen import LdhSubmission from mavedb.lib.utils import batched @@ -212,7 +212,7 @@ def get_clingen_variation(urn: str) -> Optional[dict]: or None if the request fails. """ response = requests.get( - f"{LDH_LINKED_DATA_URL}/{parse.quote_plus(urn)}", + f"{LDH_MAVE_ACCESS_ENDPOINT}/{parse.quote_plus(urn)}", headers={"Accept": "application/json"}, ) diff --git a/src/mavedb/scripts/clingen_ldh_submission.py b/src/mavedb/scripts/clingen_ldh_submission.py index e25563fc..fbc3df6a 100644 --- a/src/mavedb/scripts/clingen_ldh_submission.py +++ b/src/mavedb/scripts/clingen_ldh_submission.py @@ -10,7 +10,7 @@ from mavedb.models.mapped_variant import MappedVariant from mavedb.scripts.environment import with_database_session from mavedb.lib.clingen.linked_data_hub import ClinGenLdhService -from mavedb.lib.clingen.constants import DEFAULT_LDH_SUBMISSION_BATCH_SIZE, LDH_SUBMISSION_URL +from mavedb.lib.clingen.constants import DEFAULT_LDH_SUBMISSION_BATCH_SIZE, LDH_SUBMISSION_ENDPOINT from mavedb.lib.clingen.content_constructors import construct_ldh_submission from mavedb.lib.variants import hgvs_from_mapped_variant @@ -18,7 +18,7 @@ def submit_urns_to_clingen(db: Session, urns: Sequence[str], debug: bool) -> list[str]: - ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_URL) + ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) ldh_service.authenticate() submitted_entities = [] diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index 59df8060..74d645a0 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -17,7 +17,7 @@ from mavedb.db.view import refresh_all_mat_views from mavedb.lib.clingen.constants import ( DEFAULT_LDH_SUBMISSION_BATCH_SIZE, - LDH_SUBMISSION_URL, + LDH_SUBMISSION_ENDPOINT, LINKED_DATA_RETRY_THRESHOLD, CLIN_GEN_SUBMISSION_ENABLED, ) @@ -792,7 +792,7 @@ async def submit_score_set_mappings_to_ldh(ctx: dict, correlation_id: str, score return {"success": False, "retried": False, "enqueued_job": None} try: - ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_URL) + ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) ldh_service.authenticate() except Exception as e: send_slack_error(e) diff --git a/tests/lib/clingen/test_linked_data_hub.py b/tests/lib/clingen/test_linked_data_hub.py index 6e34328d..9f7ac07a 100644 --- a/tests/lib/clingen/test_linked_data_hub.py +++ b/tests/lib/clingen/test_linked_data_hub.py @@ -11,7 +11,7 @@ cdot = pytest.importorskip("cdot") fastapi = pytest.importorskip("fastapi") -from mavedb.lib.clingen.constants import LDH_LINKED_DATA_URL, GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD +from 
mavedb.lib.clingen.constants import LDH_MAVE_ACCESS_ENDPOINT, GENBOREE_ACCOUNT_NAME, GENBOREE_ACCOUNT_PASSWORD from mavedb.lib.utils import batched from mavedb.lib.clingen.linked_data_hub import ( ClinGenLdhService, @@ -225,7 +225,7 @@ def test_get_clingen_variation_success(mock_get): assert result == mocked_response_json mock_get.assert_called_once_with( - f"{LDH_LINKED_DATA_URL}/{parse.quote_plus(urn)}", + f"{LDH_MAVE_ACCESS_ENDPOINT}/{parse.quote_plus(urn)}", headers={"Accept": "application/json"}, ) @@ -242,7 +242,7 @@ def test_get_clingen_variation_failure(mock_get): assert result is None mock_get.assert_called_once_with( - f"{LDH_LINKED_DATA_URL}/{parse.quote_plus(urn)}", + f"{LDH_MAVE_ACCESS_ENDPOINT}/{parse.quote_plus(urn)}", headers={"Accept": "application/json"}, ) diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index acd23ec0..b7b350b0 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -2027,11 +2027,26 @@ async def test_link_score_set_mappings_to_ldh_objects_exception_while_parsing_li standalone_worker_context, ) + async def dummy_linking_job(): + return [ + (variant_urn, TEST_CLINGEN_LDH_LINKING_RESPONSE) + for variant_urn in session.scalars( + select(Variant.urn).join(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set.urn) + ).all() + ] + # We are unable to mock requests via requests_mock that occur inside another event loop. Instead, patch the return # value of the EventLoop itself, which would have made the request. - with patch( - "mavedb.lib.clingen.linked_data_hub.clingen_allele_id_from_ldh_variation", - side_effect=Exception(), + with ( + patch.object( + _UnixSelectorEventLoop, + "run_in_executor", + return_value=dummy_linking_job(), + ), + patch( + "mavedb.worker.jobs.clingen_allele_id_from_ldh_variation", + side_effect=Exception(), + ), ): result = await link_clingen_variants(standalone_worker_context, uuid4().hex, score_set.id, 1) From 81b0defee741e95a2883ca0e6a0be3d6040df637 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Mon, 19 May 2025 10:45:44 -0700 Subject: [PATCH 128/166] New ClinGen LDH submission script options There are two new options: - "--unlinked" limits the submission to variants that have not yet been linked. - "--prefer-unmapped-hgvs" causes the script to use accession-based hgvs_nt or hgvs_pro strings instead of post-mapped HGVS strings. In addition, the script will now submit variants that have not been mapped, if these have accession-based hgvs_nt or hgvs_pro strings. 
--- .../lib/clingen/content_constructors.py | 15 +++-- src/mavedb/scripts/clingen_ldh_submission.py | 64 +++++++++++++++---- src/mavedb/scripts/link_clingen_variants.py | 6 +- 3 files changed, 64 insertions(+), 21 deletions(-) diff --git a/src/mavedb/lib/clingen/content_constructors.py b/src/mavedb/lib/clingen/content_constructors.py index b9c35e27..5ec82d27 100644 --- a/src/mavedb/lib/clingen/content_constructors.py +++ b/src/mavedb/lib/clingen/content_constructors.py @@ -1,4 +1,5 @@ from datetime import datetime +from typing import Optional from uuid import uuid4 from mavedb import __version__ @@ -30,16 +31,13 @@ def construct_ldh_submission_subject(hgvs: str) -> LdhContentSubject: return {"Variant": {"hgvs": hgvs}} -def construct_ldh_submission_entity(variant: Variant, mapped_variant: MappedVariant) -> LdhContentLinkedData: - return { +def construct_ldh_submission_entity(variant: Variant, mapped_variant: Optional[MappedVariant]) -> LdhContentLinkedData: + entity = { # TODO#372: We try to make all possible fields that are non-nullable represented that way. "MaveDBMapping": [ { "entContent": { "mavedb_id": variant.urn, # type: ignore - "pre_mapped": mapped_variant.pre_mapped, # type: ignore - "post_mapped": mapped_variant.post_mapped, # type: ignore - "mapping_api_version": mapped_variant.mapping_api_version, # type: ignore "score": variant.data["score_data"]["score"], # type: ignore }, "entId": variant.urn, # type: ignore @@ -47,9 +45,14 @@ def construct_ldh_submission_entity(variant: Variant, mapped_variant: MappedVari } ] } + if mapped_variant is not None: + entity["MaveDBMapping"][0]["pre_mapped"] = mapped_variant.pre_mapped + entity["MaveDBMapping"][0]["post_mapped"] = mapped_variant.post_mapped + entity["MaveDBMapping"][0]["mapping_api_version"] = mapped_variant.mapping_api_version + return entity -def construct_ldh_submission(variant_content: list[tuple[str, Variant, MappedVariant]]) -> list[LdhSubmission]: +def construct_ldh_submission(variant_content: list[tuple[str, Variant, Optional[MappedVariant]]]) -> list[LdhSubmission]: content_submission: list[LdhSubmission] = [] for hgvs, variant, mapped_variant in variant_content: subject = construct_ldh_submission_subject(hgvs) diff --git a/src/mavedb/scripts/clingen_ldh_submission.py b/src/mavedb/scripts/clingen_ldh_submission.py index fbc3df6a..cae2b050 100644 --- a/src/mavedb/scripts/clingen_ldh_submission.py +++ b/src/mavedb/scripts/clingen_ldh_submission.py @@ -1,8 +1,9 @@ import click import logging -from typing import Sequence +import re +from typing import Optional, Sequence -from sqlalchemy import select +from sqlalchemy import and_, select from sqlalchemy.orm import Session from mavedb.models.score_set import ScoreSet @@ -16,8 +17,10 @@ logger = logging.getLogger(__name__) +intronic_variant_with_reference_regex = re.compile(r":c\..*[+-]") +variant_with_reference_regex = re.compile(r":") -def submit_urns_to_clingen(db: Session, urns: Sequence[str], debug: bool) -> list[str]: +def submit_urns_to_clingen(db: Session, urns: Sequence[str], unlinked_only: bool, prefer_unmapped_hgvs: bool, debug: bool) -> list[str]: ldh_service = ClinGenLdhService(url=LDH_SUBMISSION_ENDPOINT) ldh_service.authenticate() @@ -37,13 +40,12 @@ def submit_urns_to_clingen(db: Session, urns: Sequence[str], debug: bool) -> lis continue logger.info(f"Submitting mapped variants to LDH service for score set with URN: {urn}") + mapped_variant_join_clause = and_(MappedVariant.variant_id == Variant.id, MappedVariant.post_mapped.is_not(None), 
MappedVariant.current.is_(True)) variant_objects = db.execute( select(Variant, MappedVariant) - .join(MappedVariant) + .join(MappedVariant, mapped_variant_join_clause, isouter=True) .join(ScoreSet) .where(ScoreSet.urn == urn) - .where(MappedVariant.post_mapped.is_not(None)) - .where(MappedVariant.current.is_(True)) ).all() if not variant_objects: @@ -52,12 +54,48 @@ def submit_urns_to_clingen(db: Session, urns: Sequence[str], debug: bool) -> lis logger.debug(f"Preparing {len(variant_objects)} mapped variants for submission") - variant_content: list[tuple[str, Variant, MappedVariant]] = [] + variant_content: list[tuple[str, Variant, Optional[MappedVariant]]] = [] for variant, mapped_variant in variant_objects: - variation = hgvs_from_mapped_variant(mapped_variant) + if mapped_variant is None: + if variant.hgvs_nt is not None and intronic_variant_with_reference_regex.search(variant.hgvs_nt): + # Use the hgvs_nt string for unmapped intronic variants. This is because our mapper does not yet + # support mapping intronic variants. + variation = [variant.hgvs_nt] + if variation: + logger.info(f"Using hgvs_nt for unmapped intronic variant {variant.urn}: {variation}") + elif variant.hgvs_nt is not None and variant_with_reference_regex.search(variant.hgvs_nt): + # Use the hgvs_nt string for other unmapped NT variants in accession-based score sets. + variation = [variant.hgvs_nt] + if variation: + logger.info(f"Using hgvs_nt for unmapped non-intronic variant {variant.urn}: {variation}") + elif variant.hgvs_pro is not None and variant_with_reference_regex.search(variant.hgvs_pro): + # Use the hgvs_pro string for unmapped PRO variants in accession-based score sets. + variation = [variant.hgvs_pro] + if variation: + logger.info(f"Using hgvs_pro for unmapped non-intronic variant {variant.urn}: {variation}") + else: + logger.warning(f"No variation found for unmapped variant {variant.urn} (nt: {variant.hgvs_nt}, aa: {variant.hgvs_pro}, splice: {variant.hgvs_splice}).") + continue + else: + if unlinked_only and mapped_variant.clingen_allele_id: + continue + # If the script was run with the --prefer-unmapped-hgvs flag, use the hgvs_nt string rather than the + # mapped variant, as long as the variant is accession-based. + if prefer_unmapped_hgvs and variant.hgvs_nt is not None and variant_with_reference_regex.search(variant.hgvs_nt): + variation = [variant.hgvs_nt] + if variation: + logger.info(f"Using hgvs_nt for mapped variant {variant.urn}: {variation}") + elif prefer_unmapped_hgvs and variant.hgvs_pro is not None and variant_with_reference_regex.search(variant.hgvs_pro): + variation = [variant.hgvs_pro] + if variation: + logger.info(f"Using hgvs_pro for mapped variant {variant.urn}: {variation}") # continue # TEMPORARY. Only submit unmapped variants. 
+ else: + variation = hgvs_from_mapped_variant(mapped_variant) + if variation: + logger.info(f"Using mapped variant for {variant.urn}: {variation}") if not variation: - logger.warning(f"No variation found for variant {variant.urn}.") + logger.warning(f"No variation found for mapped variant {variant.urn} (nt: {variant.hgvs_nt}, aa: {variant.hgvs_pro}, splice: {variant.hgvs_splice}).") continue for allele in variation: @@ -90,11 +128,13 @@ def submit_urns_to_clingen(db: Session, urns: Sequence[str], debug: bool) -> lis @click.command() @with_database_session @click.argument("urns", nargs=-1) -@click.option("--all", help="Submit mapped variants for every score set in MaveDB.", is_flag=True) +@click.option("--all", help="Submit variants for every score set in MaveDB.", is_flag=True) +@click.option("--unlinked", default=False, help="Only submit variants that have not already been linked to ClinGen alleles.", is_flag=True) +@click.option("--prefer-unmapped-hgvs", default=False, help="If the unmapped HGVS string is accession-based, use it in the submission instead of the mapped variant.", is_flag=True) @click.option("--suppress-output", help="Suppress final print output to the console.", is_flag=True) @click.option("--debug", help="Enable debug mode. This will send only one request at most to ClinGen", is_flag=True) def submit_clingen_urns_command( - db: Session, urns: Sequence[str], all: bool, suppress_output: bool, debug: bool + db: Session, urns: Sequence[str], all: bool, unlinked: bool, prefer_unmapped_hgvs: bool, suppress_output: bool, debug: bool ) -> None: """ Submit data to ClinGen for mapped variant allele ID generation for the given URNs. @@ -111,7 +151,7 @@ def submit_clingen_urns_command( logger.error("No URNs provided. Please provide at least one URN.") return - submitted_variant_urns = submit_urns_to_clingen(db, urns, debug) + submitted_variant_urns = submit_urns_to_clingen(db, urns, unlinked, prefer_unmapped_hgvs, debug) if not suppress_output: print(", ".join(submitted_variant_urns)) diff --git a/src/mavedb/scripts/link_clingen_variants.py b/src/mavedb/scripts/link_clingen_variants.py index 5f81e308..70bd988a 100644 --- a/src/mavedb/scripts/link_clingen_variants.py +++ b/src/mavedb/scripts/link_clingen_variants.py @@ -2,7 +2,7 @@ import logging from typing import Sequence -from sqlalchemy import select +from sqlalchemy import and_, select from sqlalchemy.orm import Session from mavedb.lib.clingen.linked_data_hub import get_clingen_variation, clingen_allele_id_from_ldh_variation @@ -18,7 +18,7 @@ @with_database_session @click.argument("urns", nargs=-1) @click.option("--score-sets/--variants", default=False) -@click.option("--unlinked", default=False) +@click.option("--unlinked", default=False, is_flag=True) def link_clingen_variants(db: Session, urns: Sequence[str], score_sets: bool, unlinked: bool) -> None: """ Submit data to ClinGen for mapped variant allele ID generation for the given URNs. 
@@ -51,7 +51,7 @@ def link_clingen_variants(db: Session, urns: Sequence[str], score_sets: bool, un failed_urns.append(urn) continue - mapped_variant = db.scalar(select(MappedVariant).join(Variant).where(Variant.urn == urn)) + mapped_variant = db.scalar(select(MappedVariant).join(Variant).where(and_(Variant.urn == urn, MappedVariant.current.is_(True)))) if not mapped_variant: logger.warning(f"No mapped variant found for URN {urn}.") From b73d0bdfd56a47ff00351b03b19b2a80eec33652 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Mon, 19 May 2025 10:50:27 -0700 Subject: [PATCH 129/166] Add score set description and null IRI to the ClinGen LDH submission. --- src/mavedb/lib/clingen/content_constructors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mavedb/lib/clingen/content_constructors.py b/src/mavedb/lib/clingen/content_constructors.py index 5ec82d27..b805c1eb 100644 --- a/src/mavedb/lib/clingen/content_constructors.py +++ b/src/mavedb/lib/clingen/content_constructors.py @@ -15,7 +15,7 @@ def construct_ldh_submission_event(sbj: LdhContentSubject) -> LdhEvent: "type": LDH_SUBMISSION_TYPE, "name": LDH_ENTITY_NAME, "uuid": str(uuid4()), - "sbj": {"id": sbj["Variant"]["hgvs"], "type": "Variant", "format": "hgvs", "add": True}, + "sbj": {"id": sbj["Variant"]["hgvs"], "type": "Variant", "format": "hgvs", "add": True, "iri": None}, "triggered": { "by": { "host": MAVEDB_BASE_GIT, @@ -39,6 +39,7 @@ def construct_ldh_submission_entity(variant: Variant, mapped_variant: Optional[M "entContent": { "mavedb_id": variant.urn, # type: ignore "score": variant.data["score_data"]["score"], # type: ignore + "score_set_description": variant.score_set.short_description, # type: ignore }, "entId": variant.urn, # type: ignore "entIri": f"{MAVEDB_FRONTEND_URL}/{variant.urn}", # type: ignore From 8e58ee9f6058a1c834d8791e2a65a62d392bb2c5 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Mon, 19 May 2025 10:54:43 -0700 Subject: [PATCH 130/166] Add "urns" option to the ClinVar control refresh script. 
--- .../scripts/refresh_clinvar_variant_data.py | 34 ++++++++++++++----- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/src/mavedb/scripts/refresh_clinvar_variant_data.py b/src/mavedb/scripts/refresh_clinvar_variant_data.py index b09cb337..fd47e7fd 100644 --- a/src/mavedb/scripts/refresh_clinvar_variant_data.py +++ b/src/mavedb/scripts/refresh_clinvar_variant_data.py @@ -1,4 +1,6 @@ import click +from mavedb.models.score_set import ScoreSet +from mavedb.models.variant import Variant import requests import csv import time @@ -7,10 +9,10 @@ import random import io -from typing import Dict, Any, Optional +from typing import Dict, Any, Optional, Sequence from datetime import date -from sqlalchemy import select, distinct, func +from sqlalchemy import and_, select, distinct from sqlalchemy.orm import Session from mavedb.models.mapped_variant import MappedVariant @@ -62,16 +64,29 @@ def query_clingen_allele_api(allele_id: str) -> Dict[str, Any]: return response.json() -def refresh_clinvar_variants(db: Session, month: Optional[str], year: Optional[str]) -> None: +def refresh_clinvar_variants(db: Session, month: Optional[str], year: Optional[str], urns: Sequence[str]) -> None: tsv_content = fetch_clinvar_variant_summary_tsv(month, year) tsv_data = parse_tsv(tsv_content) version = f"{month}_{year}" if month and year else f"{date.today().month}_{date.today().year}" logger.info(f"Fetched TSV variant data for ClinVar for {version}.") - total_variants_with_clingen_ids = db.scalar(func.count(distinct(MappedVariant.clingen_allele_id))) - clingen_ids = db.scalars( - select(distinct(MappedVariant.clingen_allele_id)).where(MappedVariant.clingen_allele_id.is_not(None)) - ).all() + if urns: + clingen_ids = db.scalars( + select(distinct(MappedVariant.clingen_allele_id)) + .join(Variant) + .join(ScoreSet) + .where(MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None)) + .where(and_( + MappedVariant.clingen_allele_id.is_not(None), + MappedVariant.current == True, + ScoreSet.urn.in_(urns) + )) + ).all() + else: + clingen_ids = db.scalars( + select(distinct(MappedVariant.clingen_allele_id)).where(MappedVariant.clingen_allele_id.is_not(None)) + ).all() + total_variants_with_clingen_ids = len(clingen_ids) logger.info(f"Fetching ClinGen data for {total_variants_with_clingen_ids} variants.") for index, clingen_id in enumerate(clingen_ids): @@ -127,10 +142,11 @@ def refresh_clinvar_variants(db: Session, month: Optional[str], year: Optional[s @click.command() @with_database_session +@click.argument("urns", help="Optional list of score set URNs to process.", nargs=-1) @click.option("--month", default=None, help="Populate mapped variants for every score set in MaveDB.") @click.option("--year", default=None, help="Populate mapped variants for every score set in MaveDB.") -def refresh_clinvar_variants_command(db: Session, month: Optional[str], year: Optional[str]) -> None: - refresh_clinvar_variants(db, month, year) +def refresh_clinvar_variants_command(db: Session, month: Optional[str], year: Optional[str], urns: Sequence[str]) -> None: + refresh_clinvar_variants(db, month, year, urns) if __name__ == "__main__": From eca622186ba1aedf6d1a4076542691b6a78ffff1 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Mon, 19 May 2025 10:55:46 -0700 Subject: [PATCH 131/166] Fixed: ClinVar control script sometimes throws an exception due to duplicate ClinVar variants associated with a mapped variant. 
--- src/mavedb/scripts/refresh_clinvar_variant_data.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mavedb/scripts/refresh_clinvar_variant_data.py b/src/mavedb/scripts/refresh_clinvar_variant_data.py index fd47e7fd..baa4a33c 100644 --- a/src/mavedb/scripts/refresh_clinvar_variant_data.py +++ b/src/mavedb/scripts/refresh_clinvar_variant_data.py @@ -131,6 +131,8 @@ def refresh_clinvar_variants(db: Session, month: Optional[str], year: Optional[s select(MappedVariant).where(MappedVariant.clingen_allele_id == clingen_id) ).all() for mapped_variant in variants_with_clingen_allele_id: + if clinvar_variant.id in [c.id for c in mapped_variant.clinical_controls]: + continue mapped_variant.clinical_controls.append(clinvar_variant) db.add(mapped_variant) From dd50b98c061640265fb4eac771cf27bfcc4402eb Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Mon, 19 May 2025 10:56:16 -0700 Subject: [PATCH 132/166] Fixed: ClinVar control script fails on score sets with fewer than 100 variants. --- src/mavedb/scripts/refresh_clinvar_variant_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/scripts/refresh_clinvar_variant_data.py b/src/mavedb/scripts/refresh_clinvar_variant_data.py index baa4a33c..6923c8ff 100644 --- a/src/mavedb/scripts/refresh_clinvar_variant_data.py +++ b/src/mavedb/scripts/refresh_clinvar_variant_data.py @@ -90,7 +90,7 @@ def refresh_clinvar_variants(db: Session, month: Optional[str], year: Optional[s logger.info(f"Fetching ClinGen data for {total_variants_with_clingen_ids} variants.") for index, clingen_id in enumerate(clingen_ids): - if total_variants_with_clingen_ids > 0 and index % (total_variants_with_clingen_ids // 100) == 0: + if total_variants_with_clingen_ids > 0 and index % (max(total_variants_with_clingen_ids // 100, 1)) == 0: logger.info(f"Progress: {index / total_variants_with_clingen_ids:.0%}") # Guaranteed based on our query filters. From a29d94c84083d6605d0b5918f07df979d61350ca Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Mon, 19 May 2025 11:00:55 -0700 Subject: [PATCH 133/166] Fixed MyPy error. 
--- src/mavedb/lib/types/clingen.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mavedb/lib/types/clingen.py b/src/mavedb/lib/types/clingen.py index 0d8710a1..9705201b 100644 --- a/src/mavedb/lib/types/clingen.py +++ b/src/mavedb/lib/types/clingen.py @@ -1,4 +1,4 @@ -from typing import TypedDict, Literal +from typing import Optional, TypedDict, Literal from typing_extensions import NotRequired @@ -34,6 +34,7 @@ class LdhEvent(TypedDict): uuid: str sbj: EventSbj triggered: EventTrigger + iri: Optional[str] ### Linked Data Hub Content Types From 2977ea64f82e3fabf26822ddc82189cb0c029a5c Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 19 May 2025 10:35:51 -0700 Subject: [PATCH 134/166] Use Highlighted Variant in Score Set as entIri for ClinGen --- src/mavedb/lib/clingen/content_constructors.py | 3 ++- tests/lib/clingen/test_content_constructors.py | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/mavedb/lib/clingen/content_constructors.py b/src/mavedb/lib/clingen/content_constructors.py index b9c35e27..3422fdb0 100644 --- a/src/mavedb/lib/clingen/content_constructors.py +++ b/src/mavedb/lib/clingen/content_constructors.py @@ -1,5 +1,6 @@ from datetime import datetime from uuid import uuid4 +from urllib.parse import quote_plus from mavedb import __version__ from mavedb.constants import MAVEDB_BASE_GIT, MAVEDB_FRONTEND_URL @@ -43,7 +44,7 @@ def construct_ldh_submission_entity(variant: Variant, mapped_variant: MappedVari "score": variant.data["score_data"]["score"], # type: ignore }, "entId": variant.urn, # type: ignore - "entIri": f"{MAVEDB_FRONTEND_URL}/{variant.urn}", # type: ignore + "entIri": f"{MAVEDB_FRONTEND_URL}/score-sets/{quote_plus(variant.score_set.urn)}?variant={quote_plus(variant.urn)}", # type: ignore } ] } diff --git a/tests/lib/clingen/test_content_constructors.py b/tests/lib/clingen/test_content_constructors.py index 35f73adc..2ba5b58a 100644 --- a/tests/lib/clingen/test_content_constructors.py +++ b/tests/lib/clingen/test_content_constructors.py @@ -1,5 +1,6 @@ from unittest.mock import patch from uuid import UUID +from urllib.parse import quote_plus from mavedb.constants import MAVEDB_BASE_GIT, MAVEDB_FRONTEND_URL from mavedb.lib.clingen.content_constructors import ( @@ -16,6 +17,7 @@ VALID_VARIANT_URN, TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, + VALID_SCORE_SET_URN, ) @@ -64,7 +66,10 @@ def test_construct_ldh_submission_entity(mock_variant, mock_mapped_variant): assert mapping["entContent"]["score"] == 1.0 assert mapping["entId"] == VALID_VARIANT_URN - assert mapping["entIri"] == f"{MAVEDB_FRONTEND_URL}/{VALID_VARIANT_URN}" + assert ( + mapping["entIri"] + == f"{MAVEDB_FRONTEND_URL}/score-sets/{quote_plus(VALID_SCORE_SET_URN)}?variant={quote_plus(VALID_VARIANT_URN)}" + ) def test_construct_ldh_submission(mock_variant, mock_mapped_variant): From 4cdb97215921678528ac450d278a007efe15d885 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Mon, 19 May 2025 11:07:04 -0700 Subject: [PATCH 135/166] Fixed: Optional mapping results in the wrong block of the ClinGen LDH submission --- src/mavedb/lib/clingen/content_constructors.py | 6 +++--- src/mavedb/lib/types/clingen.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mavedb/lib/clingen/content_constructors.py b/src/mavedb/lib/clingen/content_constructors.py index b805c1eb..39f93081 100644 --- a/src/mavedb/lib/clingen/content_constructors.py +++ 
b/src/mavedb/lib/clingen/content_constructors.py @@ -47,9 +47,9 @@ def construct_ldh_submission_entity(variant: Variant, mapped_variant: Optional[M ] } if mapped_variant is not None: - entity["MaveDBMapping"][0]["pre_mapped"] = mapped_variant.pre_mapped - entity["MaveDBMapping"][0]["post_mapped"] = mapped_variant.post_mapped - entity["MaveDBMapping"][0]["mapping_api_version"] = mapped_variant.mapping_api_version + entity["MaveDBMapping"][0]["entContent"]["pre_mapped"] = mapped_variant.pre_mapped + entity["MaveDBMapping"][0]["entContent"]["post_mapped"] = mapped_variant.post_mapped + entity["MaveDBMapping"][0]["entContent"]["mapping_api_version"] = mapped_variant.mapping_api_version return entity diff --git a/src/mavedb/lib/types/clingen.py b/src/mavedb/lib/types/clingen.py index 9705201b..215df5cc 100644 --- a/src/mavedb/lib/types/clingen.py +++ b/src/mavedb/lib/types/clingen.py @@ -14,6 +14,7 @@ class EventSbj(TypedDict): type: str format: Literal["hgvs", "alleleRegistryID", "clinvarID", "geneSymbol"] add: bool + iri: Optional[str] # Who/what triggered the event @@ -34,7 +35,6 @@ class LdhEvent(TypedDict): uuid: str sbj: EventSbj triggered: EventTrigger - iri: Optional[str] ### Linked Data Hub Content Types From b9de11520a0a29dc34c2184d3abef0dc87af2502 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Mon, 19 May 2025 11:16:28 -0700 Subject: [PATCH 136/166] Fixed MyPy errors. --- src/mavedb/lib/clingen/content_constructors.py | 2 +- src/mavedb/lib/types/clingen.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mavedb/lib/clingen/content_constructors.py b/src/mavedb/lib/clingen/content_constructors.py index 39f93081..10f9e859 100644 --- a/src/mavedb/lib/clingen/content_constructors.py +++ b/src/mavedb/lib/clingen/content_constructors.py @@ -32,7 +32,7 @@ def construct_ldh_submission_subject(hgvs: str) -> LdhContentSubject: def construct_ldh_submission_entity(variant: Variant, mapped_variant: Optional[MappedVariant]) -> LdhContentLinkedData: - entity = { + entity: LdhContentLinkedData = { # TODO#372: We try to make all possible fields that are non-nullable represented that way. "MaveDBMapping": [ { diff --git a/src/mavedb/lib/types/clingen.py b/src/mavedb/lib/types/clingen.py index 215df5cc..929e8ade 100644 --- a/src/mavedb/lib/types/clingen.py +++ b/src/mavedb/lib/types/clingen.py @@ -53,9 +53,9 @@ class LdhContentSubject(TypedDict): # The entities we are submitting class LdhMapping(TypedDict): mavedb_id: str - pre_mapped: str - post_mapped: str - mapping_api_version: str + pre_mapped: Optional[str] + post_mapped: Optional[str] + mapping_api_version: Optional[str] score: float From c75c71cb8ba5669781e4e119b6667c4bc8c825ad Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Mon, 19 May 2025 11:22:29 -0700 Subject: [PATCH 137/166] Updated ClinGen submission unit tests. 
--- tests/lib/clingen/test_content_constructors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/lib/clingen/test_content_constructors.py b/tests/lib/clingen/test_content_constructors.py index 35f73adc..95f168fb 100644 --- a/tests/lib/clingen/test_content_constructors.py +++ b/tests/lib/clingen/test_content_constructors.py @@ -42,6 +42,7 @@ def test_construct_ldh_submission_event(): "type": "Variant", "format": "hgvs", "add": True, + "iri": None } assert result["triggered"]["by"] == { "host": MAVEDB_BASE_GIT, From c16b2992a2c0279557536056b9270817a29b4ea9 Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Mon, 19 May 2025 11:28:16 -0700 Subject: [PATCH 138/166] Fixed ClinVar script build & ruff errors. --- src/mavedb/scripts/refresh_clinvar_variant_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mavedb/scripts/refresh_clinvar_variant_data.py b/src/mavedb/scripts/refresh_clinvar_variant_data.py index 6923c8ff..c5ccdb28 100644 --- a/src/mavedb/scripts/refresh_clinvar_variant_data.py +++ b/src/mavedb/scripts/refresh_clinvar_variant_data.py @@ -78,7 +78,7 @@ def refresh_clinvar_variants(db: Session, month: Optional[str], year: Optional[s .where(MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None)) .where(and_( MappedVariant.clingen_allele_id.is_not(None), - MappedVariant.current == True, + MappedVariant.current, ScoreSet.urn.in_(urns) )) ).all() @@ -144,7 +144,7 @@ def refresh_clinvar_variants(db: Session, month: Optional[str], year: Optional[s @click.command() @with_database_session -@click.argument("urns", help="Optional list of score set URNs to process.", nargs=-1) +@click.argument("urns", nargs=-1) @click.option("--month", default=None, help="Populate mapped variants for every score set in MaveDB.") @click.option("--year", default=None, help="Populate mapped variants for every score set in MaveDB.") def refresh_clinvar_variants_command(db: Session, month: Optional[str], year: Optional[str], urns: Sequence[str]) -> None: From 3a6243caeb2badd93fd0f1d7e04666f872361cec Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Mon, 19 May 2025 11:30:38 -0700 Subject: [PATCH 139/166] Fixed MyPy error. 
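For context (my reading of these two consecutive fixups, offered as an assumption): comparing the ORM column with `== True` trips ruff's E712, while passing the bare column attribute where a typed boolean expression is expected is what MyPy objected to; `Column.is_(True)` renders an explicit IS TRUE comparison and satisfies both tools. A minimal sketch:

    from sqlalchemy import select

    from mavedb.models.mapped_variant import MappedVariant

    # Explicit boolean comparison on an ORM column (rendered as an IS TRUE test).
    stmt = select(MappedVariant).where(MappedVariant.current.is_(True))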
--- src/mavedb/scripts/refresh_clinvar_variant_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mavedb/scripts/refresh_clinvar_variant_data.py b/src/mavedb/scripts/refresh_clinvar_variant_data.py index c5ccdb28..e6058751 100644 --- a/src/mavedb/scripts/refresh_clinvar_variant_data.py +++ b/src/mavedb/scripts/refresh_clinvar_variant_data.py @@ -78,7 +78,7 @@ def refresh_clinvar_variants(db: Session, month: Optional[str], year: Optional[s .where(MappedVariant.current.is_(True), MappedVariant.post_mapped.is_not(None)) .where(and_( MappedVariant.clingen_allele_id.is_not(None), - MappedVariant.current, + MappedVariant.current.is_(True), ScoreSet.urn.in_(urns) )) ).all() From 489c59cc6e75917514123a36273a48c6963d0dde Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Wed, 21 May 2025 10:35:05 +1000 Subject: [PATCH 140/166] Update tests/routers/test_score_set.py Co-authored-by: Benjamin Capodanno <31941502+bencap@users.noreply.github.com> --- tests/routers/test_score_set.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 19a711ae..a1a66b1e 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -14,10 +14,6 @@ cdot = pytest.importorskip("cdot") fastapi = pytest.importorskip("fastapi") -arq = pytest.importorskip("arq") -cdot = pytest.importorskip("cdot") -fastapi = pytest.importorskip("fastapi") - from mavedb.lib.validation.urn_re import MAVEDB_TMP_URN_RE, MAVEDB_SCORE_SET_URN_RE, MAVEDB_EXPERIMENT_URN_RE from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.clinical_control import ClinicalControl From 80a92466960fab0453d33580d9b22bb4566eb24b Mon Sep 17 00:00:00 2001 From: Jeremy Stone <74574922+jstone-uw@users.noreply.github.com> Date: Wed, 21 May 2025 15:18:48 -0700 Subject: [PATCH 141/166] Added unit tests for ClinGen LDH submission without a mapped variant. 
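For reference, an illustrative sketch of the entity shape these parametrized tests assert; all values are invented, and the three mapping keys appear in entContent only when a mapped variant exists (they are omitted entirely otherwise):

    entity_with_mapping = {
        "MaveDBMapping": [
            {
                "entContent": {
                    "mavedb_id": "urn:mavedb:00000001-a-1#1",      # hypothetical variant URN
                    "score": 1.0,
                    "pre_mapped": {"type": "Allele"},              # placeholder VRS payloads
                    "post_mapped": {"type": "Allele"},
                    "mapping_api_version": "pytest.mapping.1.0",
                },
                "entId": "urn:mavedb:00000001-a-1#1",
                "entIri": "https://mavedb.example/urn:mavedb:00000001-a-1#1",  # hypothetical IRI
            }
        ]
    }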
--- .../lib/clingen/test_content_constructors.py | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/tests/lib/clingen/test_content_constructors.py b/tests/lib/clingen/test_content_constructors.py index 95f168fb..e2f037ad 100644 --- a/tests/lib/clingen/test_content_constructors.py +++ b/tests/lib/clingen/test_content_constructors.py @@ -10,6 +10,7 @@ ) from mavedb.lib.clingen.constants import LDH_ENTITY_NAME, LDH_SUBMISSION_TYPE from mavedb import __version__ +import pytest from tests.helpers.constants import ( TEST_HGVS_IDENTIFIER, @@ -51,27 +52,37 @@ def test_construct_ldh_submission_event(): } -def test_construct_ldh_submission_entity(mock_variant, mock_mapped_variant): - result = construct_ldh_submission_entity(mock_variant, mock_mapped_variant) +@pytest.mark.parametrize("has_mapped_variant", [(True), (False)]) +def test_construct_ldh_submission_entity(mock_variant, mock_mapped_variant, has_mapped_variant: bool): + mapped_variant = mock_mapped_variant if has_mapped_variant else None + result = construct_ldh_submission_entity(mock_variant, mapped_variant) assert "MaveDBMapping" in result assert len(result["MaveDBMapping"]) == 1 mapping = result["MaveDBMapping"][0] assert mapping["entContent"]["mavedb_id"] == VALID_VARIANT_URN - assert mapping["entContent"]["pre_mapped"] == TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X - assert mapping["entContent"]["post_mapped"] == TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X - assert mapping["entContent"]["mapping_api_version"] == "pytest.mapping.1.0" assert mapping["entContent"]["score"] == 1.0 + if has_mapped_variant: + assert mapping["entContent"]["pre_mapped"] == TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X + assert mapping["entContent"]["post_mapped"] == TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X + assert mapping["entContent"]["mapping_api_version"] == "pytest.mapping.1.0" + else: + assert "pre_mapped" not in mapping["entContent"] + assert "post_mapped" not in mapping["entContent"] + assert "mapping_api_version" not in mapping["entContent"] + assert mapping["entId"] == VALID_VARIANT_URN assert mapping["entIri"] == f"{MAVEDB_FRONTEND_URL}/{VALID_VARIANT_URN}" -def test_construct_ldh_submission(mock_variant, mock_mapped_variant): +@pytest.mark.parametrize("has_mapped_variant", [(True), (False)]) +def test_construct_ldh_submission(mock_variant, mock_mapped_variant, has_mapped_variant: bool): + mapped_variant = mock_mapped_variant if has_mapped_variant else None variant_content = [ - (TEST_HGVS_IDENTIFIER, mock_variant, mock_mapped_variant), - (TEST_HGVS_IDENTIFIER, mock_variant, mock_mapped_variant), + (TEST_HGVS_IDENTIFIER, mock_variant, mapped_variant), + (TEST_HGVS_IDENTIFIER, mock_variant, mapped_variant), ] uuid_1 = UUID("12345678-1234-5678-1234-567812345678") From a68de0a79dfd33cf1d1da7f4808dac87c778c769 Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Thu, 22 May 2025 11:06:06 -0700 Subject: [PATCH 142/166] Support mapper update for multi-target score sets --- src/mavedb/worker/jobs.py | 80 ++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index 74d645a0..b3511fd7 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -50,6 +50,7 @@ from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.mapped_variant import MappedVariant from mavedb.models.published_variant import PublishedVariantsMV +from mavedb.models.target_gene import TargetGene from mavedb.models.score_set 
import ScoreSet from mavedb.models.user import User from mavedb.models.variant import Variant @@ -247,6 +248,12 @@ async def create_variants_for_score_set( # Mapping variants #################################################################################################### +ANNOTATION_LAYERS = { + "g": "genomic", + "p": "protein", + "c": "cdna", +} + @asynccontextmanager async def mapping_in_execution(redis: ArqRedis, job_id: str): @@ -397,48 +404,43 @@ async def map_variants_for_score_set( # TODO(VariantEffect/dcd-mapping2#3) after adding accession-based score set mapping support: # this also assumes that the score set is based on a target sequence, not a target accession - computed_genomic_ref = mapping_results.get("computed_genomic_reference_sequence") - mapped_genomic_ref = mapping_results.get("mapped_genomic_reference_sequence") - computed_protein_ref = mapping_results.get("computed_protein_reference_sequence") - mapped_protein_ref = mapping_results.get("mapped_protein_reference_sequence") - - if computed_genomic_ref: - target_sequence = computed_genomic_ref["sequence"] # noqa: F841 - elif computed_protein_ref: - target_sequence = computed_protein_ref["sequence"] # noqa: F841 - else: + reference_metadata = mapping_results.get("reference_sequences") + if not reference_metadata: raise NonexistentMappingReferenceError() - # TODO(VariantEffect/dcd_mapping2#2): Handle variant mappings for score sets with more than 1 target. - target_gene = score_set.target_genes[0] - - excluded_pre_mapped_keys = {"sequence"} - if computed_genomic_ref and mapped_genomic_ref: - pre_mapped_metadata = computed_genomic_ref - target_gene.pre_mapped_metadata = cast( - { - "genomic": { - k: pre_mapped_metadata[k] - for k in set(list(pre_mapped_metadata.keys())) - excluded_pre_mapped_keys - } - }, - JSONB, - ) - target_gene.post_mapped_metadata = cast({"genomic": mapped_genomic_ref}, JSONB) - elif computed_protein_ref and mapped_protein_ref: - pre_mapped_metadata = computed_protein_ref - target_gene.pre_mapped_metadata = cast( - { - "protein": { - k: pre_mapped_metadata[k] - for k in set(list(pre_mapped_metadata.keys())) - excluded_pre_mapped_keys + for target_gene_identifier in reference_metadata: + target_gene = db.scalars( + select( + TargetGene.where( + TargetGene.name == target_gene_identifier, TargetGene.score_set_id == score_set.id + ) + ) + ).one_or_none() + if not target_gene: + raise ValueError( + f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." + ) + # allow for multiple annotation layers + pre_mapped_metadata = {} + post_mapped_metadata = {} + excluded_pre_mapped_keys = {"sequence"} + for annotation_layer in reference_metadata[target_gene_identifier]: + layer_premapped = reference_metadata[target_gene_identifier][annotation_layer].get( + "computed_reference_sequence" + ) + if layer_premapped: + pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { + k: layer_premapped[k] + for k in set(list(layer_premapped.keys())) + - excluded_pre_mapped_keys # TODO does this work if no 'sequence' key? 
} - }, - JSONB, - ) - target_gene.post_mapped_metadata = cast({"protein": mapped_protein_ref}, JSONB) - else: - raise NonexistentMappingReferenceError() + layer_postmapped = reference_metadata[target_gene_identifier][annotation_layer].get( + "mapped_reference_sequence" + ) + if layer_postmapped: + post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped + target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) + target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) total_variants = 0 successful_mapped_variants = 0 From f44197ee6e274d20d7ff2c297719606e39c88dc1 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Fri, 23 May 2025 14:54:49 -0700 Subject: [PATCH 143/166] Add OddsPath Property to ScoreRange Data Model Adds an `OddsPath` property to the score range data model. This property is optional. If it is given, it should contain a `ratios` object with `abnormal` and `normal` (both floats), a `evidenceStrengths` property with `normal` and `abnormal` (both strings), and an optional `source` field. Uses the PublicationIdentifierBase view model for the OddsPath source. Adds a validator to the score set record that enforces this publication identifier has been included with the score set as either a primary or secondary publication identifier. Adds tests for this check and to ensure publications are added successfully. --- src/mavedb/view_models/odds_path.py | 40 ++++++++++++ src/mavedb/view_models/score_set.py | 31 ++++++++++ tests/helpers/constants.py | 94 +++++++++++++++++++++++++++++ tests/routers/test_score_set.py | 75 +++++++++++++++++++++-- tests/view_models/test_score_set.py | 36 ++++++++++- 5 files changed, 271 insertions(+), 5 deletions(-) create mode 100644 src/mavedb/view_models/odds_path.py diff --git a/src/mavedb/view_models/odds_path.py b/src/mavedb/view_models/odds_path.py new file mode 100644 index 00000000..094e1fa7 --- /dev/null +++ b/src/mavedb/view_models/odds_path.py @@ -0,0 +1,40 @@ +from typing import Literal, Optional, Sequence + +from mavedb.view_models import record_type_validator, set_record_type +from mavedb.view_models.base.base import BaseModel +from mavedb.view_models.publication_identifier import PublicationIdentifierBase + + +class OddsPathRatio(BaseModel): + normal: float + abnormal: float + + +class OddsPathEvidenceStrengths(BaseModel): + normal: Literal["BS3_STRONG"] + abnormal: Literal["PS3_STRONG"] + + +class OddsPathBase(BaseModel): + ratios: OddsPathRatio + evidence_strengths: OddsPathEvidenceStrengths + + +class OddsPathModify(OddsPathBase): + source: Optional[list[PublicationIdentifierBase]] = None + + +class OddsPathCreate(OddsPathModify): + pass + + +class SavedOddsPath(OddsPathBase): + record_type: str = None # type: ignore + + source: Optional[Sequence[PublicationIdentifierBase]] = None + + _record_type_factory = record_type_validator()(set_record_type) + + +class OddsPath(SavedOddsPath): + pass diff --git a/src/mavedb/view_models/score_set.py b/src/mavedb/view_models/score_set.py index 0ed722ae..bd06b856 100644 --- a/src/mavedb/view_models/score_set.py +++ b/src/mavedb/view_models/score_set.py @@ -21,6 +21,7 @@ DoiIdentifierCreate, SavedDoiIdentifier, ) +from mavedb.view_models.odds_path import OddsPath from mavedb.view_models.license import ShortLicense from mavedb.view_models.publication_identifier import ( PublicationIdentifier, @@ -79,6 +80,7 @@ def ranges_are_not_backwards(cls, field_value: tuple[Any]): class ScoreRanges(BaseModel): wt_score: Optional[float] ranges: list[ScoreRange] # type: ignore 
+ odds_path: Optional[OddsPath] = None class ScoreSetGetter(PublicationIdentifiersGetter): @@ -292,6 +294,35 @@ def wild_type_score_in_normal_range(cls, field_value: Optional[ScoreRanges]): custom_loc=["body", "scoreRanges", "wtScore"], ) + @root_validator() + def validate_score_range_odds_path_source_in_publication_identifiers(cls, values): + score_ranges: Optional[ScoreRanges] = values.get("score_ranges") + if values.get("score_ranges") is None or score_ranges.odds_path is None: + return values + + if score_ranges.odds_path.source is None or len(score_ranges.odds_path.source) == 0: + return values + + for idx, pub in enumerate(score_ranges.odds_path.source): + primary_publication_identifiers = ( + values.get("primary_publication_identifiers", []) + if values.get("primary_publication_identifiers") + else [] + ) + secondary_publication_identifiers = ( + values.get("secondary_publication_identifiers", []) + if values.get("secondary_publication_identifiers") + else [] + ) + if pub not in [*primary_publication_identifiers, *secondary_publication_identifiers]: + raise ValidationError( + f"Odds path source publication identifier at index {idx} is not defined in score set publications. " + "To use a publication identifier in the odds path source, it must be defined in the primary or secondary publication identifiers.", + custom_loc=["body", "scoreRanges", "oddsPath", "source", idx], + ) + + return values + class ScoreSetCreate(ScoreSetModify): """View model for creating a new score set.""" diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index 199ff1b4..ec2b3f91 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -862,12 +862,66 @@ } +TEST_ODDS_PATH = { + "ratios": { + "normal": 0.5, + "abnormal": 5.0, + }, + "evidence_strengths": { + "normal": "BS3_STRONG", + "abnormal": "PS3_STRONG", + }, + "source": None, +} + + +TEST_SAVED_ODDS_PATH = { + "recordType": "OddsPath", + "ratios": { + "normal": 0.5, + "abnormal": 5.0, + }, + "evidenceStrengths": { + "normal": "BS3_STRONG", + "abnormal": "PS3_STRONG", + }, +} + + +TEST_ODDS_PATH_WITH_SOURCE = { + "ratios": { + "normal": 0.5, + "abnormal": 5.0, + }, + "evidence_strengths": { + "normal": "BS3_STRONG", + "abnormal": "PS3_STRONG", + }, + "source": [{"identifier": TEST_PUBMED_IDENTIFIER, "db_name": "PubMed"}], +} + + +TEST_SAVED_ODDS_PATH_WITH_SOURCE = { + "recordType": "OddsPath", + "ratios": { + "normal": 0.5, + "abnormal": 5.0, + }, + "evidenceStrengths": { + "normal": "BS3_STRONG", + "abnormal": "PS3_STRONG", + }, + "source": [{"identifier": TEST_PUBMED_IDENTIFIER, "dbName": "PubMed"}], +} + + TEST_SCORE_SET_RANGE = { "wt_score": 1.0, "ranges": [ {"label": "test1", "classification": "normal", "range": (0, 2.0)}, {"label": "test2", "classification": "abnormal", "range": (-2.0, 0)}, ], + "odds_path": None, } @@ -880,6 +934,46 @@ } +TEST_SCORE_SET_RANGE_WITH_ODDS_PATH = { + "wt_score": 1.0, + "ranges": [ + {"label": "test1", "classification": "normal", "range": (0, 2.0)}, + {"label": "test2", "classification": "abnormal", "range": (-2.0, 0)}, + ], + "odds_path": TEST_ODDS_PATH, +} + + +TEST_SAVED_SCORE_SET_RANGE_WITH_ODDS_PATH = { + "wtScore": 1.0, + "ranges": [ + {"label": "test1", "classification": "normal", "range": [0.0, 2.0]}, + {"label": "test2", "classification": "abnormal", "range": [-2.0, 0.0]}, + ], + "oddsPath": TEST_SAVED_ODDS_PATH, +} + + +TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE = { + "wt_score": 1.0, + "ranges": [ + {"label": "test1", "classification": "normal", "range": (0, 2.0)}, + 
{"label": "test2", "classification": "abnormal", "range": (-2.0, 0)}, + ], + "odds_path": TEST_ODDS_PATH_WITH_SOURCE, +} + + +TEST_SAVED_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE = { + "wtScore": 1.0, + "ranges": [ + {"label": "test1", "classification": "normal", "range": [0.0, 2.0]}, + {"label": "test2", "classification": "abnormal", "range": [-2.0, 0.0]}, + ], + "oddsPath": TEST_SAVED_ODDS_PATH_WITH_SOURCE, +} + + TEST_SCORE_CALIBRATION = { "parameter_sets": [ { diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index a1a66b1e..d854af5e 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -32,9 +32,11 @@ TEST_MINIMAL_SEQ_SCORESET_RESPONSE, TEST_PUBMED_IDENTIFIER, TEST_ORCID_ID, + TEST_SAVED_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE, TEST_SCORE_SET_RANGE, TEST_SAVED_SCORE_SET_RANGE, TEST_MINIMAL_ACC_SCORESET_RESPONSE, + TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE, TEST_USER, TEST_INACTIVE_LICENSE, SAVED_DOI_IDENTIFIER, @@ -45,6 +47,8 @@ TEST_SAVED_SCORE_CALIBRATION, TEST_SAVED_CLINVAR_CONTROL, TEST_SAVED_GENERIC_CLINICAL_CONTROL, + TEST_SCORE_SET_RANGE_WITH_ODDS_PATH, + TEST_SAVED_SCORE_SET_RANGE_WITH_ODDS_PATH, ) from tests.helpers.dependency_overrider import DependencyOverrider from tests.helpers.util.common import update_expected_response_for_created_resources @@ -141,11 +145,18 @@ def test_create_score_set_with_contributor(client, setup_router_db): assert response.status_code == 200 -def test_create_score_set_with_score_range(client, setup_router_db): +@pytest.mark.parametrize( + "score_ranges,saved_score_ranges", + [ + (TEST_SCORE_SET_RANGE, TEST_SAVED_SCORE_SET_RANGE), + (TEST_SCORE_SET_RANGE_WITH_ODDS_PATH, TEST_SAVED_SCORE_SET_RANGE_WITH_ODDS_PATH), + ], +) +def test_create_score_set_with_score_range(client, setup_router_db, score_ranges, saved_score_ranges): experiment = create_experiment(client) score_set = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set["experimentUrn"] = experiment["urn"] - score_set.update({"score_ranges": TEST_SCORE_SET_RANGE}) + score_set.update({"score_ranges": score_ranges}) response = client.post("/api/v1/score-sets/", json=score_set) assert response.status_code == 200 @@ -157,7 +168,7 @@ def test_create_score_set_with_score_range(client, setup_router_db): expected_response = update_expected_response_for_created_resources( deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, response_data ) - expected_response["scoreRanges"] = TEST_SAVED_SCORE_SET_RANGE + expected_response["scoreRanges"] = saved_score_ranges assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: @@ -167,6 +178,60 @@ def test_create_score_set_with_score_range(client, setup_router_db): assert response.status_code == 200 +@pytest.mark.parametrize("publication_list", ["primary_publication_identifiers", "secondary_publication_identifiers"]) +@pytest.mark.parametrize( + "mock_publication_fetch", + [({"dbName": "PubMed", "identifier": f"{TEST_PUBMED_IDENTIFIER}"})], + indirect=["mock_publication_fetch"], +) +def test_create_score_set_with_score_range_and_odds_path_source( + client, setup_router_db, publication_list, mock_publication_fetch +): + experiment = create_experiment(client) + score_set = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + score_set["experimentUrn"] = experiment["urn"] + score_set[publication_list] = TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE["odds_path"]["source"] + score_set.update({"score_ranges": TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE}) + + response = 
client.post("/api/v1/score-sets/", json=score_set) + assert response.status_code == 200 + response_data = response.json() + + jsonschema.validate(instance=response_data, schema=ScoreSet.schema()) + assert isinstance(MAVEDB_TMP_URN_RE.fullmatch(response_data["urn"]), re.Match) + + expected_response = update_expected_response_for_created_resources( + deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, response_data + ) + expected_response[camelize(publication_list)] = [SAVED_PUBMED_PUBLICATION] + expected_response["scoreRanges"] = TEST_SAVED_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE + + assert sorted(expected_response.keys()) == sorted(response_data.keys()) + for key in expected_response: + assert (key, expected_response[key]) == (key, response_data[key]) + + response = client.get(f"/api/v1/score-sets/{response_data['urn']}") + assert response.status_code == 200 + + +def test_cannot_create_score_set_with_score_range_and_odds_path_source_when_publication_not_in_publications( + client, setup_router_db +): + experiment = create_experiment(client) + score_set = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + score_set["experimentUrn"] = experiment["urn"] + score_set.update({"score_ranges": TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE}) + + response = client.post("/api/v1/score-sets/", json=score_set) + assert response.status_code == 422 + + response_data = response.json() + assert ( + "Odds path source publication identifier at index 0 is not defined in score set publications." + in response_data["detail"][0]["msg"] + ) + + def test_remove_score_range_from_score_set(client, setup_router_db): experiment = create_experiment(client) score_set = deepcopy(TEST_MINIMAL_SEQ_SCORESET) @@ -1166,7 +1231,9 @@ def test_multiple_score_set_meta_analysis_single_experiment( ) published_score_set_1_refresh = (client.get(f"/api/v1/score-sets/{published_score_set_1['urn']}")).json() - assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted([published_score_set_1["urn"], published_score_set_2["urn"]]) + assert meta_score_set["metaAnalyzesScoreSetUrns"] == sorted( + [published_score_set_1["urn"], published_score_set_2["urn"]] + ) assert published_score_set_1_refresh["metaAnalyzedByScoreSetUrns"] == [meta_score_set["urn"]] with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: diff --git a/tests/view_models/test_score_set.py b/tests/view_models/test_score_set.py index 5f45bce0..74834421 100644 --- a/tests/view_models/test_score_set.py +++ b/tests/view_models/test_score_set.py @@ -3,7 +3,12 @@ from mavedb.view_models.publication_identifier import PublicationIdentifierCreate from mavedb.view_models.score_set import ScoreSetCreate, ScoreSetModify from mavedb.view_models.target_gene import TargetGeneCreate -from tests.helpers.constants import TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_SEQ_SCORESET +from tests.helpers.constants import ( + TEST_MINIMAL_ACC_SCORESET, + TEST_MINIMAL_SEQ_SCORESET, + TEST_SCORE_SET_RANGE_WITH_ODDS_PATH, + TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE, +) def test_cannot_create_score_set_without_a_target(): @@ -494,6 +499,35 @@ def test_can_create_score_set_with_any_range_classification(classification): ScoreSetModify(**score_set_test) +def test_can_create_score_set_with_odds_path_in_score_ranges(): + score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() + score_set_test["score_ranges"] = TEST_SCORE_SET_RANGE_WITH_ODDS_PATH.copy() + + ScoreSetModify(**score_set_test) + + +def test_can_create_score_set_with_odds_path_and_source_in_score_ranges(): + score_set_test = 
TEST_MINIMAL_SEQ_SCORESET.copy() + score_set_test["primary_publication_identifiers"] = TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE["odds_path"][ + "source" + ] + score_set_test["score_ranges"] = TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE.copy() + + ScoreSetModify(**score_set_test) + + +def test_cannot_create_score_set_with_odds_path_and_source_in_score_ranges_if_source_not_in_score_set_publications(): + score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() + score_set_test["score_ranges"] = TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE.copy() + + with pytest.raises(ValueError) as exc_info: + ScoreSetModify(**score_set_test) + + assert "Odds path source publication identifier at index 0 is not defined in score set publications." in str( + exc_info.value + ) + + def test_cannot_create_score_set_with_inconsistent_base_editor_flags(): score_set_test = TEST_MINIMAL_ACC_SCORESET.copy() From e4bd7e4c055b5e0b4fa0287c967b6894935b151a Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Wed, 28 May 2025 14:49:42 +1000 Subject: [PATCH 144/166] Due to permission check and superseding problems, enriching experiment and score set in router functions. Found a bug in add_contributor function that leads to some tests can't pass. Fix that one first then I'll keep adding more tests. --- src/mavedb/lib/experiments.py | 30 +++++ src/mavedb/routers/experiment_sets.py | 11 +- src/mavedb/routers/experiments.py | 33 +++++- src/mavedb/routers/score_sets.py | 54 +++++++-- src/mavedb/view_models/experiment.py | 1 + src/mavedb/view_models/experiment_set.py | 1 + tests/routers/test_experiment_set.py | 120 +++++++++++++++++++ tests/routers/test_experiments.py | 144 ++++++++++++++++++++++- tests/routers/test_score_set.py | 16 ++- 9 files changed, 387 insertions(+), 23 deletions(-) create mode 100644 tests/routers/test_experiment_set.py diff --git a/src/mavedb/lib/experiments.py b/src/mavedb/lib/experiments.py index 3452152a..dd8a47f3 100644 --- a/src/mavedb/lib/experiments.py +++ b/src/mavedb/lib/experiments.py @@ -5,6 +5,8 @@ from sqlalchemy.orm import Session from mavedb.lib.logging.context import logging_context, save_to_logging_context +from mavedb.lib.permissions import Action +from mavedb.lib.score_sets import find_superseded_score_set_tail from mavedb.models.contributor import Contributor from mavedb.models.controlled_keyword import ControlledKeyword from mavedb.models.experiment import Experiment @@ -14,6 +16,7 @@ from mavedb.models.publication_identifier import PublicationIdentifier from mavedb.models.score_set import ScoreSet from mavedb.models.user import User +from mavedb.view_models import experiment from mavedb.view_models.search import ExperimentsSearch logger = logging.getLogger(__name__) @@ -125,3 +128,30 @@ def search_experiments( ) return items + + +def enrich_experiment_with_num_score_sets( + item_update: Experiment, user: Optional[User] +) -> experiment: + """ + Validate and update the number of score set in experiment. The superseded score set is excluded. 
+ Data structure: experiment{score_set_urns, num_score_sets} + """ + filter_superseded_score_set_tails = [ + find_superseded_score_set_tail( + score_set, + Action.READ, + user + ) for score_set in item_update.score_sets + ] + filtered_score_sets = [score_set for score_set in filter_superseded_score_set_tails if score_set is not None] + filtered_score_set_urns = [] + if filtered_score_sets: + filtered_score_set_urns = list(set([score_set.urn for score_set in filtered_score_sets])) + filtered_score_set_urns.sort() + + updated_experiment = experiment.Experiment.from_orm(item_update).copy(update={ + "num_score_sets": len(filtered_score_set_urns), + "score_set_urns": filtered_score_set_urns, + }) + return updated_experiment diff --git a/src/mavedb/routers/experiment_sets.py b/src/mavedb/routers/experiment_sets.py index 283b3188..a0bd120f 100644 --- a/src/mavedb/routers/experiment_sets.py +++ b/src/mavedb/routers/experiment_sets.py @@ -7,6 +7,7 @@ from mavedb import deps from mavedb.lib.authentication import UserData, get_current_user +from mavedb.lib.experiments import enrich_experiment_with_num_score_sets from mavedb.lib.logging import LoggedRoute from mavedb.lib.logging.context import logging_context, save_to_logging_context from mavedb.lib.permissions import Action, has_permission @@ -51,5 +52,13 @@ def fetch_experiment_set( # Filter experiment sub-resources to only those experiments readable by the requesting user. item.experiments[:] = [exp for exp in item.experiments if has_permission(user_data, exp, Action.READ).permitted] + enriched_experiments = [ + enrich_experiment_with_num_score_sets(exp, user_data) + for exp in item.experiments + ] + enriched_item = experiment_set.ExperimentSet.from_orm(item).copy(update={ + "experiments": enriched_experiments, + "numExperiments": len(enriched_experiments) + }) - return item + return enriched_item diff --git a/src/mavedb/routers/experiments.py b/src/mavedb/routers/experiments.py index 458f40a5..c35d4b13 100644 --- a/src/mavedb/routers/experiments.py +++ b/src/mavedb/routers/experiments.py @@ -14,7 +14,7 @@ from mavedb.lib.authorization import require_current_user, require_current_user_with_email from mavedb.lib.contributors import find_or_create_contributor from mavedb.lib.exceptions import NonexistentOrcidUserError -from mavedb.lib.experiments import search_experiments as _search_experiments +from mavedb.lib.experiments import search_experiments as _search_experiments, enrich_experiment_with_num_score_sets from mavedb.lib.identifiers import ( find_or_create_doi_identifier, find_or_create_publication_identifier, @@ -89,7 +89,13 @@ def search_experiments(search: ExperimentsSearch, db: Session = Depends(deps.get """ Search experiments. """ - return _search_experiments(db, None, search) + items = _search_experiments(db, None, search) + if items: + items = [ + enrich_experiment_with_num_score_sets(exp, None) + for exp in items + ] + return items @router.post( @@ -105,7 +111,13 @@ def search_my_experiments( """ Search experiments created by the current user.. 
""" - return _search_experiments(db, user_data.user, search) + items = _search_experiments(db, user_data.user, search) + if items: + items = [ + enrich_experiment_with_num_score_sets(exp, user_data) + for exp in items + ] + return items @router.get( @@ -133,7 +145,9 @@ def fetch_experiment( raise HTTPException(status_code=404, detail=f"Experiment with URN {urn} not found") assert_permission(user_data, item, Action.READ) - return item + updated_experiment = enrich_experiment_with_num_score_sets(item, user_data) + + return updated_experiment @router.get( @@ -184,6 +198,13 @@ def get_experiment_score_sets( else: filtered_score_sets.sort(key=attrgetter("urn")) save_to_logging_context({"associated_resources": [item.urn for item in score_set_result]}) + enriched_score_sets = [] + for fs in filtered_score_sets: + enriched_experiment = enrich_experiment_with_num_score_sets(fs.experiment, user_data) + response_item = score_set.ScoreSet.from_orm(fs).copy(update={"experiment": enriched_experiment}) + enriched_score_sets.append(response_item) + + return enriched_score_sets return filtered_score_sets @@ -429,7 +450,9 @@ async def update_experiment( db.refresh(item) save_to_logging_context({"updated_resource": item.urn}) - return item + updated_item = enrich_experiment_with_num_score_sets(item, user_data) + + return updated_item @router.delete("/experiments/{urn}", response_model=None, responses={422: {}}) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 65e83c0f..28855193 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -23,6 +23,7 @@ ) from mavedb.lib.contributors import find_or_create_contributor from mavedb.lib.exceptions import MixedTargetError, NonexistentOrcidUserError, ValidationError +from mavedb.lib.experiments import enrich_experiment_with_num_score_sets from mavedb.lib.identifiers import ( create_external_gene_identifier_offset, find_or_create_doi_identifier, @@ -135,7 +136,15 @@ def search_score_sets( Search score sets. """ score_sets = _search_score_sets(db, None, search) - return fetch_superseding_score_set_in_search_result(score_sets, user_data, search) + updated_score_sets = fetch_superseding_score_set_in_search_result(score_sets, user_data, search) + enriched_score_sets = [] + if updated_score_sets: + for u in updated_score_sets: + enriched_experiment = enrich_experiment_with_num_score_sets(u.experiment, user_data) + response_item = score_set.ScoreSet.from_orm(u).copy(update={"experiment": enriched_experiment}) + enriched_score_sets.append(response_item) + + return enriched_score_sets @router.get("/score-sets/mapped-genes", status_code=200, response_model=dict[str, list[str]]) @@ -183,7 +192,15 @@ def search_my_score_sets( Search score sets created by the current user.. """ score_sets = _search_score_sets(db, user_data.user, search) - return fetch_superseding_score_set_in_search_result(score_sets, user_data, search) + updated_score_sets = fetch_superseding_score_set_in_search_result(score_sets, user_data, search) + enriched_score_sets = [] + if updated_score_sets: + for u in updated_score_sets: + enriched_experiment = enrich_experiment_with_num_score_sets(u.experiment, user_data) + response_item = score_set.ScoreSet.from_orm(u).copy(update={"experiment": enriched_experiment}) + enriched_score_sets.append(response_item) + + return enriched_score_sets @router.get( @@ -203,7 +220,11 @@ async def show_score_set( Fetch a single score set by URN. 
""" save_to_logging_context({"requested_resource": urn}) - return await fetch_score_set_by_urn(db, urn, user_data, None, False) + item = await fetch_score_set_by_urn(db, urn, user_data, None, False) + enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) + response_item = score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) + + return response_item @router.get( @@ -647,7 +668,11 @@ async def create_score_set( db.refresh(item) save_to_logging_context({"created_resource": item.urn}) - return item + + enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) + response_item = score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) + + return response_item @router.post( @@ -711,7 +736,10 @@ async def upload_score_set_variant_data( db.add(item) db.commit() db.refresh(item) - return item + enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) + response_item = score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) + + return response_item @router.post( @@ -748,7 +776,10 @@ async def update_score_set_calibration_data( db.refresh(item) save_to_logging_context({"updated_resource": item.urn}) - return item + enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) + response_item = score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) + + return response_item @router.put( @@ -1004,7 +1035,11 @@ async def update_score_set( db.refresh(item) save_to_logging_context({"updated_resource": item.urn}) - return item + + enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) + response_item = score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) + + return response_item @router.delete("/score-sets/{urn}", responses={422: {}}) @@ -1142,4 +1177,7 @@ async def publish_score_set( msg="Failed to enqueue published variant materialized view refresh job.", extra=logging_context() ) - return item + enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) + response_item = score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) + + return response_item diff --git a/src/mavedb/view_models/experiment.py b/src/mavedb/view_models/experiment.py index e0b50b5a..3b929e24 100644 --- a/src/mavedb/view_models/experiment.py +++ b/src/mavedb/view_models/experiment.py @@ -132,6 +132,7 @@ def publication_identifiers_validator(cls, value, values, field) -> list[Publica # Properties to return to non-admin clients class Experiment(SavedExperiment): + num_score_sets: Optional[int] = None score_set_urns: list[str] processing_state: Optional[str] doi_identifiers: Sequence[DoiIdentifier] diff --git a/src/mavedb/view_models/experiment_set.py b/src/mavedb/view_models/experiment_set.py index f2271df0..d9f0fcdf 100644 --- a/src/mavedb/view_models/experiment_set.py +++ b/src/mavedb/view_models/experiment_set.py @@ -45,6 +45,7 @@ class ExperimentSet(SavedExperimentSet): created_by: Optional[User] modified_by: Optional[User] experiments: Sequence[Experiment] + num_experiments: Optional[int] = None # Properties to return to admin clients diff --git a/tests/routers/test_experiment_set.py b/tests/routers/test_experiment_set.py new file mode 100644 index 00000000..dd864e26 --- /dev/null +++ b/tests/routers/test_experiment_set.py @@ -0,0 +1,120 @@ +from mavedb.models.experiment import Experiment 
as ExperimentDbModel +from mavedb.models.experiment_set import ExperimentSet as ExperimentSetDbModel +from mavedb.models.score_set import ScoreSet as ScoreSetDbModel + +from tests.helpers.constants import ( + TEST_USER, +) +from tests.helpers.util import ( + add_contributor, + change_ownership, + create_experiment, + create_seq_score_set_with_variants, +) + + +def test_users_get_one_private_experiment_from_own_experiment_set( + session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + response = client.get(f"/api/v1/experiment-sets/{experiment['experimentSetUrn']}") + assert response.status_code == 200 + response_data = response.json() + assert response_data["numExperiments"] == 1 + assert response_data["experiments"][0]["urn"] == experiment["urn"] + assert response_data["experiments"][0]["numScoreSets"] == 0 + + +def test_users_get_one_experiment_one_score_set_from_own_private_experiment_set( + session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + response = client.get(f"/api/v1/experiment-sets/{experiment['experimentSetUrn']}") + assert response.status_code == 200 + response_data = response.json() + assert response_data["numExperiments"] == 1 + assert response_data["experiments"][0]["urn"] == experiment["urn"] + assert response_data["experiments"][0]["numScoreSets"] == 1 + assert score_set["urn"] in response_data["experiments"][0]["scoreSetUrns"] + + +# def test_users_get_one_experiment_one_score_set_from_others_private_experiment_set( +# session, data_provider, client, setup_router_db, data_files): +# experiment = create_experiment(client) +# score_set = create_seq_score_set_with_variants( +# client, session, data_provider, experiment["urn"], data_files / "scores.csv" +# ) +# change_ownership(session, score_set["urn"], ScoreSetDbModel) +# change_ownership(session, experiment["urn"], ExperimentDbModel) +# change_ownership(session, experiment["experimentSetUrn"], ExperimentSetDbModel) +# add_contributor( +# session, +# score_set["urn"], +# ScoreSetDbModel, +# TEST_USER["username"], +# TEST_USER["first_name"], +# TEST_USER["last_name"], +# ) +# add_contributor( +# session, +# experiment["urn"], +# ExperimentDbModel, +# TEST_USER["username"], +# TEST_USER["first_name"], +# TEST_USER["last_name"], +# ) +# add_contributor( +# session, +# experiment["experimentSetUrn"], +# ExperimentSetDbModel, +# TEST_USER["username"], +# TEST_USER["first_name"], +# TEST_USER["last_name"], +# ) +# response = client.get(f"/api/v1/experiment-sets/{experiment['experimentSetUrn']}") +# assert response.status_code == 200 +# response_data = response.json() +# assert response_data["numExperiments"] == 1 +# assert response_data["experiments"][0]["urn"] == experiment["urn"] +# assert response_data["experiments"][0]["numScoreSets"] == 1 +# assert score_set["urn"] in response_data["experiments"][0]["scoreSetUrns"] + + +def test_users_get_one_experiment_one_score_set_from_own_public_experiment_set( + session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + pub_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") + assert pub_score_set_response.status_code == 200 + pub_score_set = 
pub_score_set_response.json() + response = client.get(f"/api/v1/experiment-sets/{pub_score_set['experiment']['experimentSetUrn']}") + assert response.status_code == 200 + response_data = response.json() + assert response_data["numExperiments"] == 1 + assert response_data["experiments"][0]["urn"] == pub_score_set["experiment"]["urn"] + assert response_data["experiments"][0]["numScoreSets"] == 1 + assert pub_score_set["urn"] in response_data["experiments"][0]["scoreSetUrns"] + + +def test_users_get_one_experiment_one_score_set_from_other_public_experiment_set( + session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + pub_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") + assert pub_score_set_response.status_code == 200 + pub_score_set = pub_score_set_response.json() + change_ownership(session, pub_score_set["urn"], ScoreSetDbModel) + change_ownership(session, pub_score_set['experiment']['urn'], ExperimentDbModel) + change_ownership(session, pub_score_set['experiment']['experimentSetUrn'], ExperimentSetDbModel) + response = client.get(f"/api/v1/experiment-sets/{pub_score_set['experiment']['experimentSetUrn']}") + assert response.status_code == 200 + response_data = response.json() + assert response_data["numExperiments"] == 1 + assert response_data["experiments"][0]["urn"] == pub_score_set["experiment"]["urn"] + assert response_data["experiments"][0]["numScoreSets"] == 1 + assert pub_score_set["urn"] in response_data["experiments"][0]["scoreSetUrns"] \ No newline at end of file diff --git a/tests/routers/test_experiments.py b/tests/routers/test_experiments.py index 199cd2b7..88720c55 100644 --- a/tests/routers/test_experiments.py +++ b/tests/routers/test_experiments.py @@ -53,7 +53,11 @@ def test_create_minimal_experiment(client, setup_router_db): assert isinstance(MAVEDB_TMP_URN_RE.fullmatch(response_data["urn"]), re.Match) assert isinstance(MAVEDB_TMP_URN_RE.fullmatch(response_data["experimentSetUrn"]), re.Match) expected_response = deepcopy(TEST_MINIMAL_EXPERIMENT_RESPONSE) - expected_response.update({"urn": response_data["urn"], "experimentSetUrn": response_data["experimentSetUrn"]}) + expected_response.update({ + "urn": response_data["urn"], + "experimentSetUrn": response_data["experimentSetUrn"], + "numScoreSets": 0 + }) assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: assert (key, expected_response[key]) == (key, response_data[key]) @@ -74,7 +78,11 @@ def test_create_experiment_with_contributor(client, setup_router_db): assert isinstance(MAVEDB_TMP_URN_RE.fullmatch(response_data["urn"]), re.Match) assert isinstance(MAVEDB_TMP_URN_RE.fullmatch(response_data["experimentSetUrn"]), re.Match) expected_response = deepcopy(TEST_MINIMAL_EXPERIMENT_RESPONSE) - expected_response.update({"urn": response_data["urn"], "experimentSetUrn": response_data["experimentSetUrn"]}) + expected_response.update({ + "urn": response_data["urn"], + "experimentSetUrn": response_data["experimentSetUrn"], + "numScoreSets": 0 + }) expected_response["contributors"] = [ { "recordType": "Contributor", @@ -96,7 +104,11 @@ def test_create_experiment_with_keywords(session, client, setup_router_db): assert isinstance(MAVEDB_TMP_URN_RE.fullmatch(response_data["urn"]), re.Match) assert isinstance(MAVEDB_TMP_URN_RE.fullmatch(response_data["experimentSetUrn"]), 
re.Match) expected_response = deepcopy(TEST_EXPERIMENT_WITH_KEYWORD_RESPONSE) - expected_response.update({"urn": response_data["urn"], "experimentSetUrn": response_data["experimentSetUrn"]}) + expected_response.update({ + "urn": response_data["urn"], + "experimentSetUrn": response_data["experimentSetUrn"], + "numScoreSets": 0 + }) assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: assert (key, expected_response[key]) == (key, response_data[key]) @@ -389,7 +401,11 @@ def test_create_experiment_that_keywords_have_duplicate_others(client, setup_rou assert isinstance(MAVEDB_TMP_URN_RE.fullmatch(response_data["urn"]), re.Match) assert isinstance(MAVEDB_TMP_URN_RE.fullmatch(response_data["experimentSetUrn"]), re.Match) expected_response = deepcopy(TEST_EXPERIMENT_WITH_KEYWORD_HAS_DUPLICATE_OTHERS_RESPONSE) - expected_response.update({"urn": response_data["urn"], "experimentSetUrn": response_data["experimentSetUrn"]}) + expected_response.update({ + "urn": response_data["urn"], + "experimentSetUrn": response_data["experimentSetUrn"], + "numScoreSets": 0 + }) assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: assert (key, expected_response[key]) == (key, response_data[key]) @@ -944,7 +960,11 @@ def test_create_experiment_with_invalid_primary_publication(client, setup_router def test_get_own_private_experiment(client, setup_router_db): experiment = create_experiment(client) expected_response = deepcopy(TEST_MINIMAL_EXPERIMENT_RESPONSE) - expected_response.update({"urn": experiment["urn"], "experimentSetUrn": experiment["experimentSetUrn"]}) + expected_response.update({ + "urn": experiment["urn"], + "experimentSetUrn": experiment["experimentSetUrn"], + "numScoreSets": 0 + }) response = client.get(f"/api/v1/experiments/{experiment['urn']}") assert response.status_code == 200 response_data = response.json() @@ -977,7 +997,11 @@ def test_anonymous_cannot_get_users_private_experiment(session, client, anonymou def test_admin_can_get_other_users_private_experiment(client, admin_app_overrides, setup_router_db): experiment = create_experiment(client) expected_response = deepcopy(TEST_MINIMAL_EXPERIMENT_RESPONSE) - expected_response.update({"urn": experiment["urn"], "experimentSetUrn": experiment["experimentSetUrn"]}) + expected_response.update({ + "urn": experiment["urn"], + "experimentSetUrn": experiment["experimentSetUrn"], + "numScoreSets": 0 + }) with DependencyOverrider(admin_app_overrides): response = client.get(f"/api/v1/experiments/{experiment['urn']}") @@ -989,6 +1013,114 @@ def test_admin_can_get_other_users_private_experiment(client, admin_app_override assert (key, expected_response[key]) == (key, response_data[key]) +def test_users_get_one_score_set_to_own_private_experiment(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + response = client.get(f"/api/v1/experiments/{experiment['urn']}") + assert response.status_code == 200 + response_data = response.json() + assert response_data["numScoreSets"] == 1 + assert score_set["urn"] in response_data["scoreSetUrns"] + + +def test_users_get_one_score_set_to_own_public_experiment(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_variants( + client, session, data_provider, 
experiment["urn"], data_files / "scores.csv" + ) + pub_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") + assert pub_score_set_response.status_code == 200 + pub_score_set = pub_score_set_response.json() + response = client.get(f"/api/v1/experiments/{pub_score_set['experiment']['urn']}") + assert response.status_code == 200 + response_data = response.json() + assert response_data["numScoreSets"] == 1 + assert pub_score_set["urn"] in response_data["scoreSetUrns"] + + +def test_users_get_one_published_score_set_from_other_experiment(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + pub_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") + assert pub_score_set_response.status_code == 200 + pub_score_set = pub_score_set_response.json() + change_ownership(session, pub_score_set['experiment']['urn'], ExperimentDbModel) + change_ownership(session, pub_score_set["urn"], ScoreSetDbModel) + response = client.get(f"/api/v1/experiments/{pub_score_set['experiment']['urn']}") + assert response.status_code == 200 + response_data = response.json() + assert response_data["numScoreSets"] == 1 + assert pub_score_set["urn"] in response_data["scoreSetUrns"] + + +def test_users_get_one_published_score_set_from_others_experiment_with_a_private_score_set( + session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set_1 = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + pub_score_set_response = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") + assert pub_score_set_response.status_code == 200 + pub_score_set = pub_score_set_response.json() + score_set_2 = create_seq_score_set_with_variants( + client, session, data_provider, pub_score_set['experiment']['urn'], data_files / "scores.csv" + ) + change_ownership(session, score_set_2["urn"], ScoreSetDbModel) + response = client.get(f"/api/v1/experiments/{pub_score_set['experiment']['urn']}") + assert response.status_code == 200 + response_data = response.json() + assert response_data["numScoreSets"] == 1 + assert pub_score_set["urn"] in response_data["scoreSetUrns"] + + +def test_users_get_two_score_sets_from_own_experiment_with_a_private_and_a_published_score_sets( + session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set_1 = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + pub_score_set_response = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") + assert pub_score_set_response.status_code == 200 + pub_score_set = pub_score_set_response.json() + score_set_2 = create_seq_score_set_with_variants( + client, session, data_provider, pub_score_set['experiment']['urn'], data_files / "scores.csv" + ) + response = client.get(f"/api/v1/experiments/{pub_score_set['experiment']['urn']}") + assert response.status_code == 200 + response_data = response.json() + assert response_data["numScoreSets"] == 2 + assert pub_score_set["urn"] in response_data["scoreSetUrns"] + assert score_set_2["urn"] in response_data["scoreSetUrns"] + + +def test_users_get_one_score_set_from_own_experiment_with_a_superseding_score_sets( + session, data_provider, client, 
setup_router_db, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + pub_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") + assert pub_score_set_response.status_code == 200 + pub_score_set = pub_score_set_response.json() + score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + score_set_post_payload["experimentUrn"] = pub_score_set["experiment"]["urn"] + score_set_post_payload["supersededScoreSetUrn"] = pub_score_set["urn"] + superseding_score_set_response = client.post("/api/v1/score-sets/", json=score_set_post_payload) + assert superseding_score_set_response.status_code == 200 + superseding_score_set = superseding_score_set_response.json() + response = client.get(f"/api/v1/experiments/{superseding_score_set['experiment']['urn']}") + assert response.status_code == 200 + response_data = response.json() + # Only the superseding score set in experiment's score set list. + assert response_data["numScoreSets"] == 1 + assert superseding_score_set["urn"] in response_data["scoreSetUrns"] + assert pub_score_set["urn"] not in response_data["scoreSetUrns"] + + def test_search_experiments(session, client, setup_router_db): experiment = create_experiment(client) search_payload = {"text": experiment["shortDescription"]} diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 67c26b27..a93299f0 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -82,6 +82,7 @@ def test_create_minimal_score_set(client, setup_router_db): expected_response = update_expected_response_for_created_resources( deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, response_data ) + expected_response["experiment"].update({"numScoreSets": 1}) assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: @@ -120,6 +121,7 @@ def test_create_score_set_with_contributor(client, setup_router_db): "familyName": "User", } ] + expected_response["experiment"].update({"numScoreSets": 1}) assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: @@ -146,6 +148,7 @@ def test_create_score_set_with_score_range(client, setup_router_db): deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, response_data ) expected_response["scoreRanges"] = TEST_SAVED_SCORESET_RANGE + expected_response["experiment"].update({"numScoreSets": 1}) assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: @@ -172,6 +175,7 @@ def test_remove_score_range_from_score_set(client, setup_router_db): deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, response_data ) expected_response["scoreRanges"] = TEST_SAVED_SCORESET_RANGE + expected_response["experiment"].update({"numScoreSets": 1}) assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: @@ -246,6 +250,7 @@ def test_can_update_score_set_data_before_publication( expected_response = update_expected_response_for_created_resources( deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, score_set ) + expected_response["experiment"].update({"numScoreSets": 1}) response = client.get(f"/api/v1/score-sets/{score_set['urn']}") assert response.status_code == 200 @@ -333,6 +338,7 @@ def test_can_update_score_set_supporting_data_after_publication( "processingState": ProcessingState.success.name, } ) + 
expected_response["experiment"].update({"numScoreSets": 1}) assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: @@ -398,6 +404,7 @@ def test_cannot_update_score_set_target_data_after_publication( "processingState": ProcessingState.success.name, } ) + expected_response["experiment"].update({"numScoreSets": 1}) assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: @@ -429,6 +436,7 @@ def test_get_own_private_score_set(client, setup_router_db): expected_response = update_expected_response_for_created_resources( deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, score_set ) + expected_response["experiment"].update({"numScoreSets": 1}) response = client.get(f"/api/v1/score-sets/{score_set['urn']}") assert response.status_code == 200 @@ -497,6 +505,7 @@ def test_contributor_can_get_other_users_private_score_set(session, client, setu "firstName": EXTRA_USER["first_name"], "lastName": EXTRA_USER["last_name"], } + expected_response["experiment"].update({"numScoreSets": 1}) response = client.get(f"/api/v1/score-sets/{score_set['urn']}") assert response.status_code == 200 @@ -513,7 +522,7 @@ def test_admin_can_get_other_user_private_score_set(session, client, admin_app_o expected_response = update_expected_response_for_created_resources( deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, score_set ) - + expected_response["experiment"].update({"numScoreSets": 1}) with DependencyOverrider(admin_app_overrides): response = client.get(f"/api/v1/score-sets/{score_set['urn']}") @@ -883,7 +892,7 @@ def test_publish_score_set(session, data_provider, client, setup_router_db, data expected_response = update_expected_response_for_created_resources( deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), response_data["experiment"], response_data ) - expected_response["experiment"].update({"publishedDate": date.today().isoformat()}) + expected_response["experiment"].update({"publishedDate": date.today().isoformat(), "numScoreSets": 1}) expected_response.update( { "urn": response_data["urn"], @@ -1031,7 +1040,7 @@ def test_contributor_can_publish_other_users_score_set(session, data_provider, c expected_response = update_expected_response_for_created_resources( deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), response_data["experiment"], response_data ) - expected_response["experiment"].update({"publishedDate": date.today().isoformat()}) + expected_response["experiment"].update({"publishedDate": date.today().isoformat(), "numScoreSets": 1}) expected_response.update( { "urn": response_data["urn"], @@ -2312,6 +2321,7 @@ def test_admin_can_add_score_calibrations_to_score_set(client, setup_router_db, deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, score_set ) expected_response["scoreCalibrations"] = {"test_calibrations": deepcopy(TEST_SAVED_SCORE_CALIBRATION)} + expected_response["experiment"].update({"numScoreSets": 1}) assert response.status_code == 200 for key in expected_response: From 56f88ab7549cdb3f6ec44c829baa7a1a9c772df4 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Wed, 28 May 2025 17:12:14 +1000 Subject: [PATCH 145/166] Modified add_contributor function and add two more related tests. 
--- tests/helpers/constants.py | 11 +++++++++++ tests/helpers/util/contributor.py | 5 +++-- tests/routers/test_experiments.py | 28 ++++++++++++++++++++++++++ tests/routers/test_score_set.py | 33 +++++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 2 deletions(-) diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index 199ff1b4..11800e21 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -177,6 +177,17 @@ "is_first_login": True, } +TEST_USER2 = { + "username": "1111-2222-3333-4444", + "first_name": "First", + "last_name": "Last", + "email": "test_user2@test.com", + "is_active": True, + "is_staff": False, + "is_superuser": False, + "is_first_login": True, +} + CONTRIBUTOR = { "orcid_id": TEST_USER["username"], "given_name": TEST_USER["first_name"], diff --git a/tests/helpers/util/contributor.py b/tests/helpers/util/contributor.py index 7ca05598..bded3773 100644 --- a/tests/helpers/util/contributor.py +++ b/tests/helpers/util/contributor.py @@ -12,11 +12,12 @@ def add_contributor(db: Session, urn: str, model: Any, orcid_id: str, given_name assert item is not None try: - contributor = db.execute(select(Contributor).where(Contributor.orcid_id == orcid_id)).one() + contributor = db.query(Contributor).filter(Contributor.orcid_id == orcid_id).one() except NoResultFound: contributor = Contributor(orcid_id=orcid_id, given_name=given_name, family_name=family_name) db.add(contributor) - item.contributors = [contributor] + if contributor not in item.contributors: + item.contributors.append(contributor) db.add(item) db.commit() diff --git a/tests/routers/test_experiments.py b/tests/routers/test_experiments.py index 6908a0ab..3ca3be17 100644 --- a/tests/routers/test_experiments.py +++ b/tests/routers/test_experiments.py @@ -36,6 +36,7 @@ TEST_PUBMED_IDENTIFIER, TEST_PUBMED_URL_IDENTIFIER, TEST_USER, + TEST_USER2, ) from tests.helpers.dependency_overrider import DependencyOverrider from tests.helpers.util.contributor import add_contributor @@ -712,6 +713,33 @@ def test_admin_can_update_other_users_private_experiment( assert (test_field, response_data[test_field]) == (test_field, test_value) +def test_can_add_two_contributors(session, client, setup_router_db): + experiment = create_experiment(client) + change_ownership(session, experiment["urn"], ExperimentDbModel) + add_contributor( + session, + experiment["urn"], + ExperimentDbModel, + TEST_USER["username"], + TEST_USER["first_name"], + TEST_USER["last_name"], + ) + add_contributor( + session, + experiment["urn"], + ExperimentDbModel, + TEST_USER2["username"], + TEST_USER2["first_name"], + TEST_USER2["last_name"], + ) + response = client.get(f"/api/v1/experiments/{experiment['urn']}") + assert response.status_code == 200 + response_data = response.json() + assert len(response_data["contributors"]) == 2 + assert any(c["orcidId"] == TEST_USER["username"] for c in response_data["contributors"]) + assert any(c["orcidId"] == TEST_USER2["username"] for c in response_data["contributors"]) + + def test_can_edit_published_experiment(client, setup_router_db): pass diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index a1a66b1e..60353010 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -467,6 +467,39 @@ def test_anonymous_user_cannot_get_user_private_score_set(session, client, setup assert f"score set with URN '{score_set['urn']}' not found" in response_data["detail"] +def test_can_add_contributor_in_both_experiment_and_score_set(session, 
client, setup_router_db): + experiment = create_experiment(client) + score_set = create_seq_score_set(client, experiment["urn"]) + change_ownership(session, score_set["urn"], ScoreSetDbModel) + change_ownership(session, experiment["urn"], ExperimentDbModel) + add_contributor( + session, + score_set["urn"], + ScoreSetDbModel, + TEST_USER["username"], + TEST_USER["first_name"], + TEST_USER["last_name"], + ) + add_contributor( + session, + experiment["urn"], + ExperimentDbModel, + TEST_USER["username"], + TEST_USER["first_name"], + TEST_USER["last_name"], + ) + score_set_response = client.get(f"/api/v1/score-sets/{score_set['urn']}") + assert score_set_response.status_code == 200 + ss_response_data = score_set_response.json() + assert len(ss_response_data["contributors"]) == 1 + assert any(c["orcidId"] == TEST_USER["username"] for c in ss_response_data["contributors"]) + experiment_response = client.get(f"/api/v1/experiments/{experiment['urn']}") + assert experiment_response.status_code == 200 + exp_response_data = experiment_response.json() + assert len(exp_response_data["contributors"]) == 1 + assert any(c["orcidId"] == TEST_USER["username"] for c in exp_response_data["contributors"]) + + def test_contributor_can_get_other_users_private_score_set(session, client, setup_router_db): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) From dfd9c0cfc14932358da7f5f4e211dc4551692b2d Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Wed, 28 May 2025 17:14:51 +1000 Subject: [PATCH 146/166] Remove unnecessary import --- tests/helpers/util/contributor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/helpers/util/contributor.py b/tests/helpers/util/contributor.py index bded3773..d63d5b34 100644 --- a/tests/helpers/util/contributor.py +++ b/tests/helpers/util/contributor.py @@ -1,5 +1,4 @@ from sqlalchemy.orm.exc import NoResultFound -from sqlalchemy import select from sqlalchemy.orm import Session from typing import Any From 3f119b9d611404f9b8461084b430b5801bfeacb1 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 28 May 2025 12:05:55 -0700 Subject: [PATCH 147/166] Store Odds Path Ratios alongside their Score Range --- src/mavedb/view_models/odds_path.py | 27 ++-- src/mavedb/view_models/score_range.py | 88 +++++++++++++ src/mavedb/view_models/score_set.py | 60 +++------ tests/helpers/constants.py | 172 +++++++++++++++----------- tests/routers/test_score_set.py | 2 +- tests/view_models/test_odds_path.py | 36 ++++++ tests/view_models/test_score_range.py | 106 ++++++++++++++++ tests/view_models/test_score_set.py | 35 +----- 8 files changed, 358 insertions(+), 168 deletions(-) create mode 100644 src/mavedb/view_models/score_range.py create mode 100644 tests/view_models/test_odds_path.py create mode 100644 tests/view_models/test_score_range.py diff --git a/src/mavedb/view_models/odds_path.py b/src/mavedb/view_models/odds_path.py index 094e1fa7..137a3131 100644 --- a/src/mavedb/view_models/odds_path.py +++ b/src/mavedb/view_models/odds_path.py @@ -1,27 +1,22 @@ -from typing import Literal, Optional, Sequence +from typing import Literal +from pydantic import validator from mavedb.view_models import record_type_validator, set_record_type from mavedb.view_models.base.base import BaseModel -from mavedb.view_models.publication_identifier import PublicationIdentifierBase - - -class OddsPathRatio(BaseModel): - normal: float - abnormal: float - - -class OddsPathEvidenceStrengths(BaseModel): - normal: Literal["BS3_STRONG"] - abnormal: Literal["PS3_STRONG"] 
class OddsPathBase(BaseModel): - ratios: OddsPathRatio - evidence_strengths: OddsPathEvidenceStrengths + ratio: float + evidence: Literal["BS3_STRONG", "PS3_STRONG"] class OddsPathModify(OddsPathBase): - source: Optional[list[PublicationIdentifierBase]] = None + @validator("ratio") + def ratio_must_be_positive(cls, value: float) -> float: + if value < 0: + raise ValueError("OddsPath value must be greater than or equal to 0") + + return value class OddsPathCreate(OddsPathModify): @@ -31,8 +26,6 @@ class OddsPathCreate(OddsPathModify): class SavedOddsPath(OddsPathBase): record_type: str = None # type: ignore - source: Optional[Sequence[PublicationIdentifierBase]] = None - _record_type_factory = record_type_validator()(set_record_type) diff --git a/src/mavedb/view_models/score_range.py b/src/mavedb/view_models/score_range.py new file mode 100644 index 00000000..95730edf --- /dev/null +++ b/src/mavedb/view_models/score_range.py @@ -0,0 +1,88 @@ +from typing import Optional, Literal, Any, Sequence +from pydantic import validator + +from mavedb.lib.validation.exceptions import ValidationError +from mavedb.lib.validation.utilities import inf_or_float +from mavedb.view_models import record_type_validator, set_record_type +from mavedb.view_models.base.base import BaseModel +from mavedb.view_models.publication_identifier import PublicationIdentifierBase +from mavedb.view_models.odds_path import OddsPathCreate, OddsPathBase, OddsPathModify, SavedOddsPath, OddsPath + + +### Range model + + +class ScoreRangeBase(BaseModel): + label: str + description: Optional[str] + classification: Literal["normal", "abnormal", "not_specified"] + # Purposefully vague type hint because of some odd JSON Schema generation behavior. + # Typing this as tuple[Union[float, None], Union[float, None]] will generate an invalid + # jsonschema, and fail all tests that access the schema. This may be fixed in pydantic v2, + # but it's unclear. Even just typing it as Tuple[Any, Any] will generate an invalid schema! 
+ range: list[Any] # really: tuple[Union[float, None], Union[float, None]] + odds_path: Optional[OddsPathBase] = None + + +class ScoreRangeModify(ScoreRangeBase): + odds_path: Optional[OddsPathModify] = None + + @validator("range") + def ranges_are_not_backwards(cls, field_value: tuple[Any]): + if len(field_value) != 2: + raise ValidationError("Only a lower and upper bound are allowed.") + + field_value[0] = inf_or_float(field_value[0], True) if field_value[0] is not None else None + field_value[1] = inf_or_float(field_value[1], False) if field_value[1] is not None else None + + if inf_or_float(field_value[0], True) > inf_or_float(field_value[1], False): + raise ValidationError("The lower bound of the score range may not be larger than the upper bound.") + elif inf_or_float(field_value[0], True) == inf_or_float(field_value[1], False): + raise ValidationError("The lower and upper bound of the score range may not be the same.") + + return field_value + + +class ScoreRangeCreate(ScoreRangeModify): + odds_path: Optional[OddsPathCreate] = None + + +class SavedScoreRange(ScoreRangeBase): + record_type: str = None # type: ignore + + odds_path: Optional[SavedOddsPath] = None + + _record_type_factory = record_type_validator()(set_record_type) + + +class ScoreRange(SavedScoreRange): + odds_path: Optional[OddsPath] = None + + +### Ranges wrapper + + +class ScoreSetRangesBase(BaseModel): + wt_score: Optional[float] = None + ranges: Sequence[ScoreRangeBase] + odds_path_source: Optional[Sequence[PublicationIdentifierBase]] = None + + +class ScoreSetRangesModify(ScoreSetRangesBase): + ranges: Sequence[ScoreRangeModify] + + +class ScoreSetRangesCreate(ScoreSetRangesModify): + ranges: Sequence[ScoreRangeCreate] + + +class SavedScoreSetRanges(ScoreSetRangesBase): + record_type: str = None # type: ignore + + ranges: Sequence[SavedScoreRange] + + _record_type_factory = record_type_validator()(set_record_type) + + +class ScoreSetRanges(SavedScoreSetRanges): + ranges: Sequence[ScoreRange] diff --git a/src/mavedb/view_models/score_set.py b/src/mavedb/view_models/score_set.py index bd06b856..735bd77a 100644 --- a/src/mavedb/view_models/score_set.py +++ b/src/mavedb/view_models/score_set.py @@ -2,7 +2,7 @@ from __future__ import annotations from datetime import date -from typing import Any, Collection, Dict, Optional, Sequence, Literal +from typing import Any, Collection, Dict, Optional, Sequence from humps import camelize from pydantic import root_validator @@ -21,13 +21,13 @@ DoiIdentifierCreate, SavedDoiIdentifier, ) -from mavedb.view_models.odds_path import OddsPath from mavedb.view_models.license import ShortLicense from mavedb.view_models.publication_identifier import ( PublicationIdentifier, PublicationIdentifierCreate, SavedPublicationIdentifier, ) +from mavedb.view_models.score_range import SavedScoreSetRanges, ScoreSetRangesCreate, ScoreSetRanges from mavedb.view_models.target_gene import ( SavedTargetGene, ShortTargetGene, @@ -51,38 +51,6 @@ class Config: arbitrary_types_allowed = True -class ScoreRange(BaseModel): - label: str - description: Optional[str] - classification: Literal["normal", "abnormal", "not_specified"] - # Purposefully vague type hint because of some odd JSON Schema generation behavior. - # Typing this as tuple[Union[float, None], Union[float, None]] will generate an invalid - # jsonschema, and fail all tests that access the schema. This may be fixed in pydantic v2, - # but it's unclear. Even just typing it as Tuple[Any, Any] will generate an invalid schema! 
- range: list[Any] # really: tuple[Union[float, None], Union[float, None]] - - @validator("range") - def ranges_are_not_backwards(cls, field_value: tuple[Any]): - if len(field_value) != 2: - raise ValidationError("Only a lower and upper bound are allowed.") - - field_value[0] = inf_or_float(field_value[0], True) if field_value[0] is not None else None - field_value[1] = inf_or_float(field_value[1], False) if field_value[1] is not None else None - - if inf_or_float(field_value[0], True) > inf_or_float(field_value[1], False): - raise ValidationError("The lower bound of the score range may not be larger than the upper bound.") - elif inf_or_float(field_value[0], True) == inf_or_float(field_value[1], False): - raise ValidationError("The lower and upper bound of the score range may not be the same.") - - return field_value - - -class ScoreRanges(BaseModel): - wt_score: Optional[float] - ranges: list[ScoreRange] # type: ignore - odds_path: Optional[OddsPath] = None - - class ScoreSetGetter(PublicationIdentifiersGetter): def get(self, key: Any, default: Any = ...) -> Any: if key == "meta_analyzes_score_set_urns": @@ -112,7 +80,7 @@ class ScoreSetModify(ScoreSetBase): secondary_publication_identifiers: Optional[list[PublicationIdentifierCreate]] doi_identifiers: Optional[list[DoiIdentifierCreate]] target_genes: list[TargetGeneCreate] - score_ranges: Optional[ScoreRanges] + score_ranges: Optional[ScoreSetRangesCreate] @validator("title", "short_description", "abstract_text", "method_text") def validate_field_is_non_empty(cls, v): @@ -200,7 +168,7 @@ def target_accession_base_editor_targets_are_consistent(cls, field_value, values return field_value @validator("score_ranges") - def score_range_labels_must_be_unique(cls, field_value: Optional[ScoreRanges]): + def score_range_labels_must_be_unique(cls, field_value: Optional[ScoreSetRangesCreate]): if field_value is None: return None @@ -219,7 +187,9 @@ def score_range_labels_must_be_unique(cls, field_value: Optional[ScoreRanges]): return field_value @validator("score_ranges") - def score_range_normal_classification_exists_if_wild_type_score_provided(cls, field_value: Optional[ScoreRanges]): + def score_range_normal_classification_exists_if_wild_type_score_provided( + cls, field_value: Optional[ScoreSetRangesCreate] + ): if field_value is None: return None @@ -233,7 +203,7 @@ def score_range_normal_classification_exists_if_wild_type_score_provided(cls, fi return field_value @validator("score_ranges") - def ranges_do_not_overlap(cls, field_value: Optional[ScoreRanges]): + def ranges_do_not_overlap(cls, field_value: Optional[ScoreSetRangesCreate]): def test_overlap(tp1, tp2) -> bool: # Always check the tuple with the lowest lower bound. 
If we do not check # overlaps in this manner, checking the overlap of (0,1) and (1,2) will @@ -266,7 +236,7 @@ def test_overlap(tp1, tp2) -> bool: return field_value @validator("score_ranges") - def wild_type_score_in_normal_range(cls, field_value: Optional[ScoreRanges]): + def wild_type_score_in_normal_range(cls, field_value: Optional[ScoreSetRangesCreate]): if field_value is None: return None @@ -296,14 +266,14 @@ def wild_type_score_in_normal_range(cls, field_value: Optional[ScoreRanges]): @root_validator() def validate_score_range_odds_path_source_in_publication_identifiers(cls, values): - score_ranges: Optional[ScoreRanges] = values.get("score_ranges") - if values.get("score_ranges") is None or score_ranges.odds_path is None: + score_ranges: Optional[ScoreSetRangesCreate] = values.get("score_ranges") + if score_ranges is None or score_ranges.odds_path_source is None: return values - if score_ranges.odds_path.source is None or len(score_ranges.odds_path.source) == 0: + if not score_ranges.odds_path_source: return values - for idx, pub in enumerate(score_ranges.odds_path.source): + for idx, pub in enumerate(score_ranges.odds_path_source): primary_publication_identifiers = ( values.get("primary_publication_identifiers", []) if values.get("primary_publication_identifiers") @@ -445,7 +415,7 @@ class SavedScoreSet(ScoreSetBase): dataset_columns: Dict external_links: Dict[str, ExternalLink] contributors: list[Contributor] - score_ranges: Optional[ScoreRanges] + score_ranges: Optional[SavedScoreSetRanges] score_calibrations: Optional[dict[str, Calibration]] _record_type_factory = record_type_validator()(set_record_type) @@ -483,6 +453,7 @@ class ScoreSet(SavedScoreSet): processing_errors: Optional[dict] mapping_state: Optional[MappingState] mapping_errors: Optional[dict] + score_ranges: Optional[ScoreSetRanges] class ScoreSetWithVariants(ScoreSet): @@ -515,6 +486,7 @@ class ScoreSetPublicDump(SavedScoreSet): processing_errors: Optional[Dict] mapping_state: Optional[MappingState] mapping_errors: Optional[Dict] + score_ranges: Optional[ScoreSetRanges] # ruff: noqa: E402 diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index ec2b3f91..a3ad650a 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -862,115 +862,141 @@ } -TEST_ODDS_PATH = { - "ratios": { - "normal": 0.5, - "abnormal": 5.0, - }, - "evidence_strengths": { - "normal": "BS3_STRONG", - "abnormal": "PS3_STRONG", - }, - "source": None, +TEST_WT_SCORE = 1.0 + + +TEST_BS3_ODDS_PATH = { + "ratio": 0.5, + "evidence": "BS3_STRONG", } -TEST_SAVED_ODDS_PATH = { +TEST_PS3_ODDS_PATH = { + "ratio": 0.5, + "evidence": "BS3_STRONG", +} + +TEST_SAVED_BS3_ODDS_PATH = { "recordType": "OddsPath", - "ratios": { - "normal": 0.5, - "abnormal": 5.0, - }, - "evidenceStrengths": { - "normal": "BS3_STRONG", - "abnormal": "PS3_STRONG", - }, + "ratio": 0.5, + "evidence": "BS3_STRONG", } -TEST_ODDS_PATH_WITH_SOURCE = { - "ratios": { - "normal": 0.5, - "abnormal": 5.0, - }, - "evidence_strengths": { - "normal": "BS3_STRONG", - "abnormal": "PS3_STRONG", - }, - "source": [{"identifier": TEST_PUBMED_IDENTIFIER, "db_name": "PubMed"}], +TEST_SAVED_PS3_ODDS_PATH = { + "recordType": "OddsPath", + "ratio": 0.5, + "evidence": "BS3_STRONG", } -TEST_SAVED_ODDS_PATH_WITH_SOURCE = { - "recordType": "OddsPath", - "ratios": { - "normal": 0.5, - "abnormal": 5.0, - }, - "evidenceStrengths": { - "normal": "BS3_STRONG", - "abnormal": "PS3_STRONG", - }, - "source": [{"identifier": TEST_PUBMED_IDENTIFIER, "dbName": "PubMed"}], 
+TEST_SCORE_SET_NORMAL_RANGE = { + "label": "test1", + "classification": "normal", + "range": (0, 2.0), +} + + +TEST_SAVED_SCORE_SET_NORMAL_RANGE = { + "recordType": "ScoreRange", + "label": "test1", + "classification": "normal", + "range": [0.0, 2.0], +} + + +TEST_SCORE_SET_NORMAL_RANGE_WITH_ODDS_PATH = { + "label": "test1", + "classification": "normal", + "range": (0, 2.0), + "odds_path": TEST_BS3_ODDS_PATH, +} + + +TEST_SAVED_SCORE_SET_NORMAL_RANGE_WITH_ODDS_PATH = { + "recordType": "ScoreRange", + "label": "test1", + "classification": "normal", + "range": [0.0, 2.0], + "oddsPath": TEST_SAVED_BS3_ODDS_PATH, +} + + +TEST_SCORE_SET_ABNORMAL_RANGE = { + "label": "test2", + "classification": "abnormal", + "range": (-2.0, 0), +} + + +TEST_SAVED_SCORE_SET_ABNORMAL_RANGE = { + "recordType": "ScoreRange", + "label": "test2", + "classification": "abnormal", + "range": [-2.0, 0.0], +} + + +TEST_SCORE_SET_ABNORMAL_RANGE_WITH_ODDS_PATH = { + "label": "test2", + "classification": "abnormal", + "range": (-2.0, 0), + "odds_path": TEST_PS3_ODDS_PATH, +} + + +TEST_SAVED_SCORE_SET_ABNORMAL_RANGE_WITH_ODDS_PATH = { + "recordType": "ScoreRange", + "label": "test2", + "classification": "abnormal", + "range": [-2.0, 0.0], + "oddsPath": TEST_SAVED_PS3_ODDS_PATH, } TEST_SCORE_SET_RANGE = { - "wt_score": 1.0, + "wt_score": TEST_WT_SCORE, "ranges": [ - {"label": "test1", "classification": "normal", "range": (0, 2.0)}, - {"label": "test2", "classification": "abnormal", "range": (-2.0, 0)}, + TEST_SCORE_SET_NORMAL_RANGE, + TEST_SCORE_SET_ABNORMAL_RANGE, ], - "odds_path": None, + "odds_path_source": None, } TEST_SAVED_SCORE_SET_RANGE = { - "wtScore": 1.0, - "ranges": [ - {"label": "test1", "classification": "normal", "range": [0.0, 2.0]}, - {"label": "test2", "classification": "abnormal", "range": [-2.0, 0.0]}, - ], + "recordType": "ScoreSetRanges", + "wtScore": TEST_WT_SCORE, + "ranges": [TEST_SAVED_SCORE_SET_NORMAL_RANGE, TEST_SAVED_SCORE_SET_ABNORMAL_RANGE], } TEST_SCORE_SET_RANGE_WITH_ODDS_PATH = { - "wt_score": 1.0, - "ranges": [ - {"label": "test1", "classification": "normal", "range": (0, 2.0)}, - {"label": "test2", "classification": "abnormal", "range": (-2.0, 0)}, - ], - "odds_path": TEST_ODDS_PATH, + "wt_score": TEST_WT_SCORE, + "ranges": [TEST_SCORE_SET_NORMAL_RANGE_WITH_ODDS_PATH, TEST_SCORE_SET_ABNORMAL_RANGE_WITH_ODDS_PATH], + "odds_path_source": None, } TEST_SAVED_SCORE_SET_RANGE_WITH_ODDS_PATH = { - "wtScore": 1.0, - "ranges": [ - {"label": "test1", "classification": "normal", "range": [0.0, 2.0]}, - {"label": "test2", "classification": "abnormal", "range": [-2.0, 0.0]}, - ], - "oddsPath": TEST_SAVED_ODDS_PATH, + "recordType": "ScoreSetRanges", + "wtScore": TEST_WT_SCORE, + "ranges": [TEST_SAVED_SCORE_SET_NORMAL_RANGE_WITH_ODDS_PATH, TEST_SAVED_SCORE_SET_ABNORMAL_RANGE_WITH_ODDS_PATH], } TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE = { - "wt_score": 1.0, - "ranges": [ - {"label": "test1", "classification": "normal", "range": (0, 2.0)}, - {"label": "test2", "classification": "abnormal", "range": (-2.0, 0)}, - ], - "odds_path": TEST_ODDS_PATH_WITH_SOURCE, + "wt_score": TEST_WT_SCORE, + "ranges": [TEST_SCORE_SET_NORMAL_RANGE_WITH_ODDS_PATH, TEST_SCORE_SET_ABNORMAL_RANGE_WITH_ODDS_PATH], + "odds_path_source": [{"identifier": TEST_PUBMED_IDENTIFIER, "db_name": "PubMed"}], } TEST_SAVED_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE = { - "wtScore": 1.0, - "ranges": [ - {"label": "test1", "classification": "normal", "range": [0.0, 2.0]}, - {"label": "test2", "classification": "abnormal", "range": [-2.0, 
0.0]}, - ], - "oddsPath": TEST_SAVED_ODDS_PATH_WITH_SOURCE, + "recordType": "ScoreSetRanges", + "wtScore": TEST_WT_SCORE, + "ranges": [TEST_SAVED_SCORE_SET_NORMAL_RANGE_WITH_ODDS_PATH, TEST_SAVED_SCORE_SET_ABNORMAL_RANGE_WITH_ODDS_PATH], + "oddsPathSource": [{"identifier": TEST_PUBMED_IDENTIFIER, "dbName": "PubMed"}], } diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index d854af5e..1e52ee6a 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -190,7 +190,7 @@ def test_create_score_set_with_score_range_and_odds_path_source( experiment = create_experiment(client) score_set = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set["experimentUrn"] = experiment["urn"] - score_set[publication_list] = TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE["odds_path"]["source"] + score_set[publication_list] = TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE["odds_path_source"] score_set.update({"score_ranges": TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE}) response = client.post("/api/v1/score-sets/", json=score_set) diff --git a/tests/view_models/test_odds_path.py b/tests/view_models/test_odds_path.py new file mode 100644 index 00000000..587935ab --- /dev/null +++ b/tests/view_models/test_odds_path.py @@ -0,0 +1,36 @@ +import pytest +from pydantic import ValidationError + +from mavedb.view_models.odds_path import OddsPathBase, OddsPathModify, OddsPathCreate + +from tests.helpers.constants import TEST_BS3_ODDS_PATH, TEST_PS3_ODDS_PATH + + +@pytest.mark.parametrize("valid_data", [TEST_BS3_ODDS_PATH, TEST_PS3_ODDS_PATH]) +def test_odds_path_base_valid_data(valid_data): + odds_path = OddsPathBase(**valid_data) + assert odds_path.ratio == valid_data["ratio"] + assert odds_path.evidence == valid_data["evidence"] + + +@pytest.mark.parametrize("valid_data", [TEST_BS3_ODDS_PATH, TEST_PS3_ODDS_PATH]) +def test_odds_path_base_invalid_data(valid_data): + odds_path = OddsPathModify(**valid_data) + assert odds_path.ratio == valid_data["ratio"] + assert odds_path.evidence == valid_data["evidence"] + + +def test_odds_path_modify_invalid_ratio(): + invalid_data = { + "ratio": -1.0, + "evidence": "BS3_STRONG", + } + with pytest.raises(ValidationError, match="OddsPath value must be greater than or equal to 0"): + OddsPathModify(**invalid_data) + + +@pytest.mark.parametrize("valid_data", [TEST_BS3_ODDS_PATH, TEST_PS3_ODDS_PATH]) +def test_odds_path_create_valid(valid_data): + odds_path = OddsPathCreate(**valid_data) + assert odds_path.ratio == valid_data["ratio"] + assert odds_path.evidence == valid_data["evidence"] diff --git a/tests/view_models/test_score_range.py b/tests/view_models/test_score_range.py new file mode 100644 index 00000000..04f94f14 --- /dev/null +++ b/tests/view_models/test_score_range.py @@ -0,0 +1,106 @@ +import pytest +from pydantic import ValidationError + +from mavedb.view_models.score_range import ScoreRangeModify, ScoreRangeCreate, ScoreSetRangesCreate + +from tests.helpers.constants import ( + TEST_SCORE_SET_NORMAL_RANGE_WITH_ODDS_PATH, + TEST_SCORE_SET_ABNORMAL_RANGE_WITH_ODDS_PATH, + TEST_SCORE_SET_RANGE, + TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE, +) + + +def test_score_range_modify_valid_range(): + valid_data = { + "label": "Test Range", + "classification": "normal", + "range": [0.0, 1.0], + } + score_range = ScoreRangeModify(**valid_data) + assert score_range.range == [0.0, 1.0] + + +def test_score_range_modify_invalid_range_length(): + invalid_data = { + "label": "Test Range", + "classification": "normal", + "range": [0.0], + } + 
with pytest.raises(ValidationError, match="Only a lower and upper bound are allowed."): + ScoreRangeModify(**invalid_data) + + +def test_score_range_modify_invalid_range_order(): + invalid_data = { + "label": "Test Range", + "classification": "normal", + "range": [1.0, 0.0], + } + with pytest.raises( + ValidationError, match="The lower bound of the score range may not be larger than the upper bound." + ): + ScoreRangeModify(**invalid_data) + + +def test_score_range_modify_equal_bounds(): + invalid_data = { + "label": "Test Range", + "classification": "normal", + "range": [1.0, 1.0], + } + with pytest.raises(ValidationError, match="The lower and upper bound of the score range may not be the same."): + ScoreRangeModify(**invalid_data) + + +@pytest.mark.parametrize( + "valid_data", [TEST_SCORE_SET_NORMAL_RANGE_WITH_ODDS_PATH, TEST_SCORE_SET_ABNORMAL_RANGE_WITH_ODDS_PATH] +) +def test_score_range_create_with_odds_path(valid_data): + score_range = ScoreRangeCreate(**valid_data) + assert score_range.odds_path.ratio == valid_data["odds_path"]["ratio"] + assert score_range.odds_path.evidence == valid_data["odds_path"]["evidence"] + + +def test_score_ranges_create_valid(): + score_ranges = ScoreSetRangesCreate(**TEST_SCORE_SET_RANGE) + assert len(score_ranges.ranges) == 2 + assert score_ranges.ranges[0].label == TEST_SCORE_SET_RANGE["ranges"][0]["label"] + assert score_ranges.ranges[1].classification == TEST_SCORE_SET_RANGE["ranges"][1]["classification"] + + +def test_score_ranges_create_valid_with_odds_path_source(): + score_ranges = ScoreSetRangesCreate(**TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE) + assert len(score_ranges.ranges) == 2 + assert score_ranges.ranges[0].label == TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE["ranges"][0]["label"] + assert ( + score_ranges.ranges[1].classification + == TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE["ranges"][1]["classification"] + ) + assert ( + score_ranges.odds_path_source[0].identifier + == TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE["odds_path_source"][0]["identifier"] + ) + assert len(score_ranges.odds_path_source) == 1 + + +def test_score_ranges_create_invalid_range(): + invalid_data = { + "wt_score": 0.5, + "ranges": [ + { + "label": "Range 1", + "classification": "normal", + "range": [0.0, 1.0], + }, + { + "label": "Range 2", + "classification": "abnormal", + "range": [2.0, 1.0], + }, + ], + } + with pytest.raises( + ValidationError, match="The lower bound of the score range may not be larger than the upper bound." + ): + ScoreSetRangesCreate(**invalid_data) diff --git a/tests/view_models/test_score_set.py b/tests/view_models/test_score_set.py index 74834421..48b33c1b 100644 --- a/tests/view_models/test_score_set.py +++ b/tests/view_models/test_score_set.py @@ -336,37 +336,6 @@ def test_cannot_create_score_set_with_overlapping_lower_unbounded_ranges(): assert "Score ranges may not overlap; `range_1` overlaps with `range_2`" in str(exc_info.value) -def test_cannot_create_score_set_with_backwards_bounds(): - score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() - score_set_test["score_ranges"] = { - "wt_score": 0.5, - "ranges": [ - {"label": "range_1", "classification": "normal", "range": (1, 0)}, - {"label": "range_2", "classification": "abnormal", "range": (2, 1)}, - ], - } - - with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**score_set_test) - - assert "The lower bound of the score range may not be larger than the upper bound." 
in str(exc_info.value) - - -def test_cannot_create_score_set_with_equal_bounds(): - score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() - score_set_test["score_ranges"] = { - "wt_score": 1, - "ranges": [ - {"label": "range_1", "classification": "normal", "range": (-1, -1)}, - ], - } - - with pytest.raises(ValueError) as exc_info: - ScoreSetModify(**score_set_test) - - assert "The lower and upper bound of the score range may not be the same." in str(exc_info.value) - - def test_cannot_create_score_set_with_duplicate_range_labels(): score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() score_set_test["score_ranges"] = { @@ -508,8 +477,8 @@ def test_can_create_score_set_with_odds_path_in_score_ranges(): def test_can_create_score_set_with_odds_path_and_source_in_score_ranges(): score_set_test = TEST_MINIMAL_SEQ_SCORESET.copy() - score_set_test["primary_publication_identifiers"] = TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE["odds_path"][ - "source" + score_set_test["primary_publication_identifiers"] = TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE[ + "odds_path_source" ] score_set_test["score_ranges"] = TEST_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE.copy() From 8fc7e8aa73a66d18c753821e14b148f0964c1758 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Thu, 29 May 2025 11:03:33 +1000 Subject: [PATCH 148/166] Modify the db.execute code. --- tests/helpers/util/contributor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/helpers/util/contributor.py b/tests/helpers/util/contributor.py index d63d5b34..b2115093 100644 --- a/tests/helpers/util/contributor.py +++ b/tests/helpers/util/contributor.py @@ -1,4 +1,5 @@ from sqlalchemy.orm.exc import NoResultFound +from sqlalchemy import select from sqlalchemy.orm import Session from typing import Any @@ -11,7 +12,8 @@ def add_contributor(db: Session, urn: str, model: Any, orcid_id: str, given_name assert item is not None try: - contributor = db.query(Contributor).filter(Contributor.orcid_id == orcid_id).one() + # scalar_one(): extract the ORM instance from the result + contributor = db.execute(select(Contributor).where(Contributor.orcid_id == orcid_id)).scalar_one() except NoResultFound: contributor = Contributor(orcid_id=orcid_id, given_name=given_name, family_name=family_name) db.add(contributor) From fe5b29d5adc74f012ac727001d7d687839aa92dc Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Thu, 29 May 2025 09:27:51 -0700 Subject: [PATCH 149/166] Add all ACMG evidence codes, make OddsPath evidence optional --- src/mavedb/view_models/odds_path.py | 15 +++++++++++++-- tests/view_models/test_odds_path.py | 9 +++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/mavedb/view_models/odds_path.py b/src/mavedb/view_models/odds_path.py index 137a3131..f17dddd1 100644 --- a/src/mavedb/view_models/odds_path.py +++ b/src/mavedb/view_models/odds_path.py @@ -1,4 +1,4 @@ -from typing import Literal +from typing import Literal, Optional from pydantic import validator from mavedb.view_models import record_type_validator, set_record_type @@ -7,7 +7,18 @@ class OddsPathBase(BaseModel): ratio: float - evidence: Literal["BS3_STRONG", "PS3_STRONG"] + evidence: Optional[ + Literal[ + "BS3_STRONG", + "BS3_MODERATE", + "BS3_SUPPORTING", + "INDETERMINATE", + "PS3_VERY_STRONG", + "PS3_STRONG", + "PS3_MODERATE", + "PS3_SUPPORTING", + ] + ] = None class OddsPathModify(OddsPathBase): diff --git a/tests/view_models/test_odds_path.py b/tests/view_models/test_odds_path.py index 587935ab..93585bef 100644 --- 
a/tests/view_models/test_odds_path.py +++ b/tests/view_models/test_odds_path.py @@ -13,6 +13,15 @@ def test_odds_path_base_valid_data(valid_data): assert odds_path.evidence == valid_data["evidence"] +def test_odds_path_base_no_evidence(): + odds_with_no_evidence = TEST_BS3_ODDS_PATH.copy() + odds_with_no_evidence["evidence"] = None + + odds_path = OddsPathBase(**odds_with_no_evidence) + assert odds_path.ratio == odds_with_no_evidence["ratio"] + assert odds_path.evidence is None + + @pytest.mark.parametrize("valid_data", [TEST_BS3_ODDS_PATH, TEST_PS3_ODDS_PATH]) def test_odds_path_base_invalid_data(valid_data): odds_path = OddsPathModify(**valid_data) From 991bbfe5ee107a3d80405f196df46d99fb570462 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Fri, 30 May 2025 13:21:35 +1000 Subject: [PATCH 150/166] Add and modify some tests. Modify part of the codes. --- src/mavedb/lib/experiments.py | 9 +- src/mavedb/routers/score_sets.py | 2 +- tests/routers/test_experiment_set.py | 168 ++++++++++++++++++--------- tests/routers/test_experiments.py | 55 +++++---- 4 files changed, 147 insertions(+), 87 deletions(-) diff --git a/src/mavedb/lib/experiments.py b/src/mavedb/lib/experiments.py index dd8a47f3..bb68df02 100644 --- a/src/mavedb/lib/experiments.py +++ b/src/mavedb/lib/experiments.py @@ -1,9 +1,10 @@ import logging -from typing import Optional +from typing import Any, Optional from sqlalchemy import func, or_, not_ from sqlalchemy.orm import Session +from mavedb.lib.authentication import UserData from mavedb.lib.logging.context import logging_context, save_to_logging_context from mavedb.lib.permissions import Action from mavedb.lib.score_sets import find_superseded_score_set_tail @@ -131,8 +132,8 @@ def search_experiments( def enrich_experiment_with_num_score_sets( - item_update: Experiment, user: Optional[User] -) -> experiment: + item_update: Experiment, user_data: Optional[UserData] +) -> Any: """ Validate and update the number of score set in experiment. The superseded score set is excluded. Data structure: experiment{score_set_urns, num_score_sets} @@ -141,7 +142,7 @@ def enrich_experiment_with_num_score_sets( find_superseded_score_set_tail( score_set, Action.READ, - user + user_data ) for score_set in item_update.score_sets ] filtered_score_sets = [score_set for score_set in filter_superseded_score_set_tails if score_set is not None] diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 05c22499..16e7a192 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -73,7 +73,7 @@ async def fetch_score_set_by_urn( db, urn: str, user: Optional[UserData], owner_or_contributor: Optional[UserData], only_published: bool -) -> Optional[ScoreSet]: +) -> ScoreSet: """ Fetch one score set by URN, ensuring that the user has read permission. 
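As context for the enrichment change above: enrich_experiment_with_num_score_sets maps each of the experiment's score sets through find_superseded_score_set_tail (with Action.READ and the requesting user), drops the None results, and derives the experiment's visible score set list from what remains, so superseded and unreadable score sets are not counted. A rough, self-contained sketch of that idea under simplified assumptions (a boolean "visible" flag standing in for the permission check; not the real implementation):

    from typing import Optional

    class ScoreSet:
        # Hypothetical minimal stand-in for a score set record.
        def __init__(self, urn: str, visible: bool, superseding: Optional["ScoreSet"] = None):
            self.urn = urn
            self.visible = visible          # stands in for the Action.READ permission check
            self.superseding = superseding  # newer score set replacing this one, if any

    def readable_tail(score_set: ScoreSet) -> Optional[ScoreSet]:
        # Walk the superseding chain and keep the newest entry the user may read.
        tail = None
        current: Optional[ScoreSet] = score_set
        while current is not None:
            if current.visible:
                tail = current
            current = current.superseding
        return tail

    old = ScoreSet("urn:old", visible=True)
    new = ScoreSet("urn:new", visible=True)
    old.superseding = new
    private = ScoreSet("urn:private", visible=False)

    visible_urns = {t.urn for s in (old, new, private) if (t := readable_tail(s)) is not None}
    # Superseded and unreadable score sets are excluded; only the chain tail remains.
    assert visible_urns == {"urn:new"}
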
diff --git a/tests/routers/test_experiment_set.py b/tests/routers/test_experiment_set.py index dd864e26..73fb8acc 100644 --- a/tests/routers/test_experiment_set.py +++ b/tests/routers/test_experiment_set.py @@ -1,3 +1,8 @@ +from unittest.mock import patch +import pytest + +arq = pytest.importorskip("arq") + from mavedb.models.experiment import Experiment as ExperimentDbModel from mavedb.models.experiment_set import ExperimentSet as ExperimentSetDbModel from mavedb.models.score_set import ScoreSet as ScoreSetDbModel @@ -5,16 +10,13 @@ from tests.helpers.constants import ( TEST_USER, ) -from tests.helpers.util import ( - add_contributor, - change_ownership, - create_experiment, - create_seq_score_set_with_variants, -) +from tests.helpers.util.contributor import add_contributor +from tests.helpers.util.experiment import create_experiment +from tests.helpers.util.score_set import create_seq_score_set_with_variants, publish_score_set +from tests.helpers.util.user import change_ownership -def test_users_get_one_private_experiment_from_own_experiment_set( - session, data_provider, client, setup_router_db, data_files): +def test_users_get_one_private_experiment_from_own_experiment_set(client, setup_router_db): experiment = create_experiment(client) response = client.get(f"/api/v1/experiment-sets/{experiment['experimentSetUrn']}") assert response.status_code == 200 @@ -39,46 +41,102 @@ def test_users_get_one_experiment_one_score_set_from_own_private_experiment_set( assert score_set["urn"] in response_data["experiments"][0]["scoreSetUrns"] -# def test_users_get_one_experiment_one_score_set_from_others_private_experiment_set( -# session, data_provider, client, setup_router_db, data_files): -# experiment = create_experiment(client) -# score_set = create_seq_score_set_with_variants( -# client, session, data_provider, experiment["urn"], data_files / "scores.csv" -# ) -# change_ownership(session, score_set["urn"], ScoreSetDbModel) -# change_ownership(session, experiment["urn"], ExperimentDbModel) -# change_ownership(session, experiment["experimentSetUrn"], ExperimentSetDbModel) -# add_contributor( -# session, -# score_set["urn"], -# ScoreSetDbModel, -# TEST_USER["username"], -# TEST_USER["first_name"], -# TEST_USER["last_name"], -# ) -# add_contributor( -# session, -# experiment["urn"], -# ExperimentDbModel, -# TEST_USER["username"], -# TEST_USER["first_name"], -# TEST_USER["last_name"], -# ) -# add_contributor( -# session, -# experiment["experimentSetUrn"], -# ExperimentSetDbModel, -# TEST_USER["username"], -# TEST_USER["first_name"], -# TEST_USER["last_name"], -# ) -# response = client.get(f"/api/v1/experiment-sets/{experiment['experimentSetUrn']}") -# assert response.status_code == 200 -# response_data = response.json() -# assert response_data["numExperiments"] == 1 -# assert response_data["experiments"][0]["urn"] == experiment["urn"] -# assert response_data["experiments"][0]["numScoreSets"] == 1 -# assert score_set["urn"] in response_data["experiments"][0]["scoreSetUrns"] +def test_users_get_one_experiment_one_score_set_from_others_private_experiment_set( + session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + change_ownership(session, score_set["urn"], ScoreSetDbModel) + change_ownership(session, experiment["urn"], ExperimentDbModel) + change_ownership(session, experiment["experimentSetUrn"], ExperimentSetDbModel) + 
add_contributor( + session, + score_set["urn"], + ScoreSetDbModel, + TEST_USER["username"], + TEST_USER["first_name"], + TEST_USER["last_name"], + ) + add_contributor( + session, + experiment["urn"], + ExperimentDbModel, + TEST_USER["username"], + TEST_USER["first_name"], + TEST_USER["last_name"], + ) + add_contributor( + session, + experiment["experimentSetUrn"], + ExperimentSetDbModel, + TEST_USER["username"], + TEST_USER["first_name"], + TEST_USER["last_name"], + ) + response = client.get(f"/api/v1/experiment-sets/{experiment['experimentSetUrn']}") + assert response.status_code == 200 + response_data = response.json() + assert response_data["numExperiments"] == 1 + assert response_data["experiments"][0]["urn"] == experiment["urn"] + assert response_data["experiments"][0]["numScoreSets"] == 1 + assert score_set["urn"] in response_data["experiments"][0]["scoreSetUrns"] + + +def test_users_get_one_experiment_none_score_set_from_others_private_experiment_set( + session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + change_ownership(session, score_set["urn"], ScoreSetDbModel) + change_ownership(session, experiment["urn"], ExperimentDbModel) + change_ownership(session, experiment["experimentSetUrn"], ExperimentSetDbModel) + add_contributor( + session, + experiment["urn"], + ExperimentDbModel, + TEST_USER["username"], + TEST_USER["first_name"], + TEST_USER["last_name"], + ) + add_contributor( + session, + experiment["experimentSetUrn"], + ExperimentSetDbModel, + TEST_USER["username"], + TEST_USER["first_name"], + TEST_USER["last_name"], + ) + response = client.get(f"/api/v1/experiment-sets/{experiment['experimentSetUrn']}") + assert response.status_code == 200 + response_data = response.json() + assert response_data["numExperiments"] == 1 + assert response_data["experiments"][0]["urn"] == experiment["urn"] + assert response_data["experiments"][0]["numScoreSets"] == 0 + + +def test_users_get_none_experiment_none_score_set_from_others_private_experiment_set( + session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + change_ownership(session, score_set["urn"], ScoreSetDbModel) + change_ownership(session, experiment["urn"], ExperimentDbModel) + change_ownership(session, experiment["experimentSetUrn"], ExperimentSetDbModel) + add_contributor( + session, + experiment["experimentSetUrn"], + ExperimentSetDbModel, + TEST_USER["username"], + TEST_USER["first_name"], + TEST_USER["last_name"], + ) + response = client.get(f"/api/v1/experiment-sets/{experiment['experimentSetUrn']}") + assert response.status_code == 200 + response_data = response.json() + assert response_data["numExperiments"] == 0 def test_users_get_one_experiment_one_score_set_from_own_public_experiment_set( @@ -87,9 +145,10 @@ def test_users_get_one_experiment_one_score_set_from_own_public_experiment_set( score_set = create_seq_score_set_with_variants( client, session, data_provider, experiment["urn"], data_files / "scores.csv" ) - pub_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert pub_score_set_response.status_code == 200 - pub_score_set = pub_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) 
as worker_queue: + pub_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + response = client.get(f"/api/v1/experiment-sets/{pub_score_set['experiment']['experimentSetUrn']}") assert response.status_code == 200 response_data = response.json() @@ -105,9 +164,10 @@ def test_users_get_one_experiment_one_score_set_from_other_public_experiment_set score_set = create_seq_score_set_with_variants( client, session, data_provider, experiment["urn"], data_files / "scores.csv" ) - pub_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert pub_score_set_response.status_code == 200 - pub_score_set = pub_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + pub_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() + change_ownership(session, pub_score_set["urn"], ScoreSetDbModel) change_ownership(session, pub_score_set['experiment']['urn'], ExperimentDbModel) change_ownership(session, pub_score_set['experiment']['experimentSetUrn'], ExperimentSetDbModel) diff --git a/tests/routers/test_experiments.py b/tests/routers/test_experiments.py index d12701af..8c327845 100644 --- a/tests/routers/test_experiments.py +++ b/tests/routers/test_experiments.py @@ -42,7 +42,7 @@ from tests.helpers.util.contributor import add_contributor from tests.helpers.util.user import change_ownership from tests.helpers.util.experiment import create_experiment -from tests.helpers.util.score_set import create_seq_score_set, publish_score_set +from tests.helpers.util.score_set import create_seq_score_set, create_seq_score_set_with_variants, publish_score_set from tests.helpers.util.variant import mock_worker_variant_insertion @@ -1063,11 +1063,9 @@ def test_admin_can_get_other_users_private_experiment(client, admin_app_override assert (key, expected_response[key]) == (key, response_data[key]) -def test_users_get_one_score_set_to_own_private_experiment(session, data_provider, client, setup_router_db, data_files): +def test_users_get_one_score_set_to_own_private_experiment(client, setup_router_db): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( - client, session, data_provider, experiment["urn"], data_files / "scores.csv" - ) + score_set = create_seq_score_set(client, experiment["urn"]) response = client.get(f"/api/v1/experiments/{experiment['urn']}") assert response.status_code == 200 response_data = response.json() @@ -1077,34 +1075,35 @@ def test_users_get_one_score_set_to_own_private_experiment(session, data_provide def test_users_get_one_score_set_to_own_public_experiment(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( + unpublished_score_set = create_seq_score_set_with_variants( client, session, data_provider, experiment["urn"], data_files / "scores.csv" ) - pub_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert pub_score_set_response.status_code == 200 - pub_score_set = pub_score_set_response.json() - response = client.get(f"/api/v1/experiments/{pub_score_set['experiment']['urn']}") + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + + response = client.get(f"/api/v1/experiments/{score_set['experiment']['urn']}") assert 
response.status_code == 200 response_data = response.json() assert response_data["numScoreSets"] == 1 - assert pub_score_set["urn"] in response_data["scoreSetUrns"] + assert score_set["urn"] in response_data["scoreSetUrns"] def test_users_get_one_published_score_set_from_other_experiment(session, data_provider, client, setup_router_db, data_files): experiment = create_experiment(client) - score_set = create_seq_score_set_with_variants( + unpublished_score_set = create_seq_score_set_with_variants( client, session, data_provider, experiment["urn"], data_files / "scores.csv" ) - pub_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert pub_score_set_response.status_code == 200 - pub_score_set = pub_score_set_response.json() - change_ownership(session, pub_score_set['experiment']['urn'], ExperimentDbModel) - change_ownership(session, pub_score_set["urn"], ScoreSetDbModel) - response = client.get(f"/api/v1/experiments/{pub_score_set['experiment']['urn']}") + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + score_set = publish_score_set(client, unpublished_score_set["urn"]) + worker_queue.assert_called_once() + change_ownership(session, score_set['experiment']['urn'], ExperimentDbModel) + change_ownership(session, score_set["urn"], ScoreSetDbModel) + response = client.get(f"/api/v1/experiments/{score_set['experiment']['urn']}") assert response.status_code == 200 response_data = response.json() assert response_data["numScoreSets"] == 1 - assert pub_score_set["urn"] in response_data["scoreSetUrns"] + assert score_set["urn"] in response_data["scoreSetUrns"] def test_users_get_one_published_score_set_from_others_experiment_with_a_private_score_set( @@ -1113,9 +1112,9 @@ def test_users_get_one_published_score_set_from_others_experiment_with_a_private score_set_1 = create_seq_score_set_with_variants( client, session, data_provider, experiment["urn"], data_files / "scores.csv" ) - pub_score_set_response = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") - assert pub_score_set_response.status_code == 200 - pub_score_set = pub_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + pub_score_set = publish_score_set(client, score_set_1["urn"]) + worker_queue.assert_called_once() score_set_2 = create_seq_score_set_with_variants( client, session, data_provider, pub_score_set['experiment']['urn'], data_files / "scores.csv" ) @@ -1133,9 +1132,9 @@ def test_users_get_two_score_sets_from_own_experiment_with_a_private_and_a_publi score_set_1 = create_seq_score_set_with_variants( client, session, data_provider, experiment["urn"], data_files / "scores.csv" ) - pub_score_set_response = client.post(f"/api/v1/score-sets/{score_set_1['urn']}/publish") - assert pub_score_set_response.status_code == 200 - pub_score_set = pub_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + pub_score_set = publish_score_set(client, score_set_1["urn"]) + worker_queue.assert_called_once() score_set_2 = create_seq_score_set_with_variants( client, session, data_provider, pub_score_set['experiment']['urn'], data_files / "scores.csv" ) @@ -1153,9 +1152,9 @@ def test_users_get_one_score_set_from_own_experiment_with_a_superseding_score_se score_set = create_seq_score_set_with_variants( client, session, data_provider, experiment["urn"], data_files / "scores.csv" ) - pub_score_set_response = 
client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") - assert pub_score_set_response.status_code == 200 - pub_score_set = pub_score_set_response.json() + with patch.object(arq.ArqRedis, "enqueue_job", return_value=None) as worker_queue: + pub_score_set = publish_score_set(client, score_set["urn"]) + worker_queue.assert_called_once() score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) score_set_post_payload["experimentUrn"] = pub_score_set["experiment"]["urn"] score_set_post_payload["supersededScoreSetUrn"] = pub_score_set["urn"] From 5a1ff11fc251e926f2d46ecf40e296af4e748a79 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Fri, 30 May 2025 14:51:53 +1000 Subject: [PATCH 151/166] Adjust import position --- tests/routers/test_experiment_set.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/routers/test_experiment_set.py b/tests/routers/test_experiment_set.py index 73fb8acc..536d724b 100644 --- a/tests/routers/test_experiment_set.py +++ b/tests/routers/test_experiment_set.py @@ -1,8 +1,5 @@ -from unittest.mock import patch import pytest - -arq = pytest.importorskip("arq") - +from unittest.mock import patch from mavedb.models.experiment import Experiment as ExperimentDbModel from mavedb.models.experiment_set import ExperimentSet as ExperimentSetDbModel from mavedb.models.score_set import ScoreSet as ScoreSetDbModel @@ -15,6 +12,8 @@ from tests.helpers.util.score_set import create_seq_score_set_with_variants, publish_score_set from tests.helpers.util.user import change_ownership +arq = pytest.importorskip("arq") + def test_users_get_one_private_experiment_from_own_experiment_set(client, setup_router_db): experiment = create_experiment(client) From 3eb8ddc939606ad10046923a5615b29c18319840 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Fri, 30 May 2025 15:18:29 +1000 Subject: [PATCH 152/166] Add import fastapi --- tests/routers/test_experiment_set.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/routers/test_experiment_set.py b/tests/routers/test_experiment_set.py index 536d724b..89c6e456 100644 --- a/tests/routers/test_experiment_set.py +++ b/tests/routers/test_experiment_set.py @@ -13,6 +13,8 @@ from tests.helpers.util.user import change_ownership arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") def test_users_get_one_private_experiment_from_own_experiment_set(client, setup_router_db): From 571b2967aad54f06d93779eba8690b6160c7dbe3 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Fri, 30 May 2025 16:27:41 +1000 Subject: [PATCH 153/166] Debug a test. --- tests/routers/test_score_set.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 84754583..b3a7a869 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -207,6 +207,7 @@ def test_create_score_set_with_score_range_and_odds_path_source( deepcopy(TEST_MINIMAL_SEQ_SCORESET_RESPONSE), experiment, response_data ) expected_response[camelize(publication_list)] = [SAVED_PUBMED_PUBLICATION] + expected_response["experiment"].update({"numScoreSets": 1}) expected_response["scoreRanges"] = TEST_SAVED_SCORE_SET_RANGE_WITH_ODDS_PATH_AND_SOURCE assert sorted(expected_response.keys()) == sorted(response_data.keys()) From d11d11722be822deb40742e3215bf6478fd07558 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Fri, 30 May 2025 16:50:16 +1000 Subject: [PATCH 154/166] Remove an unnecessary import. 
--- tests/routers/test_experiment_set.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/routers/test_experiment_set.py b/tests/routers/test_experiment_set.py index 89c6e456..8ee9fcb7 100644 --- a/tests/routers/test_experiment_set.py +++ b/tests/routers/test_experiment_set.py @@ -14,7 +14,6 @@ arq = pytest.importorskip("arq") cdot = pytest.importorskip("cdot") -fastapi = pytest.importorskip("fastapi") def test_users_get_one_private_experiment_from_own_experiment_set(client, setup_router_db): From 9edf3f55e37692b5bd2c1ebb05a802d2082251ba Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Mon, 2 Jun 2025 09:44:17 +1000 Subject: [PATCH 155/166] Try again. Add an importorskip. --- tests/routers/test_experiment_set.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/routers/test_experiment_set.py b/tests/routers/test_experiment_set.py index 8ee9fcb7..89c6e456 100644 --- a/tests/routers/test_experiment_set.py +++ b/tests/routers/test_experiment_set.py @@ -14,6 +14,7 @@ arq = pytest.importorskip("arq") cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") def test_users_get_one_private_experiment_from_own_experiment_set(client, setup_router_db): From 03090af751db6458210a154937005d96dbcf59f6 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Mon, 2 Jun 2025 19:03:24 +1000 Subject: [PATCH 156/166] Modify code and tests. Revert the yml file back to original content. --- docker-compose-dev.yml | 7 +------ src/mavedb/lib/experiments.py | 12 ++++++------ src/mavedb/routers/experiments.py | 24 ++++++++---------------- src/mavedb/routers/score_sets.py | 24 ++++++------------------ tests/helpers/constants.py | 3 +++ tests/routers/test_experiments.py | 18 ++++++------------ 6 files changed, 30 insertions(+), 58 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 294708d0..2def980a 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -77,15 +77,10 @@ services: - mavedb-seqrepo-dev:/usr/local/share/seqrepo seqrepo: - image: biocommons/seqrepo:2021-01-29 + image: biocommons/seqrepo:2024-12-20 volumes: - mavedb-seqrepo-dev:/usr/local/share/seqrepo -# rabbitmq: -# image: rabbitmq:3.8.3 -# ports: -# - "5673:5672" - volumes: mavedb-data-dev: mavedb-redis-dev: diff --git a/src/mavedb/lib/experiments.py b/src/mavedb/lib/experiments.py index bb68df02..d04107bf 100644 --- a/src/mavedb/lib/experiments.py +++ b/src/mavedb/lib/experiments.py @@ -133,7 +133,7 @@ def search_experiments( def enrich_experiment_with_num_score_sets( item_update: Experiment, user_data: Optional[UserData] -) -> Any: +) -> experiment.Experiment: """ Validate and update the number of score set in experiment. The superseded score set is excluded. 
Data structure: experiment{score_set_urns, num_score_sets} @@ -145,11 +145,11 @@ def enrich_experiment_with_num_score_sets( user_data ) for score_set in item_update.score_sets ] - filtered_score_sets = [score_set for score_set in filter_superseded_score_set_tails if score_set is not None] - filtered_score_set_urns = [] - if filtered_score_sets: - filtered_score_set_urns = list(set([score_set.urn for score_set in filtered_score_sets])) - filtered_score_set_urns.sort() + filtered_score_set_urns = sorted({ + score_set.urn + for score_set in filter_superseded_score_set_tails + if score_set is not None and score_set.urn is not None + }) updated_experiment = experiment.Experiment.from_orm(item_update).copy(update={ "num_score_sets": len(filtered_score_set_urns), diff --git a/src/mavedb/routers/experiments.py b/src/mavedb/routers/experiments.py index c35d4b13..80f29401 100644 --- a/src/mavedb/routers/experiments.py +++ b/src/mavedb/routers/experiments.py @@ -90,12 +90,10 @@ def search_experiments(search: ExperimentsSearch, db: Session = Depends(deps.get Search experiments. """ items = _search_experiments(db, None, search) - if items: - items = [ - enrich_experiment_with_num_score_sets(exp, None) - for exp in items - ] - return items + return [ + enrich_experiment_with_num_score_sets(exp, None) + for exp in items + ] @router.post( @@ -112,12 +110,10 @@ def search_my_experiments( Search experiments created by the current user.. """ items = _search_experiments(db, user_data.user, search) - if items: - items = [ + return [ enrich_experiment_with_num_score_sets(exp, user_data) for exp in items ] - return items @router.get( @@ -132,7 +128,7 @@ def fetch_experiment( urn: str, db: Session = Depends(deps.get_db), user_data: Optional[UserData] = Depends(get_current_user), -) -> Experiment: +) -> experiment.Experiment: """ Fetch a single experiment by URN. 
""" @@ -145,9 +141,7 @@ def fetch_experiment( raise HTTPException(status_code=404, detail=f"Experiment with URN {urn} not found") assert_permission(user_data, item, Action.READ) - updated_experiment = enrich_experiment_with_num_score_sets(item, user_data) - - return updated_experiment + return enrich_experiment_with_num_score_sets(item, user_data) @router.get( @@ -450,9 +444,7 @@ async def update_experiment( db.refresh(item) save_to_logging_context({"updated_resource": item.urn}) - updated_item = enrich_experiment_with_num_score_sets(item, user_data) - - return updated_item + return enrich_experiment_with_num_score_sets(item, user_data) @router.delete("/experiments/{urn}", response_model=None, responses={422: {}}) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 16e7a192..aaa427f1 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -222,9 +222,7 @@ async def show_score_set( save_to_logging_context({"requested_resource": urn}) item = await fetch_score_set_by_urn(db, urn, user_data, None, False) enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) - response_item = score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) - - return response_item + return score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) @router.get( @@ -670,9 +668,7 @@ async def create_score_set( save_to_logging_context({"created_resource": item.urn}) enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) - response_item = score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) - - return response_item + return score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) @router.post( @@ -737,9 +733,7 @@ async def upload_score_set_variant_data( db.commit() db.refresh(item) enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) - response_item = score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) - - return response_item + return score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) @router.post( @@ -777,9 +771,7 @@ async def update_score_set_calibration_data( save_to_logging_context({"updated_resource": item.urn}) enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) - response_item = score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) - - return response_item + return score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) @router.put( @@ -1037,9 +1029,7 @@ async def update_score_set( save_to_logging_context({"updated_resource": item.urn}) enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) - response_item = score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) - - return response_item + return score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) @router.delete("/score-sets/{urn}", responses={422: {}}) @@ -1178,9 +1168,7 @@ async def publish_score_set( ) enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data) - response_item = score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) - - return response_item + return score_set.ScoreSet.from_orm(item).copy(update={"experiment": enriched_experiment}) @router.get( diff --git 
a/tests/helpers/constants.py b/tests/helpers/constants.py index 91399a77..aac54425 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -377,6 +377,7 @@ "creationDate": date.today().isoformat(), "modificationDate": date.today().isoformat(), "scoreSetUrns": [], + "numScoreSets": 0, # NOTE: This is context-dependent and may need overriding per test "contributors": [], "keywords": [], "doiIdentifiers": [], @@ -426,6 +427,7 @@ "urn": None, "experimentSetUrn": None, "officialCollections": [], + "numScoreSets": 0, # NOTE: This is context-dependent and may need overriding per test } TEST_EXPERIMENT_WITH_KEYWORD_HAS_DUPLICATE_OTHERS_RESPONSE = { @@ -475,6 +477,7 @@ "urn": None, "experimentSetUrn": None, "officialCollections": [], + "numScoreSets": 0, # NOTE: This is context-dependent and may need overriding per test } TEST_TAXONOMY = { diff --git a/tests/routers/test_experiments.py b/tests/routers/test_experiments.py index 8c327845..3941bbbd 100644 --- a/tests/routers/test_experiments.py +++ b/tests/routers/test_experiments.py @@ -60,8 +60,7 @@ def test_create_minimal_experiment(client, setup_router_db): expected_response = deepcopy(TEST_MINIMAL_EXPERIMENT_RESPONSE) expected_response.update({ "urn": response_data["urn"], - "experimentSetUrn": response_data["experimentSetUrn"], - "numScoreSets": 0 + "experimentSetUrn": response_data["experimentSetUrn"] }) assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: @@ -85,8 +84,7 @@ def test_create_experiment_with_contributor(client, setup_router_db): expected_response = deepcopy(TEST_MINIMAL_EXPERIMENT_RESPONSE) expected_response.update({ "urn": response_data["urn"], - "experimentSetUrn": response_data["experimentSetUrn"], - "numScoreSets": 0 + "experimentSetUrn": response_data["experimentSetUrn"] }) expected_response["contributors"] = [ { @@ -111,8 +109,7 @@ def test_create_experiment_with_keywords(session, client, setup_router_db): expected_response = deepcopy(TEST_EXPERIMENT_WITH_KEYWORD_RESPONSE) expected_response.update({ "urn": response_data["urn"], - "experimentSetUrn": response_data["experimentSetUrn"], - "numScoreSets": 0 + "experimentSetUrn": response_data["experimentSetUrn"] }) assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: @@ -408,8 +405,7 @@ def test_create_experiment_that_keywords_have_duplicate_others(client, setup_rou expected_response = deepcopy(TEST_EXPERIMENT_WITH_KEYWORD_HAS_DUPLICATE_OTHERS_RESPONSE) expected_response.update({ "urn": response_data["urn"], - "experimentSetUrn": response_data["experimentSetUrn"], - "numScoreSets": 0 + "experimentSetUrn": response_data["experimentSetUrn"] }) assert sorted(expected_response.keys()) == sorted(response_data.keys()) for key in expected_response: @@ -1012,8 +1008,7 @@ def test_get_own_private_experiment(client, setup_router_db): expected_response = deepcopy(TEST_MINIMAL_EXPERIMENT_RESPONSE) expected_response.update({ "urn": experiment["urn"], - "experimentSetUrn": experiment["experimentSetUrn"], - "numScoreSets": 0 + "experimentSetUrn": experiment["experimentSetUrn"] }) response = client.get(f"/api/v1/experiments/{experiment['urn']}") assert response.status_code == 200 @@ -1049,8 +1044,7 @@ def test_admin_can_get_other_users_private_experiment(client, admin_app_override expected_response = deepcopy(TEST_MINIMAL_EXPERIMENT_RESPONSE) expected_response.update({ "urn": experiment["urn"], - "experimentSetUrn": experiment["experimentSetUrn"], - "numScoreSets": 0 + 
"experimentSetUrn": experiment["experimentSetUrn"] }) with DependencyOverrider(admin_app_overrides): response = client.get(f"/api/v1/experiments/{experiment['urn']}") From 0050085ca2630dbef29c36e4bbe5370b1956813f Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 2 Jun 2025 12:43:17 -0700 Subject: [PATCH 157/166] Resolved importorskips prior to MaveDB imports --- tests/routers/test_experiment_set.py | 35 +++++++++++++++++----------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/tests/routers/test_experiment_set.py b/tests/routers/test_experiment_set.py index 89c6e456..ebf5c9da 100644 --- a/tests/routers/test_experiment_set.py +++ b/tests/routers/test_experiment_set.py @@ -1,5 +1,12 @@ +# ruff: noqa: E402 + import pytest from unittest.mock import patch + +arq = pytest.importorskip("arq") +cdot = pytest.importorskip("cdot") +fastapi = pytest.importorskip("fastapi") + from mavedb.models.experiment import Experiment as ExperimentDbModel from mavedb.models.experiment_set import ExperimentSet as ExperimentSetDbModel from mavedb.models.score_set import ScoreSet as ScoreSetDbModel @@ -12,10 +19,6 @@ from tests.helpers.util.score_set import create_seq_score_set_with_variants, publish_score_set from tests.helpers.util.user import change_ownership -arq = pytest.importorskip("arq") -cdot = pytest.importorskip("cdot") -fastapi = pytest.importorskip("fastapi") - def test_users_get_one_private_experiment_from_own_experiment_set(client, setup_router_db): experiment = create_experiment(client) @@ -28,7 +31,8 @@ def test_users_get_one_private_experiment_from_own_experiment_set(client, setup_ def test_users_get_one_experiment_one_score_set_from_own_private_experiment_set( - session, data_provider, client, setup_router_db, data_files): + session, data_provider, client, setup_router_db, data_files +): experiment = create_experiment(client) score_set = create_seq_score_set_with_variants( client, session, data_provider, experiment["urn"], data_files / "scores.csv" @@ -43,7 +47,8 @@ def test_users_get_one_experiment_one_score_set_from_own_private_experiment_set( def test_users_get_one_experiment_one_score_set_from_others_private_experiment_set( - session, data_provider, client, setup_router_db, data_files): + session, data_provider, client, setup_router_db, data_files +): experiment = create_experiment(client) score_set = create_seq_score_set_with_variants( client, session, data_provider, experiment["urn"], data_files / "scores.csv" @@ -85,7 +90,8 @@ def test_users_get_one_experiment_one_score_set_from_others_private_experiment_s def test_users_get_one_experiment_none_score_set_from_others_private_experiment_set( - session, data_provider, client, setup_router_db, data_files): + session, data_provider, client, setup_router_db, data_files +): experiment = create_experiment(client) score_set = create_seq_score_set_with_variants( client, session, data_provider, experiment["urn"], data_files / "scores.csv" @@ -118,7 +124,8 @@ def test_users_get_one_experiment_none_score_set_from_others_private_experiment_ def test_users_get_none_experiment_none_score_set_from_others_private_experiment_set( - session, data_provider, client, setup_router_db, data_files): + session, data_provider, client, setup_router_db, data_files +): experiment = create_experiment(client) score_set = create_seq_score_set_with_variants( client, session, data_provider, experiment["urn"], data_files / "scores.csv" @@ -141,7 +148,8 @@ def test_users_get_none_experiment_none_score_set_from_others_private_experiment def 
test_users_get_one_experiment_one_score_set_from_own_public_experiment_set( - session, data_provider, client, setup_router_db, data_files): + session, data_provider, client, setup_router_db, data_files +): experiment = create_experiment(client) score_set = create_seq_score_set_with_variants( client, session, data_provider, experiment["urn"], data_files / "scores.csv" @@ -160,7 +168,8 @@ def test_users_get_one_experiment_one_score_set_from_own_public_experiment_set( def test_users_get_one_experiment_one_score_set_from_other_public_experiment_set( - session, data_provider, client, setup_router_db, data_files): + session, data_provider, client, setup_router_db, data_files +): experiment = create_experiment(client) score_set = create_seq_score_set_with_variants( client, session, data_provider, experiment["urn"], data_files / "scores.csv" @@ -170,12 +179,12 @@ def test_users_get_one_experiment_one_score_set_from_other_public_experiment_set worker_queue.assert_called_once() change_ownership(session, pub_score_set["urn"], ScoreSetDbModel) - change_ownership(session, pub_score_set['experiment']['urn'], ExperimentDbModel) - change_ownership(session, pub_score_set['experiment']['experimentSetUrn'], ExperimentSetDbModel) + change_ownership(session, pub_score_set["experiment"]["urn"], ExperimentDbModel) + change_ownership(session, pub_score_set["experiment"]["experimentSetUrn"], ExperimentSetDbModel) response = client.get(f"/api/v1/experiment-sets/{pub_score_set['experiment']['experimentSetUrn']}") assert response.status_code == 200 response_data = response.json() assert response_data["numExperiments"] == 1 assert response_data["experiments"][0]["urn"] == pub_score_set["experiment"]["urn"] assert response_data["experiments"][0]["numScoreSets"] == 1 - assert pub_score_set["urn"] in response_data["experiments"][0]["scoreSetUrns"] \ No newline at end of file + assert pub_score_set["urn"] in response_data["experiments"][0]["scoreSetUrns"] From 8637a181211517b6d60f809a0ab2a5140b6afbac Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Mon, 2 Jun 2025 12:43:42 -0700 Subject: [PATCH 158/166] Remove obsolete Any typing import --- src/mavedb/lib/experiments.py | 36 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/mavedb/lib/experiments.py b/src/mavedb/lib/experiments.py index d04107bf..6e133609 100644 --- a/src/mavedb/lib/experiments.py +++ b/src/mavedb/lib/experiments.py @@ -1,5 +1,5 @@ import logging -from typing import Any, Optional +from typing import Optional from sqlalchemy import func, or_, not_ from sqlalchemy.orm import Session @@ -112,7 +112,7 @@ def search_experiments( # Keep experiments without any score sets not_(Experiment.score_sets.any()), # Keep experiments where score sets exist but have no meta_analyzes_score_sets - Experiment.score_sets.any(not_(ScoreSet.meta_analyzes_score_sets.any())) + Experiment.score_sets.any(not_(ScoreSet.meta_analyzes_score_sets.any())), ) ) else: @@ -132,27 +132,27 @@ def search_experiments( def enrich_experiment_with_num_score_sets( - item_update: Experiment, user_data: Optional[UserData] + item_update: Experiment, user_data: Optional[UserData] ) -> experiment.Experiment: """ Validate and update the number of score set in experiment. The superseded score set is excluded. 
Data structure: experiment{score_set_urns, num_score_sets} """ filter_superseded_score_set_tails = [ - find_superseded_score_set_tail( - score_set, - Action.READ, - user_data - ) for score_set in item_update.score_sets + find_superseded_score_set_tail(score_set, Action.READ, user_data) for score_set in item_update.score_sets ] - filtered_score_set_urns = sorted({ - score_set.urn - for score_set in filter_superseded_score_set_tails - if score_set is not None and score_set.urn is not None - }) - - updated_experiment = experiment.Experiment.from_orm(item_update).copy(update={ - "num_score_sets": len(filtered_score_set_urns), - "score_set_urns": filtered_score_set_urns, - }) + filtered_score_set_urns = sorted( + { + score_set.urn + for score_set in filter_superseded_score_set_tails + if score_set is not None and score_set.urn is not None + } + ) + + updated_experiment = experiment.Experiment.from_orm(item_update).copy( + update={ + "num_score_sets": len(filtered_score_set_urns), + "score_set_urns": filtered_score_set_urns, + } + ) return updated_experiment From 2a22cd2f97c7f717319ac92a8cb522e981535db2 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Tue, 3 Jun 2025 09:29:00 +1000 Subject: [PATCH 159/166] Move the numScoreSet to keys to be set part. --- tests/helpers/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index aac54425..2dc9b770 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -377,7 +377,6 @@ "creationDate": date.today().isoformat(), "modificationDate": date.today().isoformat(), "scoreSetUrns": [], - "numScoreSets": 0, # NOTE: This is context-dependent and may need overriding per test "contributors": [], "keywords": [], "doiIdentifiers": [], @@ -388,6 +387,7 @@ "urn": None, "experimentSetUrn": None, "officialCollections": [], + "numScoreSets": 0, } TEST_EXPERIMENT_WITH_KEYWORD_RESPONSE = { From 3e165bce8f3777b6e8abc08ee2c1e8627512ef9b Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Wed, 4 Jun 2025 11:54:52 -0700 Subject: [PATCH 160/166] Fix bugs in mapper job, and centralize some mapper resources variant_mapper_manager previously failed because of an incorrectly placed 'where' function and because we did two related db queries, one for a score set and one for target gene(s) within that score set, which resulted in an error when adding the score set changes to the db. Instead, query to select the score set, and then loop through the score set's target genes rather than querying the target genes table in the db. 
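As an illustration of the fix described above, a minimal sketch of the lookup pattern (the helper name is hypothetical; score_set, target_gene_identifier, and the model imports are taken from this patch):

    from typing import Optional

    from mavedb.models.score_set import ScoreSet
    from mavedb.models.target_gene import TargetGene


    def target_gene_for_identifier(
        score_set: ScoreSet, target_gene_identifier: str
    ) -> Optional[TargetGene]:
        # Walk the target_genes relationship on the score set we already selected,
        # instead of issuing a second query against the target_genes table.
        return next(
            (tg for tg in score_set.target_genes if tg.name == target_gene_identifier),
            None,
        )

A caller can then raise when the lookup returns None, as the worker job does for a target gene that is missing from the score set.
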
--- src/mavedb/data_providers/services.py | 30 +++-------------------- src/mavedb/lib/mapping.py | 34 +++++++++++++++++++++++++++ src/mavedb/worker/jobs.py | 33 ++++++++------------------ 3 files changed, 47 insertions(+), 50 deletions(-) create mode 100644 src/mavedb/lib/mapping.py diff --git a/src/mavedb/data_providers/services.py b/src/mavedb/data_providers/services.py index 3b241bef..bff4bdb4 100644 --- a/src/mavedb/data_providers/services.py +++ b/src/mavedb/data_providers/services.py @@ -1,10 +1,10 @@ import os -from datetime import date -from typing import Optional, TypedDict +from typing import Optional -import requests from cdot.hgvs.dataproviders import SeqFetcher, ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider +from mavedb.lib.mapping import VRSMap + GENOMIC_FASTA_FILES = [ "/data/GCF_000001405.39_GRCh38.p13_genomic.fna.gz", "/data/GCF_000001405.25_GRCh37.p13_genomic.fna.gz", @@ -21,29 +21,5 @@ def cdot_rest() -> RESTDataProvider: return RESTDataProvider(seqfetcher=seqfetcher()) -class VRSMap: - url: str - - class ScoreSetMappingResults(TypedDict): - metadata: Optional[dict[str, str]] - dcd_mapping_version: str - mapped_date_utc: date - computed_genomic_reference_sequence: Optional[dict[str, str]] - mapped_genomic_reference_sequence: Optional[dict[str, str]] - computed_protein_reference_sequence: Optional[dict[str, str]] - mapped_protein_reference_sequence: Optional[dict[str, str]] - mapped_scores: Optional[list[dict]] - error_message: Optional[str] - - def __init__(self, url: str) -> None: - self.url = url - - def map_score_set(self, score_set_urn: str) -> ScoreSetMappingResults: - uri = f"{self.url}/api/v1/map/{score_set_urn}" - response = requests.post(uri) - response.raise_for_status() - return response.json() - - def vrs_mapper(url: Optional[str] = None) -> VRSMap: return VRSMap(DCD_MAP_URL) if not url else VRSMap(url) diff --git a/src/mavedb/lib/mapping.py b/src/mavedb/lib/mapping.py new file mode 100644 index 00000000..7727199f --- /dev/null +++ b/src/mavedb/lib/mapping.py @@ -0,0 +1,34 @@ +from datetime import date +from typing import Optional, TypedDict + +import requests + +ANNOTATION_LAYERS = { + "g": "genomic", + "p": "protein", + "c": "cdna", +} + + +class VRSMap: + url: str + + class ScoreSetMappingResults(TypedDict): + metadata: Optional[dict[str, str]] + dcd_mapping_version: str + mapped_date_utc: date + computed_genomic_reference_sequence: Optional[dict[str, str]] + mapped_genomic_reference_sequence: Optional[dict[str, str]] + computed_protein_reference_sequence: Optional[dict[str, str]] + mapped_protein_reference_sequence: Optional[dict[str, str]] + mapped_scores: Optional[list[dict]] + error_message: Optional[str] + + def __init__(self, url: str) -> None: + self.url = url + + def map_score_set(self, score_set_urn: str) -> ScoreSetMappingResults: + uri = f"{self.url}/api/v1/map/{score_set_urn}" + response = requests.post(uri) + response.raise_for_status() + return response.json() diff --git a/src/mavedb/worker/jobs.py b/src/mavedb/worker/jobs.py index b3511fd7..3eced28a 100644 --- a/src/mavedb/worker/jobs.py +++ b/src/mavedb/worker/jobs.py @@ -35,6 +35,7 @@ NonexistentMappingResultsError, ) from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.mapping import ANNOTATION_LAYERS from mavedb.lib.score_sets import ( columns_for_dataset, create_variants, @@ -50,7 +51,6 @@ from mavedb.models.enums.processing_state import ProcessingState from mavedb.models.mapped_variant import MappedVariant from 
mavedb.models.published_variant import PublishedVariantsMV -from mavedb.models.target_gene import TargetGene from mavedb.models.score_set import ScoreSet from mavedb.models.user import User from mavedb.models.variant import Variant @@ -248,12 +248,6 @@ async def create_variants_for_score_set( # Mapping variants #################################################################################################### -ANNOTATION_LAYERS = { - "g": "genomic", - "p": "protein", - "c": "cdna", -} - @asynccontextmanager async def mapping_in_execution(redis: ArqRedis, job_id: str): @@ -397,25 +391,19 @@ async def map_variants_for_score_set( score_set.mapping_state = MappingState.failed score_set.mapping_errors = {"error_message": mapping_results.get("error_message")} else: - # TODO(VariantEffect/dcd-mapping2#2) after adding multi target mapping support: - # this assumes single-target mapping, will need to be changed to support multi-target mapping - # just in case there are multiple target genes in the db for a score set (this point shouldn't be reached - # while we only support single-target mapping), match up the target sequence with the one in the computed genomic reference sequence. - # TODO(VariantEffect/dcd-mapping2#3) after adding accession-based score set mapping support: - # this also assumes that the score set is based on a target sequence, not a target accession - reference_metadata = mapping_results.get("reference_sequences") if not reference_metadata: raise NonexistentMappingReferenceError() for target_gene_identifier in reference_metadata: - target_gene = db.scalars( - select( - TargetGene.where( - TargetGene.name == target_gene_identifier, TargetGene.score_set_id == score_set.id - ) - ) - ).one_or_none() + target_gene = next( + ( + target_gene + for target_gene in score_set.target_genes + if target_gene.name == target_gene_identifier + ), + None, + ) if not target_gene: raise ValueError( f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." @@ -431,8 +419,7 @@ async def map_variants_for_score_set( if layer_premapped: pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { k: layer_premapped[k] - for k in set(list(layer_premapped.keys())) - - excluded_pre_mapped_keys # TODO does this work if no 'sequence' key? 
+ for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys } layer_postmapped = reference_metadata[target_gene_identifier][annotation_layer].get( "mapped_reference_sequence" From 0d6efc67af39a9b64ff2b4ea711c22edb49b79b4 Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Wed, 4 Jun 2025 11:59:00 -0700 Subject: [PATCH 161/166] Update tests to reflect multi-target mapper changes --- tests/helpers/constants.py | 48 +++++++++++++++++++++++++++++++------- tests/worker/test_jobs.py | 15 ++++++++---- 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index 199ff1b4..af1ca17f 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -832,17 +832,47 @@ } } -TEST_VARIANT_MAPPING_SCAFFOLD = { +TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD = { "metadata": {}, - "computed_genomic_reference_sequence": { - "sequence_type": "dna", - "sequence_id": "ga4gh:SQ.ref_test", - "sequence": "ACGTTT", + "reference_sequences": { + "TEST1": { + "g": { + "computed_reference_sequence": { + "sequence_type": "dna", + "sequence_id": "ga4gh:SQ.ref_test", + "sequence": "ACGTTT", + }, + "mapped_reference_sequence": { + "sequence_type": "dna", + "sequence_id": "ga4gh:SQ.map_test", + "sequence_accessions": ["NC_000001.11"], + }, + } + } }, - "mapped_genomic_reference_sequence": { - "sequence_type": "dna", - "sequence_id": "ga4gh:SQ.map_test", - "sequence_accessions": ["NC_000001.11"], + "mapped_scores": [], + "vrs_version": "2.0", + "dcd_mapping_version": "pytest.0.0", + "mapped_date_utc": datetime.isoformat(datetime.now()), +} + +TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD = { + "metadata": {}, + "reference_sequences": { + "TEST2": { + "g": { + "computed_reference_sequence": { + "sequence_type": "dna", + "sequence_id": "ga4gh:SQ.ref_test", + "sequence": "ACGTTT", + }, + "mapped_reference_sequence": { + "sequence_type": "dna", + "sequence_id": "ga4gh:SQ.map_test", + "sequence_accessions": ["NC_000001.11"], + }, + } + } }, "mapped_scores": [], "vrs_version": "2.0", diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index b7b350b0..117ebed1 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -40,6 +40,7 @@ from tests.helpers.constants import ( + TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD, TEST_CLINGEN_SUBMISSION_RESPONSE, TEST_CLINGEN_SUBMISSION_BAD_RESQUEST_RESPONSE, TEST_CLINGEN_SUBMISSION_UNAUTHORIZED_RESPONSE, @@ -48,7 +49,7 @@ TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_EXPERIMENT, TEST_MINIMAL_SEQ_SCORESET, - TEST_VARIANT_MAPPING_SCAFFOLD, + TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD, VALID_NT_ACCESSION, TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS1_X, TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS2_X, @@ -150,10 +151,13 @@ async def sanitize_mapping_queue(standalone_worker_context, score_set): assert int(queued_job.decode("utf-8")) == score_set.id -async def setup_mapping_output(async_client, session, score_set, empty=False): +async def setup_mapping_output(async_client, session, score_set, score_set_is_seq_based=True, empty=False): score_set_response = await async_client.get(f"/api/v1/score-sets/{score_set.urn}") - mapping_output = deepcopy(TEST_VARIANT_MAPPING_SCAFFOLD) + if score_set_is_seq_based: + mapping_output = deepcopy(TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD) + else: + mapping_output = deepcopy(TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD) mapping_output["metadata"] = score_set_response.json() if empty: @@ -477,11 +481,12 @@ async def 
test_create_variants_for_score_set_enqueues_manager_and_successful_map arq_worker, arq_redis, ): + score_set_is_seq = all(["targetSequence" in target for target in input_score_set["targetGenes"]]) score_set_urn, scores, counts = await setup_records_and_files(async_client, data_files, input_score_set) score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set) + return await setup_mapping_output(async_client, session, score_set, score_set_is_seq) async def dummy_submission_job(): return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] @@ -511,7 +516,7 @@ async def dummy_linking_job(): await arq_worker.run_check() # Call data provider _get_transcript method if this is an accession based score set, otherwise do not. - if all(["targetSequence" in target for target in input_score_set["targetGenes"]]): + if score_set_is_seq: hdp.assert_not_called() else: hdp.assert_called_once() From 06d199630320867b5b4b5661d3694257a1739fcd Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Mon, 9 Jun 2025 14:14:13 -0700 Subject: [PATCH 162/166] Update VRSMap class and script for multi-target mapper changes --- src/mavedb/lib/mapping.py | 7 +- .../scripts/populate_mapped_variants.py | 77 +++++++++---------- 2 files changed, 39 insertions(+), 45 deletions(-) diff --git a/src/mavedb/lib/mapping.py b/src/mavedb/lib/mapping.py index 7727199f..fa608f8c 100644 --- a/src/mavedb/lib/mapping.py +++ b/src/mavedb/lib/mapping.py @@ -1,5 +1,5 @@ from datetime import date -from typing import Optional, TypedDict +from typing import Optional, TypedDict, Union import requests @@ -17,10 +17,7 @@ class ScoreSetMappingResults(TypedDict): metadata: Optional[dict[str, str]] dcd_mapping_version: str mapped_date_utc: date - computed_genomic_reference_sequence: Optional[dict[str, str]] - mapped_genomic_reference_sequence: Optional[dict[str, str]] - computed_protein_reference_sequence: Optional[dict[str, str]] - mapped_protein_reference_sequence: Optional[dict[str, str]] + reference_sequences: Optional[dict[str, dict[str, dict[str, dict[str, Union[str, list[str]]]]]]] mapped_scores: Optional[list[dict]] error_message: Optional[str] diff --git a/src/mavedb/scripts/populate_mapped_variants.py b/src/mavedb/scripts/populate_mapped_variants.py index d1adbd42..ca4b251a 100644 --- a/src/mavedb/scripts/populate_mapped_variants.py +++ b/src/mavedb/scripts/populate_mapped_variants.py @@ -8,11 +8,12 @@ from sqlalchemy.orm import Session from mavedb.data_providers.services import vrs_mapper +from mavedb.lib.exceptions import NonexistentMappingReferenceError from mavedb.lib.logging.context import format_raised_exception_info_as_dict +from mavedb.lib.mapping import ANNOTATION_LAYERS from mavedb.models.enums.mapping_state import MappingState from mavedb.models.score_set import ScoreSet from mavedb.models.mapped_variant import MappedVariant -from mavedb.models.target_gene import TargetGene from mavedb.models.variant import Variant from mavedb.scripts.environment import script_environment, with_database_session @@ -91,47 +92,43 @@ def populate_mapped_variant_data(db: Session, urns: Sequence[Optional[str]], all db.commit() logger.info(f"No mapped variants available for {score_set.urn}.") else: - computed_genomic_ref = mapped_scoreset.get("computed_genomic_reference_sequence") - mapped_genomic_ref = mapped_scoreset.get("mapped_genomic_reference_sequence") - computed_protein_ref = 
mapped_scoreset.get("computed_protein_reference_sequence") - mapped_protein_ref = mapped_scoreset.get("mapped_protein_reference_sequence") - - # assumes one target gene per score set, which is currently true in mavedb as of sept. 2024. - target_gene = db.scalars( - select(TargetGene) - .join(ScoreSet) - .where( - ScoreSet.urn == str(score_set.urn), + reference_metadata = mapped_scoreset.get("reference_sequences") + if not reference_metadata: + raise NonexistentMappingReferenceError() + + for target_gene_identifier in reference_metadata: + target_gene = next( + ( + target_gene + for target_gene in score_set.target_genes + if target_gene.name == target_gene_identifier + ), + None, ) - ).one() - - excluded_pre_mapped_keys = {"sequence"} - if computed_genomic_ref and mapped_genomic_ref: - pre_mapped_metadata = computed_genomic_ref - target_gene.pre_mapped_metadata = cast( - { - "genomic": { - k: pre_mapped_metadata[k] - for k in set(list(pre_mapped_metadata.keys())) - excluded_pre_mapped_keys + if not target_gene: + raise ValueError( + f"Target gene {target_gene_identifier} not found in database for score set {score_set.urn}." + ) + # allow for multiple annotation layers + pre_mapped_metadata = {} + post_mapped_metadata = {} + excluded_pre_mapped_keys = {"sequence"} + for annotation_layer in reference_metadata[target_gene_identifier]: + layer_premapped = reference_metadata[target_gene_identifier][annotation_layer].get( + "computed_reference_sequence" + ) + if layer_premapped: + pre_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = { + k: layer_premapped[k] + for k in set(list(layer_premapped.keys())) - excluded_pre_mapped_keys } - }, - JSONB, - ) - target_gene.post_mapped_metadata = cast({"genomic": mapped_genomic_ref}, JSONB) - elif computed_protein_ref and mapped_protein_ref: - pre_mapped_metadata = computed_protein_ref - target_gene.pre_mapped_metadata = cast( - { - "protein": { - k: pre_mapped_metadata[k] - for k in set(list(pre_mapped_metadata.keys())) - excluded_pre_mapped_keys - } - }, - JSONB, - ) - target_gene.post_mapped_metadata = cast({"protein": mapped_protein_ref}, JSONB) - else: - raise ValueError(f"incomplete or inconsistent metadata for score set {score_set.urn}") + layer_postmapped = reference_metadata[target_gene_identifier][annotation_layer].get( + "mapped_reference_sequence" + ) + if layer_postmapped: + post_mapped_metadata[ANNOTATION_LAYERS[annotation_layer]] = layer_postmapped + target_gene.pre_mapped_metadata = cast(pre_mapped_metadata, JSONB) + target_gene.post_mapped_metadata = cast(post_mapped_metadata, JSONB) mapped_variants = [ variant_from_mapping(db=db, mapping=mapped_score, dcd_mapping_version=dcd_mapping_version) From deabbc28e15dab81326b1a577627240dc79784ca Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Tue, 10 Jun 2025 13:55:29 -0700 Subject: [PATCH 163/166] Create accession-based score set in db before mapping tests --- tests/worker/test_jobs.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index 117ebed1..05de6e33 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -58,16 +58,17 @@ ) from tests.helpers.util.exceptions import awaitable_exception from tests.helpers.util.experiment import create_experiment -from tests.helpers.util.score_set import create_seq_score_set +from tests.helpers.util.score_set import create_acc_score_set, create_seq_score_set @pytest.fixture def populate_worker_db(data_files, client): # create score set via API. 
In production, the API would invoke this worker job experiment = create_experiment(client) - score_set = create_seq_score_set(client, experiment["urn"]) + seq_score_set = create_seq_score_set(client, experiment["urn"]) + acc_score_set = create_acc_score_set(client, experiment["urn"]) - return score_set["urn"] + return [seq_score_set["urn"], acc_score_set["urn"]] async def setup_records_and_files(async_client, data_files, input_score_set): From ccbe20f77b4c8622ef31bebbb417447f641ae989 Mon Sep 17 00:00:00 2001 From: Sally Grindstaff Date: Wed, 11 Jun 2025 08:19:06 -0700 Subject: [PATCH 164/166] Add worker tests for multi-target scoresets --- tests/helpers/constants.py | 178 ++++++++++++++++++++++ tests/helpers/util/score_set.py | 19 +++ tests/worker/data/counts_multi_target.csv | 4 + tests/worker/data/scores_multi_target.csv | 4 + tests/worker/test_jobs.py | 72 +++++++-- 5 files changed, 262 insertions(+), 15 deletions(-) create mode 100644 tests/worker/data/counts_multi_target.csv create mode 100644 tests/worker/data/scores_multi_target.csv diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index af1ca17f..c70ce624 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -773,6 +773,147 @@ "officialCollections": [], } +TEST_MINIMAL_MULTI_TARGET_SCORESET = { + "title": "Test Multi Target Score Set Title", + "shortDescription": "Test multi target score set", + "abstractText": "Abstract", + "methodText": "Methods", + "licenseId": 1, + "targetGenes": [ + { + "name": "TEST3", + "category": "protein_coding", + "externalIdentifiers": [], + "targetSequence": { + "sequenceType": "dna", + "sequence": "ACGTTT", + "label": "TEST3", + "taxonomy": { + "taxId": TEST_TAXONOMY["tax_id"], + "organismName": TEST_TAXONOMY["organism_name"], + "commonName": TEST_TAXONOMY["common_name"], + "rank": TEST_TAXONOMY["rank"], + "hasDescribedSpeciesName": TEST_TAXONOMY["has_described_species_name"], + "articleReference": TEST_TAXONOMY["article_reference"], + "id": TEST_TAXONOMY["id"], + "url": TEST_TAXONOMY["url"], + }, + }, + }, + { + "name": "TEST4", + "category": "protein_coding", + "externalIdentifiers": [], + "targetSequence": { + "sequenceType": "dna", + "sequence": "TAATGCC", + "label": "TEST4", + "taxonomy": { + "taxId": TEST_TAXONOMY["tax_id"], + "organismName": TEST_TAXONOMY["organism_name"], + "commonName": TEST_TAXONOMY["common_name"], + "rank": TEST_TAXONOMY["rank"], + "hasDescribedSpeciesName": TEST_TAXONOMY["has_described_species_name"], + "articleReference": TEST_TAXONOMY["article_reference"], + "id": TEST_TAXONOMY["id"], + "url": TEST_TAXONOMY["url"], + }, + }, + }, + ], +} + +TEST_MINIMAL_MULTI_TARGET_SCORESET_RESPONSE = { + "recordType": "ScoreSet", + "title": "Test Multi Target Score Set Title", + "shortDescription": "Test multi target score set", + "abstractText": "Abstract", + "methodText": "Methods", + "createdBy": { + "recordType": "User", + "firstName": TEST_USER["first_name"], + "lastName": TEST_USER["last_name"], + "orcidId": TEST_USER["username"], + }, + "modifiedBy": { + "recordType": "User", + "firstName": TEST_USER["first_name"], + "lastName": TEST_USER["last_name"], + "orcidId": TEST_USER["username"], + }, + "creationDate": date.today().isoformat(), + "modificationDate": date.today().isoformat(), + "license": { + "recordType": "ShortLicense", + **{camelize(k): v for k, v in TEST_LICENSE.items() if k not in ("text",)}, + }, + "numVariants": 0, + "targetGenes": [ + { + "recordType": "TargetGene", + "name": "TEST3", + "category": "protein_coding", + 
"externalIdentifiers": [], + "id": 1, + "targetSequence": { + "recordType": "TargetSequence", + "sequenceType": "dna", + "sequence": "ACGTTT", + "label": "TEST3", + "taxonomy": { + "recordType": "Taxonomy", + "taxId": TEST_TAXONOMY["tax_id"], + "organismName": TEST_TAXONOMY["organism_name"], + "commonName": TEST_TAXONOMY["common_name"], + "rank": TEST_TAXONOMY["rank"], + "hasDescribedSpeciesName": TEST_TAXONOMY["has_described_species_name"], + "articleReference": TEST_TAXONOMY["article_reference"], + "id": TEST_TAXONOMY["id"], + "url": TEST_TAXONOMY["url"], + }, + }, + }, + { + "recordType": "TargetGene", + "name": "TEST4", + "category": "protein_coding", + "externalIdentifiers": [], + "id": 1, + "targetSequence": { + "recordType": "TargetSequence", + "sequenceType": "dna", + "sequence": "TAATGCC", + "label": "TEST4", + "taxonomy": { + "recordType": "Taxonomy", + "taxId": TEST_TAXONOMY["tax_id"], + "organismName": TEST_TAXONOMY["organism_name"], + "commonName": TEST_TAXONOMY["common_name"], + "rank": TEST_TAXONOMY["rank"], + "hasDescribedSpeciesName": TEST_TAXONOMY["has_described_species_name"], + "articleReference": TEST_TAXONOMY["article_reference"], + "id": TEST_TAXONOMY["id"], + "url": TEST_TAXONOMY["url"], + }, + }, + }, + ], + "metaAnalyzesScoreSetUrns": [], + "metaAnalyzedByScoreSetUrns": [], + "contributors": [], + "doiIdentifiers": [], + "primaryPublicationIdentifiers": [], + "secondaryPublicationIdentifiers": [], + "datasetColumns": {}, + "externalLinks": {}, + "private": True, + "experiment": TEST_MINIMAL_EXPERIMENT_RESPONSE, + # keys to be set after receiving response + "urn": None, + "processingState": ProcessingState.incomplete.name, + "officialCollections": [], +} + TEST_NT_CDOT_TRANSCRIPT = { "start_codon": 0, "stop_codon": 18, @@ -880,6 +1021,43 @@ "mapped_date_utc": datetime.isoformat(datetime.now()), } +TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD = { + "metadata": {}, + "reference_sequences": { + "TEST3": { + "g": { + "computed_reference_sequence": { + "sequence_type": "dna", + "sequence_id": "ga4gh:SQ.ref_test3", + "sequence": "ACGTTT", + }, + "mapped_reference_sequence": { + "sequence_type": "dna", + "sequence_id": "ga4gh:SQ.map_test", + "sequence_accessions": ["NC_000001.11"], + }, + } + }, + "TEST4": { + "g": { + "computed_reference_sequence": { + "sequence_type": "dna", + "sequence_id": "ga4gh:SQ.ref_test4", + "sequence": "TAATGCC", + }, + "mapped_reference_sequence": { + "sequence_type": "dna", + "sequence_id": "ga4gh:SQ.map_test", + "sequence_accessions": ["NC_000001.11"], + }, + } + }, + }, + "mapped_scores": [], + "vrs_version": "2.0", + "dcd_mapping_version": "pytest.0.0", + "mapped_date_utc": datetime.isoformat(datetime.now()), +} TEST_MINIMAL_MAPPED_VARIANT = { "pre_mapped": {}, diff --git a/tests/helpers/util/score_set.py b/tests/helpers/util/score_set.py index 69ff7ca5..13c96f91 100644 --- a/tests/helpers/util/score_set.py +++ b/tests/helpers/util/score_set.py @@ -16,6 +16,7 @@ from tests.helpers.constants import ( TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_SEQ_SCORESET, + TEST_MINIMAL_MULTI_TARGET_SCORESET, TEST_NT_CDOT_TRANSCRIPT, TEST_VALID_POST_MAPPED_VRS_ALLELE_VRS2_X, TEST_VALID_POST_MAPPED_VRS_CIS_PHASED_BLOCK, @@ -67,6 +68,24 @@ def create_acc_score_set( return response_data +def create_multi_target_score_set( + client: TestClient, experiment_urn: Optional[str], update: Optional[Dict[str, Any]] = None +) -> Dict[str, Any]: + score_set_payload = deepcopy(TEST_MINIMAL_MULTI_TARGET_SCORESET) + if experiment_urn is not None: + 
score_set_payload["experimentUrn"] = experiment_urn + if update is not None: + score_set_payload.update(update) + jsonschema.validate(instance=score_set_payload, schema=ScoreSetCreate.schema()) + + response = client.post("/api/v1/score-sets/", json=score_set_payload) + assert response.status_code == 200, "Could not create sequence based score set" + + response_data = response.json() + jsonschema.validate(instance=response_data, schema=ScoreSet.schema()) + return response_data + + def create_seq_score_set_with_mapped_variants( client, db, data_provider, experiment_urn, scores_csv_path, update=None, counts_csv_path=None ): diff --git a/tests/worker/data/counts_multi_target.csv b/tests/worker/data/counts_multi_target.csv new file mode 100644 index 00000000..37a1f200 --- /dev/null +++ b/tests/worker/data/counts_multi_target.csv @@ -0,0 +1,4 @@ +hgvs_nt,c_0,c_1 +TEST3:n.1A>T,10,20 +TEST3:n.6T>A,90,2 +TEST4:n.2A>T,15,4 diff --git a/tests/worker/data/scores_multi_target.csv b/tests/worker/data/scores_multi_target.csv new file mode 100644 index 00000000..11dcc55f --- /dev/null +++ b/tests/worker/data/scores_multi_target.csv @@ -0,0 +1,4 @@ +hgvs_nt,score +TEST3:n.1A>T,0.3 +TEST3:n.6T>A,-1.65 +TEST4:n.2A>T,0.1 diff --git a/tests/worker/test_jobs.py b/tests/worker/test_jobs.py index 05de6e33..cdd66c90 100644 --- a/tests/worker/test_jobs.py +++ b/tests/worker/test_jobs.py @@ -48,7 +48,9 @@ TEST_NT_CDOT_TRANSCRIPT, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_EXPERIMENT, + TEST_MINIMAL_MULTI_TARGET_SCORESET, TEST_MINIMAL_SEQ_SCORESET, + TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD, TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD, VALID_NT_ACCESSION, TEST_VALID_PRE_MAPPED_VRS_ALLELE_VRS1_X, @@ -58,7 +60,7 @@ ) from tests.helpers.util.exceptions import awaitable_exception from tests.helpers.util.experiment import create_experiment -from tests.helpers.util.score_set import create_acc_score_set, create_seq_score_set +from tests.helpers.util.score_set import create_acc_score_set, create_multi_target_score_set, create_seq_score_set @pytest.fixture @@ -67,8 +69,9 @@ def populate_worker_db(data_files, client): experiment = create_experiment(client) seq_score_set = create_seq_score_set(client, experiment["urn"]) acc_score_set = create_acc_score_set(client, experiment["urn"]) + multi_target_score_set = create_multi_target_score_set(client, experiment["urn"]) - return [seq_score_set["urn"], acc_score_set["urn"]] + return [seq_score_set["urn"], acc_score_set["urn"], multi_target_score_set["urn"]] async def setup_records_and_files(async_client, data_files, input_score_set): @@ -87,8 +90,16 @@ async def setup_records_and_files(async_client, data_files, input_score_set): score_set = score_set_response.json() jsonschema.validate(instance=score_set, schema=ScoreSet.schema()) - scores_fp = "scores.csv" if "targetSequence" in score_set["targetGenes"][0] else "scores_acc.csv" - counts_fp = "counts.csv" if "targetSequence" in score_set["targetGenes"][0] else "counts_acc.csv" + scores_fp = ( + "scores_multi_target.csv" + if len(score_set["targetGenes"]) > 1 + else ("scores.csv" if "targetSequence" in score_set["targetGenes"][0] else "scores_acc.csv") + ) + counts_fp = ( + "counts_multi_target.csv" + if len(score_set["targetGenes"]) > 1 + else ("counts.csv" if "targetSequence" in score_set["targetGenes"][0] else "counts_acc.csv") + ) with ( open(data_files / scores_fp, "rb") as score_file, open(data_files / counts_fp, "rb") as count_file, @@ -152,12 +163,19 @@ async def sanitize_mapping_queue(standalone_worker_context, 
score_set): assert int(queued_job.decode("utf-8")) == score_set.id -async def setup_mapping_output(async_client, session, score_set, score_set_is_seq_based=True, empty=False): +async def setup_mapping_output( + async_client, session, score_set, score_set_is_seq_based=True, score_set_is_multi_target=False, empty=False +): score_set_response = await async_client.get(f"/api/v1/score-sets/{score_set.urn}") if score_set_is_seq_based: - mapping_output = deepcopy(TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD) + if score_set_is_multi_target: + # If this is a multi-target sequence based score set, use the scaffold for that. + mapping_output = deepcopy(TEST_MULTI_TARGET_SCORESET_VARIANT_MAPPING_SCAFFOLD) + else: + mapping_output = deepcopy(TEST_SEQ_SCORESET_VARIANT_MAPPING_SCAFFOLD) else: + # there is not currently a multi-target accession-based score set test mapping_output = deepcopy(TEST_ACC_SCORESET_VARIANT_MAPPING_SCAFFOLD) mapping_output["metadata"] = score_set_response.json() @@ -199,6 +217,13 @@ async def setup_mapping_output(async_client, session, score_set, score_set_is_se ], }, ), + ( + TEST_MINIMAL_MULTI_TARGET_SCORESET, + { + "exception": "encountered 1 invalid variant strings.", + "detail": ["target sequence mismatch for 'n.1T>A' at row 0 for sequence TEST3"], + }, + ), ], ) async def test_create_variants_for_score_set_with_validation_error( @@ -215,8 +240,10 @@ async def test_create_variants_for_score_set_with_validation_error( if input_score_set == TEST_MINIMAL_SEQ_SCORESET: scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "c.1T>A" - else: + elif input_score_set == TEST_MINIMAL_ACC_SCORESET: scores.loc[:, HGVS_NT_COLUMN].iloc[0] = f"{VALID_NT_ACCESSION}:c.1T>A" + elif input_score_set == TEST_MINIMAL_MULTI_TARGET_SCORESET: + scores.loc[:, HGVS_NT_COLUMN].iloc[0] = "TEST3:n.1T>A" with ( patch.object( @@ -247,7 +274,9 @@ async def test_create_variants_for_score_set_with_validation_error( @pytest.mark.asyncio -@pytest.mark.parametrize("input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET)) +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) async def test_create_variants_for_score_set_with_caught_exception( input_score_set, setup_worker_db, @@ -281,7 +310,9 @@ async def test_create_variants_for_score_set_with_caught_exception( @pytest.mark.asyncio -@pytest.mark.parametrize("input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET)) +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) async def test_create_variants_for_score_set_with_caught_base_exception( input_score_set, setup_worker_db, @@ -314,7 +345,9 @@ async def test_create_variants_for_score_set_with_caught_base_exception( @pytest.mark.asyncio -@pytest.mark.parametrize("input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET)) +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) async def test_create_variants_for_score_set_with_existing_variants( input_score_set, setup_worker_db, @@ -370,7 +403,9 @@ async def test_create_variants_for_score_set_with_existing_variants( @pytest.mark.asyncio -@pytest.mark.parametrize("input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET)) +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) 
async def test_create_variants_for_score_set_with_existing_exceptions( input_score_set, setup_worker_db, @@ -434,7 +469,9 @@ async def test_create_variants_for_score_set_with_existing_exceptions( @pytest.mark.asyncio -@pytest.mark.parametrize("input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET)) +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) async def test_create_variants_for_score_set( input_score_set, setup_worker_db, @@ -472,7 +509,9 @@ async def test_create_variants_for_score_set( @pytest.mark.asyncio -@pytest.mark.parametrize("input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET)) +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) async def test_create_variants_for_score_set_enqueues_manager_and_successful_mapping( input_score_set, setup_worker_db, @@ -483,11 +522,12 @@ async def test_create_variants_for_score_set_enqueues_manager_and_successful_map arq_redis, ): score_set_is_seq = all(["targetSequence" in target for target in input_score_set["targetGenes"]]) + score_set_is_multi_target = len(input_score_set["targetGenes"]) > 1 score_set_urn, scores, counts = await setup_records_and_files(async_client, data_files, input_score_set) score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one() async def dummy_mapping_job(): - return await setup_mapping_output(async_client, session, score_set, score_set_is_seq) + return await setup_mapping_output(async_client, session, score_set, score_set_is_seq, score_set_is_multi_target) async def dummy_submission_job(): return [TEST_CLINGEN_SUBMISSION_RESPONSE, None] @@ -539,7 +579,9 @@ async def dummy_linking_job(): @pytest.mark.asyncio -@pytest.mark.parametrize("input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET)) +@pytest.mark.parametrize( + "input_score_set", (TEST_MINIMAL_SEQ_SCORESET, TEST_MINIMAL_ACC_SCORESET, TEST_MINIMAL_MULTI_TARGET_SCORESET) +) async def test_create_variants_for_score_set_exception_skips_mapping( input_score_set, setup_worker_db, From 2c5f3c98b1ff312ec7cb3c9da6998c707d225a1d Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 11 Jun 2025 12:47:32 -0700 Subject: [PATCH 165/166] Update requests dependency version to 2.32.2 in pyproject.toml --- poetry.lock | 1037 ++++++++++++++++++++++++------------------------ pyproject.toml | 2 +- 2 files changed, 527 insertions(+), 512 deletions(-) diff --git a/poetry.lock b/poetry.lock index b9900552..22840922 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. 
[[package]] name = "alembic" @@ -688,13 +688,13 @@ crt = ["awscrt (==0.21.2)"] [[package]] name = "botocore-stubs" -version = "1.38.9" +version = "1.38.30" description = "Type annotations and code completion for botocore" optional = false python-versions = ">=3.8" files = [ - {file = "botocore_stubs-1.38.9-py3-none-any.whl", hash = "sha256:2960c28500509acbe885b30907c997d96a6bfc492fb5165cebd45353111048d2"}, - {file = "botocore_stubs-1.38.9.tar.gz", hash = "sha256:a9fa4b77aebd463a6e0518961dc662f0e69bb8eb4fe035888fe9a1dbbf179b21"}, + {file = "botocore_stubs-1.38.30-py3-none-any.whl", hash = "sha256:2efb8bdf36504aff596c670d875d8f7dd15205277c15c4cea54afdba8200c266"}, + {file = "botocore_stubs-1.38.30.tar.gz", hash = "sha256:291d7bf39a316c00a8a55b7255489b02c0cea1a343482e7784e8d1e235bae995"}, ] [package.dependencies] @@ -1002,74 +1002,78 @@ type = ["pytest-mypy"] [[package]] name = "coverage" -version = "7.8.0" +version = "7.8.2" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.9" files = [ - {file = "coverage-7.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2931f66991175369859b5fd58529cd4b73582461877ecfd859b6549869287ffe"}, - {file = "coverage-7.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52a523153c568d2c0ef8826f6cc23031dc86cffb8c6aeab92c4ff776e7951b28"}, - {file = "coverage-7.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c8a5c139aae4c35cbd7cadca1df02ea8cf28a911534fc1b0456acb0b14234f3"}, - {file = "coverage-7.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5a26c0c795c3e0b63ec7da6efded5f0bc856d7c0b24b2ac84b4d1d7bc578d676"}, - {file = "coverage-7.8.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:821f7bcbaa84318287115d54becb1915eece6918136c6f91045bb84e2f88739d"}, - {file = "coverage-7.8.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a321c61477ff8ee705b8a5fed370b5710c56b3a52d17b983d9215861e37b642a"}, - {file = "coverage-7.8.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:ed2144b8a78f9d94d9515963ed273d620e07846acd5d4b0a642d4849e8d91a0c"}, - {file = "coverage-7.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:042e7841a26498fff7a37d6fda770d17519982f5b7d8bf5278d140b67b61095f"}, - {file = "coverage-7.8.0-cp310-cp310-win32.whl", hash = "sha256:f9983d01d7705b2d1f7a95e10bbe4091fabc03a46881a256c2787637b087003f"}, - {file = "coverage-7.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:5a570cd9bd20b85d1a0d7b009aaf6c110b52b5755c17be6962f8ccd65d1dbd23"}, - {file = "coverage-7.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7ac22a0bb2c7c49f441f7a6d46c9c80d96e56f5a8bc6972529ed43c8b694e27"}, - {file = "coverage-7.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bf13d564d310c156d1c8e53877baf2993fb3073b2fc9f69790ca6a732eb4bfea"}, - {file = "coverage-7.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5761c70c017c1b0d21b0815a920ffb94a670c8d5d409d9b38857874c21f70d7"}, - {file = "coverage-7.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5ff52d790c7e1628241ffbcaeb33e07d14b007b6eb00a19320c7b8a7024c040"}, - {file = "coverage-7.8.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d39fc4817fd67b3915256af5dda75fd4ee10621a3d484524487e33416c6f3543"}, - {file = 
"coverage-7.8.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b44674870709017e4b4036e3d0d6c17f06a0e6d4436422e0ad29b882c40697d2"}, - {file = "coverage-7.8.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8f99eb72bf27cbb167b636eb1726f590c00e1ad375002230607a844d9e9a2318"}, - {file = "coverage-7.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b571bf5341ba8c6bc02e0baeaf3b061ab993bf372d982ae509807e7f112554e9"}, - {file = "coverage-7.8.0-cp311-cp311-win32.whl", hash = "sha256:e75a2ad7b647fd8046d58c3132d7eaf31b12d8a53c0e4b21fa9c4d23d6ee6d3c"}, - {file = "coverage-7.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:3043ba1c88b2139126fc72cb48574b90e2e0546d4c78b5299317f61b7f718b78"}, - {file = "coverage-7.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bbb5cc845a0292e0c520656d19d7ce40e18d0e19b22cb3e0409135a575bf79fc"}, - {file = "coverage-7.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4dfd9a93db9e78666d178d4f08a5408aa3f2474ad4d0e0378ed5f2ef71640cb6"}, - {file = "coverage-7.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f017a61399f13aa6d1039f75cd467be388d157cd81f1a119b9d9a68ba6f2830d"}, - {file = "coverage-7.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0915742f4c82208ebf47a2b154a5334155ed9ef9fe6190674b8a46c2fb89cb05"}, - {file = "coverage-7.8.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a40fcf208e021eb14b0fac6bdb045c0e0cab53105f93ba0d03fd934c956143a"}, - {file = "coverage-7.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a1f406a8e0995d654b2ad87c62caf6befa767885301f3b8f6f73e6f3c31ec3a6"}, - {file = "coverage-7.8.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:77af0f6447a582fdc7de5e06fa3757a3ef87769fbb0fdbdeba78c23049140a47"}, - {file = "coverage-7.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f2d32f95922927186c6dbc8bc60df0d186b6edb828d299ab10898ef3f40052fe"}, - {file = "coverage-7.8.0-cp312-cp312-win32.whl", hash = "sha256:769773614e676f9d8e8a0980dd7740f09a6ea386d0f383db6821df07d0f08545"}, - {file = "coverage-7.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:e5d2b9be5b0693cf21eb4ce0ec8d211efb43966f6657807f6859aab3814f946b"}, - {file = "coverage-7.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ac46d0c2dd5820ce93943a501ac5f6548ea81594777ca585bf002aa8854cacd"}, - {file = "coverage-7.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:771eb7587a0563ca5bb6f622b9ed7f9d07bd08900f7589b4febff05f469bea00"}, - {file = "coverage-7.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42421e04069fb2cbcbca5a696c4050b84a43b05392679d4068acbe65449b5c64"}, - {file = "coverage-7.8.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:554fec1199d93ab30adaa751db68acec2b41c5602ac944bb19187cb9a41a8067"}, - {file = "coverage-7.8.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5aaeb00761f985007b38cf463b1d160a14a22c34eb3f6a39d9ad6fc27cb73008"}, - {file = "coverage-7.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:581a40c7b94921fffd6457ffe532259813fc68eb2bdda60fa8cc343414ce3733"}, - {file = "coverage-7.8.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f319bae0321bc838e205bf9e5bc28f0a3165f30c203b610f17ab5552cff90323"}, - {file = "coverage-7.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:04bfec25a8ef1c5f41f5e7e5c842f6b615599ca8ba8391ec33a9290d9d2db3a3"}, - {file = "coverage-7.8.0-cp313-cp313-win32.whl", hash = "sha256:dd19608788b50eed889e13a5d71d832edc34fc9dfce606f66e8f9f917eef910d"}, - {file = "coverage-7.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:a9abbccd778d98e9c7e85038e35e91e67f5b520776781d9a1e2ee9d400869487"}, - {file = "coverage-7.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:18c5ae6d061ad5b3e7eef4363fb27a0576012a7447af48be6c75b88494c6cf25"}, - {file = "coverage-7.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:95aa6ae391a22bbbce1b77ddac846c98c5473de0372ba5c463480043a07bff42"}, - {file = "coverage-7.8.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e013b07ba1c748dacc2a80e69a46286ff145935f260eb8c72df7185bf048f502"}, - {file = "coverage-7.8.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d766a4f0e5aa1ba056ec3496243150698dc0481902e2b8559314368717be82b1"}, - {file = "coverage-7.8.0-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad80e6b4a0c3cb6f10f29ae4c60e991f424e6b14219d46f1e7d442b938ee68a4"}, - {file = "coverage-7.8.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b87eb6fc9e1bb8f98892a2458781348fa37e6925f35bb6ceb9d4afd54ba36c73"}, - {file = "coverage-7.8.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:d1ba00ae33be84066cfbe7361d4e04dec78445b2b88bdb734d0d1cbab916025a"}, - {file = "coverage-7.8.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f3c38e4e5ccbdc9198aecc766cedbb134b2d89bf64533973678dfcf07effd883"}, - {file = "coverage-7.8.0-cp313-cp313t-win32.whl", hash = "sha256:379fe315e206b14e21db5240f89dc0774bdd3e25c3c58c2c733c99eca96f1ada"}, - {file = "coverage-7.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2e4b6b87bb0c846a9315e3ab4be2d52fac905100565f4b92f02c445c8799e257"}, - {file = "coverage-7.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa260de59dfb143af06dcf30c2be0b200bed2a73737a8a59248fcb9fa601ef0f"}, - {file = "coverage-7.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:96121edfa4c2dfdda409877ea8608dd01de816a4dc4a0523356067b305e4e17a"}, - {file = "coverage-7.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b8af63b9afa1031c0ef05b217faa598f3069148eeee6bb24b79da9012423b82"}, - {file = "coverage-7.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:89b1f4af0d4afe495cd4787a68e00f30f1d15939f550e869de90a86efa7e0814"}, - {file = "coverage-7.8.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94ec0be97723ae72d63d3aa41961a0b9a6f5a53ff599813c324548d18e3b9e8c"}, - {file = "coverage-7.8.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8a1d96e780bdb2d0cbb297325711701f7c0b6f89199a57f2049e90064c29f6bd"}, - {file = "coverage-7.8.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:f1d8a2a57b47142b10374902777e798784abf400a004b14f1b0b9eaf1e528ba4"}, - {file = "coverage-7.8.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:cf60dd2696b457b710dd40bf17ad269d5f5457b96442f7f85722bdb16fa6c899"}, - {file = "coverage-7.8.0-cp39-cp39-win32.whl", hash = "sha256:be945402e03de47ba1872cd5236395e0f4ad635526185a930735f66710e1bd3f"}, - {file = "coverage-7.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:90e7fbc6216ecaffa5a880cdc9c77b7418c1dcb166166b78dbc630d07f278cc3"}, - {file = 
"coverage-7.8.0-pp39.pp310.pp311-none-any.whl", hash = "sha256:b8194fb8e50d556d5849753de991d390c5a1edeeba50f68e3a9253fbd8bf8ccd"}, - {file = "coverage-7.8.0-py3-none-any.whl", hash = "sha256:dbf364b4c5e7bae9250528167dfe40219b62e2d573c854d74be213e1e52069f7"}, - {file = "coverage-7.8.0.tar.gz", hash = "sha256:7a3d62b3b03b4b6fd41a085f3574874cf946cb4604d2b4d3e8dca8cd570ca501"}, + {file = "coverage-7.8.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bd8ec21e1443fd7a447881332f7ce9d35b8fbd2849e761bb290b584535636b0a"}, + {file = "coverage-7.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c26c2396674816deaeae7ded0e2b42c26537280f8fe313335858ffff35019be"}, + {file = "coverage-7.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1aec326ed237e5880bfe69ad41616d333712c7937bcefc1343145e972938f9b3"}, + {file = "coverage-7.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5e818796f71702d7a13e50c70de2a1924f729228580bcba1607cccf32eea46e6"}, + {file = "coverage-7.8.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:546e537d9e24efc765c9c891328f30f826e3e4808e31f5d0f87c4ba12bbd1622"}, + {file = "coverage-7.8.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ab9b09a2349f58e73f8ebc06fac546dd623e23b063e5398343c5270072e3201c"}, + {file = "coverage-7.8.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fd51355ab8a372d89fb0e6a31719e825cf8df8b6724bee942fb5b92c3f016ba3"}, + {file = "coverage-7.8.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0774df1e093acb6c9e4d58bce7f86656aeed6c132a16e2337692c12786b32404"}, + {file = "coverage-7.8.2-cp310-cp310-win32.whl", hash = "sha256:00f2e2f2e37f47e5f54423aeefd6c32a7dbcedc033fcd3928a4f4948e8b96af7"}, + {file = "coverage-7.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:145b07bea229821d51811bf15eeab346c236d523838eda395ea969d120d13347"}, + {file = "coverage-7.8.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b99058eef42e6a8dcd135afb068b3d53aff3921ce699e127602efff9956457a9"}, + {file = "coverage-7.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5feb7f2c3e6ea94d3b877def0270dff0947b8d8c04cfa34a17be0a4dc1836879"}, + {file = "coverage-7.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:670a13249b957bb9050fab12d86acef7bf8f6a879b9d1a883799276e0d4c674a"}, + {file = "coverage-7.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0bdc8bf760459a4a4187b452213e04d039990211f98644c7292adf1e471162b5"}, + {file = "coverage-7.8.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07a989c867986c2a75f158f03fdb413128aad29aca9d4dbce5fc755672d96f11"}, + {file = "coverage-7.8.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2db10dedeb619a771ef0e2949ccba7b75e33905de959c2643a4607bef2f3fb3a"}, + {file = "coverage-7.8.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e6ea7dba4e92926b7b5f0990634b78ea02f208d04af520c73a7c876d5a8d36cb"}, + {file = "coverage-7.8.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ef2f22795a7aca99fc3c84393a55a53dd18ab8c93fb431004e4d8f0774150f54"}, + {file = "coverage-7.8.2-cp311-cp311-win32.whl", hash = "sha256:641988828bc18a6368fe72355df5f1703e44411adbe49bba5644b941ce6f2e3a"}, + {file = "coverage-7.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:8ab4a51cb39dc1933ba627e0875046d150e88478dbe22ce145a68393e9652975"}, + 
{file = "coverage-7.8.2-cp311-cp311-win_arm64.whl", hash = "sha256:8966a821e2083c74d88cca5b7dcccc0a3a888a596a04c0b9668a891de3a0cc53"}, + {file = "coverage-7.8.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e2f6fe3654468d061942591aef56686131335b7a8325684eda85dacdf311356c"}, + {file = "coverage-7.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76090fab50610798cc05241bf83b603477c40ee87acd358b66196ab0ca44ffa1"}, + {file = "coverage-7.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2bd0a0a5054be160777a7920b731a0570284db5142abaaf81bcbb282b8d99279"}, + {file = "coverage-7.8.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da23ce9a3d356d0affe9c7036030b5c8f14556bd970c9b224f9c8205505e3b99"}, + {file = "coverage-7.8.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9392773cffeb8d7e042a7b15b82a414011e9d2b5fdbbd3f7e6a6b17d5e21b20"}, + {file = "coverage-7.8.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:876cbfd0b09ce09d81585d266c07a32657beb3eaec896f39484b631555be0fe2"}, + {file = "coverage-7.8.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3da9b771c98977a13fbc3830f6caa85cae6c9c83911d24cb2d218e9394259c57"}, + {file = "coverage-7.8.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a990f6510b3292686713bfef26d0049cd63b9c7bb17e0864f133cbfd2e6167f"}, + {file = "coverage-7.8.2-cp312-cp312-win32.whl", hash = "sha256:bf8111cddd0f2b54d34e96613e7fbdd59a673f0cf5574b61134ae75b6f5a33b8"}, + {file = "coverage-7.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:86a323a275e9e44cdf228af9b71c5030861d4d2610886ab920d9945672a81223"}, + {file = "coverage-7.8.2-cp312-cp312-win_arm64.whl", hash = "sha256:820157de3a589e992689ffcda8639fbabb313b323d26388d02e154164c57b07f"}, + {file = "coverage-7.8.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ea561010914ec1c26ab4188aef8b1567272ef6de096312716f90e5baa79ef8ca"}, + {file = "coverage-7.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cb86337a4fcdd0e598ff2caeb513ac604d2f3da6d53df2c8e368e07ee38e277d"}, + {file = "coverage-7.8.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26a4636ddb666971345541b59899e969f3b301143dd86b0ddbb570bd591f1e85"}, + {file = "coverage-7.8.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5040536cf9b13fb033f76bcb5e1e5cb3b57c4807fef37db9e0ed129c6a094257"}, + {file = "coverage-7.8.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc67994df9bcd7e0150a47ef41278b9e0a0ea187caba72414b71dc590b99a108"}, + {file = "coverage-7.8.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e6c86888fd076d9e0fe848af0a2142bf606044dc5ceee0aa9eddb56e26895a0"}, + {file = "coverage-7.8.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:684ca9f58119b8e26bef860db33524ae0365601492e86ba0b71d513f525e7050"}, + {file = "coverage-7.8.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8165584ddedb49204c4e18da083913bdf6a982bfb558632a79bdaadcdafd0d48"}, + {file = "coverage-7.8.2-cp313-cp313-win32.whl", hash = "sha256:34759ee2c65362163699cc917bdb2a54114dd06d19bab860725f94ef45a3d9b7"}, + {file = "coverage-7.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:2f9bc608fbafaee40eb60a9a53dbfb90f53cc66d3d32c2849dc27cf5638a21e3"}, + {file = "coverage-7.8.2-cp313-cp313-win_arm64.whl", hash = 
"sha256:9fe449ee461a3b0c7105690419d0b0aba1232f4ff6d120a9e241e58a556733f7"}, + {file = "coverage-7.8.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8369a7c8ef66bded2b6484053749ff220dbf83cba84f3398c84c51a6f748a008"}, + {file = "coverage-7.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:159b81df53a5fcbc7d45dae3adad554fdbde9829a994e15227b3f9d816d00b36"}, + {file = "coverage-7.8.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6fcbbd35a96192d042c691c9e0c49ef54bd7ed865846a3c9d624c30bb67ce46"}, + {file = "coverage-7.8.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05364b9cc82f138cc86128dc4e2e1251c2981a2218bfcd556fe6b0fbaa3501be"}, + {file = "coverage-7.8.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46d532db4e5ff3979ce47d18e2fe8ecad283eeb7367726da0e5ef88e4fe64740"}, + {file = "coverage-7.8.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4000a31c34932e7e4fa0381a3d6deb43dc0c8f458e3e7ea6502e6238e10be625"}, + {file = "coverage-7.8.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:43ff5033d657cd51f83015c3b7a443287250dc14e69910577c3e03bd2e06f27b"}, + {file = "coverage-7.8.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94316e13f0981cbbba132c1f9f365cac1d26716aaac130866ca812006f662199"}, + {file = "coverage-7.8.2-cp313-cp313t-win32.whl", hash = "sha256:3f5673888d3676d0a745c3d0e16da338c5eea300cb1f4ada9c872981265e76d8"}, + {file = "coverage-7.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:2c08b05ee8d7861e45dc5a2cc4195c8c66dca5ac613144eb6ebeaff2d502e73d"}, + {file = "coverage-7.8.2-cp313-cp313t-win_arm64.whl", hash = "sha256:1e1448bb72b387755e1ff3ef1268a06617afd94188164960dba8d0245a46004b"}, + {file = "coverage-7.8.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:496948261eaac5ac9cf43f5d0a9f6eb7a6d4cb3bedb2c5d294138142f5c18f2a"}, + {file = "coverage-7.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eacd2de0d30871eff893bab0b67840a96445edcb3c8fd915e6b11ac4b2f3fa6d"}, + {file = "coverage-7.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b039ffddc99ad65d5078ef300e0c7eed08c270dc26570440e3ef18beb816c1ca"}, + {file = "coverage-7.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e49824808d4375ede9dd84e9961a59c47f9113039f1a525e6be170aa4f5c34d"}, + {file = "coverage-7.8.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b069938961dfad881dc2f8d02b47645cd2f455d3809ba92a8a687bf513839787"}, + {file = "coverage-7.8.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:de77c3ba8bb686d1c411e78ee1b97e6e0b963fb98b1637658dd9ad2c875cf9d7"}, + {file = "coverage-7.8.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1676628065a498943bd3f64f099bb573e08cf1bc6088bbe33cf4424e0876f4b3"}, + {file = "coverage-7.8.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8e1a26e7e50076e35f7afafde570ca2b4d7900a491174ca357d29dece5aacee7"}, + {file = "coverage-7.8.2-cp39-cp39-win32.whl", hash = "sha256:6782a12bf76fa61ad9350d5a6ef5f3f020b57f5e6305cbc663803f2ebd0f270a"}, + {file = "coverage-7.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:1efa4166ba75ccefd647f2d78b64f53f14fb82622bc94c5a5cb0a622f50f1c9e"}, + {file = "coverage-7.8.2-pp39.pp310.pp311-none-any.whl", hash = "sha256:ec455eedf3ba0bbdf8f5a570012617eb305c63cb9f03428d39bf544cb2b94837"}, + {file = 
"coverage-7.8.2-py3-none-any.whl", hash = "sha256:726f32ee3713f7359696331a18daf0c3b3a70bb0ae71141b9d3c52be7c595e32"}, + {file = "coverage-7.8.2.tar.gz", hash = "sha256:a886d531373a1f6ff9fad2a2ba4a045b68467b779ae729ee0b3b10ac20033b27"}, ] [package.dependencies] @@ -1223,15 +1227,18 @@ dev = ["flake8", "ipython", "mock", "pytest", "pytest-cov", "restview", "setupto [[package]] name = "exceptiongroup" -version = "1.2.2" +version = "1.3.0" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" files = [ - {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, - {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, + {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, + {file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"}, ] +[package.dependencies] +typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""} + [package.extras] test = ["pytest (>=6)"] @@ -1310,20 +1317,18 @@ typing = ["typing-extensions (>=4.12.2)"] [[package]] name = "flupy" -version = "1.2.1" -description = "Method chaining built on generators" +version = "1.2.2" +description = "Fluent data processing in Python - a chainable stream processing library for expressive data manipulation using method chaining" optional = true -python-versions = "*" +python-versions = ">=3.9" files = [ - {file = "flupy-1.2.1.tar.gz", hash = "sha256:42aab3b4b3eb1984a4616c40d8f049ecdee546eaad9467470731d456dbff7fa4"}, + {file = "flupy-1.2.2-py3-none-any.whl", hash = "sha256:df78e86a6b26ec1be558310b9dd967952d06cf138b1e8c0b422987fcd3d44ca0"}, + {file = "flupy-1.2.2.tar.gz", hash = "sha256:c28ff659b55800a26a64dd0ac1cc616355900718b87978c2db06f5e5cfb58535"}, ] [package.dependencies] typing_extensions = ">=4" -[package.extras] -dev = ["black", "mypy", "pre-commit", "pylint", "pytest", "pytest-benchmark", "pytest-cov"] - [[package]] name = "fqfa" version = "1.3.1" @@ -1340,66 +1345,65 @@ dev = ["black", "flake8", "flake8-pyproject", "mypy", "pre-commit", "pytest"] [[package]] name = "greenlet" -version = "3.2.1" +version = "3.2.3" description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=3.9" files = [ - {file = "greenlet-3.2.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:777c1281aa7c786738683e302db0f55eb4b0077c20f1dc53db8852ffaea0a6b0"}, - {file = "greenlet-3.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3059c6f286b53ea4711745146ffe5a5c5ff801f62f6c56949446e0f6461f8157"}, - {file = "greenlet-3.2.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e1a40a17e2c7348f5eee5d8e1b4fa6a937f0587eba89411885a36a8e1fc29bd2"}, - {file = "greenlet-3.2.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5193135b3a8d0017cb438de0d49e92bf2f6c1c770331d24aa7500866f4db4017"}, - {file = "greenlet-3.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:639a94d001fe874675b553f28a9d44faed90f9864dc57ba0afef3f8d76a18b04"}, - {file = "greenlet-3.2.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8fe303381e7e909e42fb23e191fc69659910909fdcd056b92f6473f80ef18543"}, - {file = "greenlet-3.2.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:72c9b668454e816b5ece25daac1a42c94d1c116d5401399a11b77ce8d883110c"}, - {file = "greenlet-3.2.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6079ae990bbf944cf66bea64a09dcb56085815630955109ffa98984810d71565"}, - {file = "greenlet-3.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:e63cd2035f49376a23611fbb1643f78f8246e9d4dfd607534ec81b175ce582c2"}, - {file = "greenlet-3.2.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:aa30066fd6862e1153eaae9b51b449a6356dcdb505169647f69e6ce315b9468b"}, - {file = "greenlet-3.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b0f3a0a67786facf3b907a25db80efe74310f9d63cc30869e49c79ee3fcef7e"}, - {file = "greenlet-3.2.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:64a4d0052de53ab3ad83ba86de5ada6aeea8f099b4e6c9ccce70fb29bc02c6a2"}, - {file = "greenlet-3.2.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:852ef432919830022f71a040ff7ba3f25ceb9fe8f3ab784befd747856ee58530"}, - {file = "greenlet-3.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4818116e75a0dd52cdcf40ca4b419e8ce5cb6669630cb4f13a6c384307c9543f"}, - {file = "greenlet-3.2.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9afa05fe6557bce1642d8131f87ae9462e2a8e8c46f7ed7929360616088a3975"}, - {file = "greenlet-3.2.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5c12f0d17a88664757e81a6e3fc7c2452568cf460a2f8fb44f90536b2614000b"}, - {file = "greenlet-3.2.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dbb4e1aa2000852937dd8f4357fb73e3911da426df8ca9b8df5db231922da474"}, - {file = "greenlet-3.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:cb5ee928ce5fedf9a4b0ccdc547f7887136c4af6109d8f2fe8e00f90c0db47f5"}, - {file = "greenlet-3.2.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:0ba2811509a30e5f943be048895a983a8daf0b9aa0ac0ead526dfb5d987d80ea"}, - {file = "greenlet-3.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4245246e72352b150a1588d43ddc8ab5e306bef924c26571aafafa5d1aaae4e8"}, - {file = "greenlet-3.2.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7abc0545d8e880779f0c7ce665a1afc3f72f0ca0d5815e2b006cafc4c1cc5840"}, - {file = "greenlet-3.2.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6dcc6d604a6575c6225ac0da39df9335cc0c6ac50725063fa90f104f3dbdb2c9"}, - {file = "greenlet-3.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2273586879affca2d1f414709bb1f61f0770adcabf9eda8ef48fd90b36f15d12"}, - {file = "greenlet-3.2.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ff38c869ed30fff07f1452d9a204ece1ec6d3c0870e0ba6e478ce7c1515acf22"}, - {file = "greenlet-3.2.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e934591a7a4084fa10ee5ef50eb9d2ac8c4075d5c9cf91128116b5dca49d43b1"}, - {file = "greenlet-3.2.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:063bcf7f8ee28eb91e7f7a8148c65a43b73fbdc0064ab693e024b5a940070145"}, - {file = "greenlet-3.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7132e024ebeeeabbe661cf8878aac5d2e643975c4feae833142592ec2f03263d"}, - {file = "greenlet-3.2.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:e1967882f0c42eaf42282a87579685c8673c51153b845fde1ee81be720ae27ac"}, - {file = "greenlet-3.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e77ae69032a95640a5fe8c857ec7bee569a0997e809570f4c92048691ce4b437"}, - {file = "greenlet-3.2.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3227c6ec1149d4520bc99edac3b9bc8358d0034825f3ca7572165cb502d8f29a"}, - {file = "greenlet-3.2.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ddda0197c5b46eedb5628d33dad034c455ae77708c7bf192686e760e26d6a0c"}, - {file = "greenlet-3.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de62b542e5dcf0b6116c310dec17b82bb06ef2ceb696156ff7bf74a7a498d982"}, - {file = "greenlet-3.2.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c07a0c01010df42f1f058b3973decc69c4d82e036a951c3deaf89ab114054c07"}, - {file = "greenlet-3.2.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:2530bfb0abcd451ea81068e6d0a1aac6dabf3f4c23c8bd8e2a8f579c2dd60d95"}, - {file = "greenlet-3.2.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:1c472adfca310f849903295c351d297559462067f618944ce2650a1878b84123"}, - {file = "greenlet-3.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:24a496479bc8bd01c39aa6516a43c717b4cee7196573c47b1f8e1011f7c12495"}, - {file = "greenlet-3.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:175d583f7d5ee57845591fc30d852b75b144eb44b05f38b67966ed6df05c8526"}, - {file = "greenlet-3.2.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3ecc9d33ca9428e4536ea53e79d781792cee114d2fa2695b173092bdbd8cd6d5"}, - {file = "greenlet-3.2.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f56382ac4df3860ebed8ed838f268f03ddf4e459b954415534130062b16bc32"}, - {file = "greenlet-3.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc45a7189c91c0f89aaf9d69da428ce8301b0fd66c914a499199cfb0c28420fc"}, - {file = "greenlet-3.2.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51a2f49da08cff79ee42eb22f1658a2aed60c72792f0a0a95f5f0ca6d101b1fb"}, - {file = "greenlet-3.2.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:0c68bbc639359493420282d2f34fa114e992a8724481d700da0b10d10a7611b8"}, - {file = "greenlet-3.2.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:e775176b5c203a1fa4be19f91da00fd3bff536868b77b237da3f4daa5971ae5d"}, - {file = "greenlet-3.2.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d6668caf15f181c1b82fb6406f3911696975cc4c37d782e19cb7ba499e556189"}, - {file = "greenlet-3.2.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:17964c246d4f6e1327edd95e2008988a8995ae3a7732be2f9fc1efed1f1cdf8c"}, - {file = "greenlet-3.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04b4ec7f65f0e4a1500ac475c9343f6cc022b2363ebfb6e94f416085e40dea15"}, - {file = "greenlet-3.2.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b38d53cf268da963869aa25a6e4cc84c1c69afc1ae3391738b2603d110749d01"}, - {file = "greenlet-3.2.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:05a7490f74e8aabc5f29256765a99577ffde979920a2db1f3676d265a3adba41"}, - {file = "greenlet-3.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4339b202ac20a89ccd5bde0663b4d00dc62dd25cb3fb14f7f3034dec1b0d9ece"}, - {file = "greenlet-3.2.1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a750f1046994b9e038b45ae237d68153c29a3a783075211fb1414a180c8324b"}, - {file = "greenlet-3.2.1-cp39-cp39-musllinux_1_1_aarch64.whl", 
hash = "sha256:374ffebaa5fbd10919cd599e5cf8ee18bae70c11f9d61e73db79826c8c93d6f9"}, - {file = "greenlet-3.2.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b89e5d44f55372efc6072f59ced5ed1efb7b44213dab5ad7e0caba0232c6545"}, - {file = "greenlet-3.2.1-cp39-cp39-win32.whl", hash = "sha256:b7503d6b8bbdac6bbacf5a8c094f18eab7553481a1830975799042f26c9e101b"}, - {file = "greenlet-3.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:e98328b8b8f160925d6b1c5b1879d8e64f6bd8cf11472b7127d579da575b77d9"}, - {file = "greenlet-3.2.1.tar.gz", hash = "sha256:9f4dd4b4946b14bb3bf038f81e1d2e535b7d94f1b2a59fdba1293cd9c1a0a4d7"}, + {file = "greenlet-3.2.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:1afd685acd5597349ee6d7a88a8bec83ce13c106ac78c196ee9dde7c04fe87be"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:761917cac215c61e9dc7324b2606107b3b292a8349bdebb31503ab4de3f559ac"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:a433dbc54e4a37e4fff90ef34f25a8c00aed99b06856f0119dcf09fbafa16392"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:72e77ed69312bab0434d7292316d5afd6896192ac4327d44f3d613ecb85b037c"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:68671180e3849b963649254a882cd544a3c75bfcd2c527346ad8bb53494444db"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49c8cfb18fb419b3d08e011228ef8a25882397f3a859b9fe1436946140b6756b"}, + {file = "greenlet-3.2.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:efc6dc8a792243c31f2f5674b670b3a95d46fa1c6a912b8e310d6f542e7b0712"}, + {file = "greenlet-3.2.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:731e154aba8e757aedd0781d4b240f1225b075b4409f1bb83b05ff410582cf00"}, + {file = "greenlet-3.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:96c20252c2f792defe9a115d3287e14811036d51e78b3aaddbee23b69b216302"}, + {file = "greenlet-3.2.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:784ae58bba89fa1fa5733d170d42486580cab9decda3484779f4759345b29822"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0921ac4ea42a5315d3446120ad48f90c3a6b9bb93dd9b3cf4e4d84a66e42de83"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d2971d93bb99e05f8c2c0c2f4aa9484a18d98c4c3bd3c62b65b7e6ae33dfcfaf"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c667c0bf9d406b77a15c924ef3285e1e05250948001220368e039b6aa5b5034b"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:592c12fb1165be74592f5de0d70f82bc5ba552ac44800d632214b76089945147"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29e184536ba333003540790ba29829ac14bb645514fbd7e32af331e8202a62a5"}, + {file = "greenlet-3.2.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:93c0bb79844a367782ec4f429d07589417052e621aa39a5ac1fb99c5aa308edc"}, + {file = "greenlet-3.2.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:751261fc5ad7b6705f5f76726567375bb2104a059454e0226e1eef6c756748ba"}, + {file = "greenlet-3.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:83a8761c75312361aa2b5b903b79da97f13f556164a7dd2d5448655425bd4c34"}, + {file = "greenlet-3.2.3-cp312-cp312-macosx_11_0_universal2.whl", 
hash = "sha256:25ad29caed5783d4bd7a85c9251c651696164622494c00802a139c00d639242d"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88cd97bf37fe24a6710ec6a3a7799f3f81d9cd33317dcf565ff9950c83f55e0b"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:baeedccca94880d2f5666b4fa16fc20ef50ba1ee353ee2d7092b383a243b0b0d"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:be52af4b6292baecfa0f397f3edb3c6092ce071b499dd6fe292c9ac9f2c8f264"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0cc73378150b8b78b0c9fe2ce56e166695e67478550769536a6742dca3651688"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:706d016a03e78df129f68c4c9b4c4f963f7d73534e48a24f5f5a7101ed13dbbb"}, + {file = "greenlet-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:419e60f80709510c343c57b4bb5a339d8767bf9aef9b8ce43f4f143240f88b7c"}, + {file = "greenlet-3.2.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:93d48533fade144203816783373f27a97e4193177ebaaf0fc396db19e5d61163"}, + {file = "greenlet-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:7454d37c740bb27bdeddfc3f358f26956a07d5220818ceb467a483197d84f849"}, + {file = "greenlet-3.2.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:500b8689aa9dd1ab26872a34084503aeddefcb438e2e7317b89b11eaea1901ad"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a07d3472c2a93117af3b0136f246b2833fdc0b542d4a9799ae5f41c28323faef"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:8704b3768d2f51150626962f4b9a9e4a17d2e37c8a8d9867bbd9fa4eb938d3b3"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5035d77a27b7c62db6cf41cf786cfe2242644a7a337a0e155c80960598baab95"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2d8aa5423cd4a396792f6d4580f88bdc6efcb9205891c9d40d20f6e670992efb"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2c724620a101f8170065d7dded3f962a2aea7a7dae133a009cada42847e04a7b"}, + {file = "greenlet-3.2.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:873abe55f134c48e1f2a6f53f7d1419192a3d1a4e873bace00499a4e45ea6af0"}, + {file = "greenlet-3.2.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:024571bbce5f2c1cfff08bf3fbaa43bbc7444f580ae13b0099e95d0e6e67ed36"}, + {file = "greenlet-3.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:5195fb1e75e592dd04ce79881c8a22becdfa3e6f500e7feb059b1e6fdd54d3e3"}, + {file = "greenlet-3.2.3-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:3d04332dddb10b4a211b68111dabaee2e1a073663d117dc10247b5b1642bac86"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8186162dffde068a465deab08fc72c767196895c39db26ab1c17c0b77a6d8b97"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f4bfbaa6096b1b7a200024784217defedf46a07c2eee1a498e94a1b5f8ec5728"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:ed6cfa9200484d234d8394c70f5492f144b20d4533f69262d530a1a082f6ee9a"}, + {file = 
"greenlet-3.2.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:02b0df6f63cd15012bed5401b47829cfd2e97052dc89da3cfaf2c779124eb892"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86c2d68e87107c1792e2e8d5399acec2487a4e993ab76c792408e59394d52141"}, + {file = "greenlet-3.2.3-cp314-cp314-win_amd64.whl", hash = "sha256:8c47aae8fbbfcf82cc13327ae802ba13c9c36753b67e760023fd116bc124a62a"}, + {file = "greenlet-3.2.3-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:42efc522c0bd75ffa11a71e09cd8a399d83fafe36db250a87cf1dacfaa15dc64"}, + {file = "greenlet-3.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d760f9bdfe79bff803bad32b4d8ffb2c1d2ce906313fc10a83976ffb73d64ca7"}, + {file = "greenlet-3.2.3-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:8324319cbd7b35b97990090808fdc99c27fe5338f87db50514959f8059999805"}, + {file = "greenlet-3.2.3-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:8c37ef5b3787567d322331d5250e44e42b58c8c713859b8a04c6065f27efbf72"}, + {file = "greenlet-3.2.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ce539fb52fb774d0802175d37fcff5c723e2c7d249c65916257f0a940cee8904"}, + {file = "greenlet-3.2.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:003c930e0e074db83559edc8705f3a2d066d4aa8c2f198aff1e454946efd0f26"}, + {file = "greenlet-3.2.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7e70ea4384b81ef9e84192e8a77fb87573138aa5d4feee541d8014e452b434da"}, + {file = "greenlet-3.2.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:22eb5ba839c4b2156f18f76768233fe44b23a31decd9cc0d4cc8141c211fd1b4"}, + {file = "greenlet-3.2.3-cp39-cp39-win32.whl", hash = "sha256:4532f0d25df67f896d137431b13f4cdce89f7e3d4a96387a41290910df4d3a57"}, + {file = "greenlet-3.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:aaa7aae1e7f75eaa3ae400ad98f8644bb81e1dc6ba47ce8a93d3f17274e08322"}, + {file = "greenlet-3.2.3.tar.gz", hash = "sha256:8b0dd8ae4c0d6f5e54ee55ba935eeb3d735a9b58a8a1e5b5cbab64e01a39f365"}, ] [package.extras] @@ -1443,120 +1447,120 @@ dev = ["black", "flake8", "ipython", "isort", "jupyter", "pytest (>=5.3)", "pyte [[package]] name = "hiredis" -version = "3.1.0" +version = "3.2.1" description = "Python wrapper for hiredis" optional = true python-versions = ">=3.8" files = [ - {file = "hiredis-3.1.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:2892db9db21f0cf7cc298d09f85d3e1f6dc4c4c24463ab67f79bc7a006d51867"}, - {file = "hiredis-3.1.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:93cfa6cc25ee2ceb0be81dc61eca9995160b9e16bdb7cca4a00607d57e998918"}, - {file = "hiredis-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2af62070aa9433802cae7be7364d5e82f76462c6a2ae34e53008b637aaa9a156"}, - {file = "hiredis-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:072c162260ebb1d892683107da22d0d5da7a1414739eae4e185cac22fe89627f"}, - {file = "hiredis-3.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c6b232c43e89755ba332c2745ddab059c0bc1a0f01448a3a14d506f8448b1ce6"}, - {file = "hiredis-3.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb5316c9a65c4dde80796aa245b76011bab64eb84461a77b0a61c1bf2970bcc9"}, - {file = "hiredis-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e812a4e656bbd1c1c15c844b28259c49e26bb384837e44e8d2aa55412c91d2f7"}, - {file = 
"hiredis-3.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93a6c9230e5a5565847130c0e1005c8d3aa5ca681feb0ed542c4651323d32feb"}, - {file = "hiredis-3.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a5f65e89ce50a94d9490d5442a649c6116f53f216c8c14eb37cf9637956482b2"}, - {file = "hiredis-3.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9b2d6e33601c67c074c367fdccdd6033e642284e7a56adc130f18f724c378ca8"}, - {file = "hiredis-3.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:bad3b1e0c83849910f28c95953417106f539277035a4b515d1425f93947bc28f"}, - {file = "hiredis-3.1.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:9646de31f5994e6218311dcf216e971703dbf804c510fd3f84ddb9813c495824"}, - {file = "hiredis-3.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:59a9230f3aa38a33d09d8171400de202f575d7a38869e5ce2947829bca6fe359"}, - {file = "hiredis-3.1.0-cp310-cp310-win32.whl", hash = "sha256:0322d70f3328b97da14b6e98b18f0090a12ed8a8bf7ae20932e2eb9d1bb0aa2c"}, - {file = "hiredis-3.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:802474c18e878b3f9905e160a8b7df87d57885758083eda76c5978265acb41aa"}, - {file = "hiredis-3.1.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:c339ff4b4739b2a40da463763dd566129762f72926bca611ad9a457a9fe64abd"}, - {file = "hiredis-3.1.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:0ffa2552f704a45954627697a378fc2f559004e53055b82f00daf30bd4305330"}, - {file = "hiredis-3.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9acf7f0e7106f631cd618eb60ec9bbd6e43045addd5310f66ba1177209567e59"}, - {file = "hiredis-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea4f5ecf9dbea93c827486f59c606684c3496ea71c7ba9a8131932780696e61a"}, - {file = "hiredis-3.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39efab176fca3d5111075f6ba56cd864f18db46d858289d39360c5672e0e5c3e"}, - {file = "hiredis-3.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1110eae007f30e70a058d743e369c24430327cd01fd97d99519d6794a58dd587"}, - {file = "hiredis-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b390f63191bcccbb6044d4c118acdf4fa55f38e5658ac4cfd5a33a6f0c07659"}, - {file = "hiredis-3.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:72a98ccc7b8ec9ce0100ecf59f45f05d2023606e8e3676b07a316d1c1c364072"}, - {file = "hiredis-3.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7c76e751fd1e2f221dec09cdc24040ee486886e943d5d7ffc256e8cf15c75e51"}, - {file = "hiredis-3.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7d3880f213b6f14e9c69ce52beffd1748eecc8669698c4782761887273b6e1bd"}, - {file = "hiredis-3.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:87c2b3fe7e7c96eba376506a76e11514e07e848f737b254e0973e4b5c3a491e9"}, - {file = "hiredis-3.1.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:d3cfb4089e96f8f8ee9554da93148a9261aa6612ad2cc202c1a494c7b712e31f"}, - {file = "hiredis-3.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4f12018e5c5f866a1c3f7017cb2d88e5c6f9440df2281e48865a2b6c40f247f4"}, - {file = "hiredis-3.1.0-cp311-cp311-win32.whl", hash = "sha256:107b66ce977bb2dff8f2239e68344360a75d05fed3d9fa0570ac4d3020ce2396"}, - {file = "hiredis-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:8f1240bde53d3d1676f0aba61b3661560dc9a681cae24d9de33e650864029aa4"}, - {file = 
"hiredis-3.1.0-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:f7c7f89e0bc4246115754e2eda078a111282f6d6ecc6fb458557b724fe6f2aac"}, - {file = "hiredis-3.1.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:3dbf9163296fa45fbddcfc4c5900f10e9ddadda37117dbfb641e327e536b53e0"}, - {file = "hiredis-3.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af46a4be0e82df470f68f35316fa16cd1e134d1c5092fc1082e1aad64cce716d"}, - {file = "hiredis-3.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc63d698c43aea500a84d8b083f830c03808b6cf3933ae4d35a27f0a3d881652"}, - {file = "hiredis-3.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:676b3d88674134bfaaf70dac181d1790b0f33b3187bfb9da9221e17e0e624f83"}, - {file = "hiredis-3.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aed10d9df1e2fb0011db2713ac64497462e9c2c0208b648c97569da772b959ca"}, - {file = "hiredis-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b5bd8adfe8742e331a94cccd782bffea251fa70d9a709e71f4510f50794d700"}, - {file = "hiredis-3.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9fc4e35b4afb0af6da55495dd0742ad32ab88150428a6ecdbb3085cbd60714e8"}, - {file = "hiredis-3.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:89b83e76eb00ab0464e7b0752a3ffcb02626e742e9509bc141424a9c3202e8dc"}, - {file = "hiredis-3.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:98ebf08c907836b70a8f40e030df8ab6f174dc7f6fa765251d813e89f14069d8"}, - {file = "hiredis-3.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:6c840b9cec086328f2ee2cfee0038b5d6bbb514bac7b5e579da6e346eaac056c"}, - {file = "hiredis-3.1.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:c5c44e9fa6f4462d0330cb5f5d46fa652512fc86b41d4d1974d0356f263e9105"}, - {file = "hiredis-3.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e665b14ab50aa175cfa306fcb00fffd4e3ff02ceb36ca6a4df00b1246d6a73c4"}, - {file = "hiredis-3.1.0-cp312-cp312-win32.whl", hash = "sha256:bd33db977ac7af97e8d035ffadb163b00546be22e5f1297b2123f5f9bf0f8a21"}, - {file = "hiredis-3.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:37aed4aa9348600145e2d019c7be27855e503ecc4906c6976ff2f3b52e3d5d97"}, - {file = "hiredis-3.1.0-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:b87cddd8107487863fed6994de51e5594a0be267b0b19e213694e99cdd614623"}, - {file = "hiredis-3.1.0-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:d302deff8cb63a7feffc1844e4dafc8076e566bbf10c5aaaf0f4fe791b8a6bd0"}, - {file = "hiredis-3.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a018340c073cf88cb635b2bedff96619df2f666018c655e7911f46fa2c1c178"}, - {file = "hiredis-3.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1e8ba6414ac1ae536129e18c069f3eb497df5a74e136e3566471620a4fa5f95"}, - {file = "hiredis-3.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a86b9fef256c2beb162244791fdc025aa55f936d6358e86e2020e512fe2e4972"}, - {file = "hiredis-3.1.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7acdc68e29a446ad17aadaff19c981a36b3bd8c894c3520412c8a7ab1c3e0de7"}, - {file = "hiredis-3.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7e06baea05de57e1e7548064f505a6964e992674fe61b8f274afe2ac93b6371"}, - {file = "hiredis-3.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:35b5fc061c8a0dbfdb440053280504d6aaa8d9726bd4d1d0e1cfcbbdf0d60b73"}, - {file = "hiredis-3.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c89d2dcb271d24c44f02264233b75d5db8c58831190fa92456a90b87fa17b748"}, - {file = "hiredis-3.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:aa36688c10a08f626fddcf68c2b1b91b0e90b070c26e550a4151a877f5c2d431"}, - {file = "hiredis-3.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f3982a9c16c1c4bc05a00b65d01ffb8d80ea1a7b6b533be2f1a769d3e989d2c0"}, - {file = "hiredis-3.1.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d1a6f889514ee2452300c9a06862fceedef22a2891f1c421a27b1ba52ef130b2"}, - {file = "hiredis-3.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8a45ff7915392a55d9386bb235ea1d1eb9960615f301979f02143fc20036b699"}, - {file = "hiredis-3.1.0-cp313-cp313-win32.whl", hash = "sha256:539e5bb725b62b76a5319a4e68fc7085f01349abc2316ef3df608ea0883c51d2"}, - {file = "hiredis-3.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:9020fd7e58f489fda6a928c31355add0e665fd6b87b21954e675cf9943eafa32"}, - {file = "hiredis-3.1.0-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:b621a89fc29b3f4b01be6640ec81a6a94b5382bc78fecb876408d57a071e45aa"}, - {file = "hiredis-3.1.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:363e21fba55e1a26349dc9ca7da6b14332123879b6359bcee4a9acecb40ca33b"}, - {file = "hiredis-3.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c156156798729eadc9ab76ffee96c88b93cc1c3b493f4dd0a4341f53939194ee"}, - {file = "hiredis-3.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e38d8a325f9a6afac1b1c72d996d1add9e1b99696ce9410538ba5e9aa8fdba02"}, - {file = "hiredis-3.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3004ef7436feb7bfa61c0b36d422b8fb8c29aaa1a514c9405f0fdee5e9694dd3"}, - {file = "hiredis-3.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13f5b16f97d0bbd1c04ce367c49097d1214d60e11f9fee7ef2a9b54e0a6645c8"}, - {file = "hiredis-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:230dd0e77cb0f525f58a1306a7b4aaf078037fc5229110922332ca46f90821bb"}, - {file = "hiredis-3.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d968116caddd19d63120d1298e62b1bbc694db3360ed0d5df8c3a97edbc12552"}, - {file = "hiredis-3.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:511e36a6fa41d3efab3cd5cd70ac388ed825993b9e66fa3b0e47cf27a2f5ffee"}, - {file = "hiredis-3.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:c5cd20804e3cb0d31e7d899d8dd091f569c33fe40d4bade670a067ab7d31c2ac"}, - {file = "hiredis-3.1.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:09e89e7d34cfe5ca8f7a869fca827d1af0afe8aaddb26b38c01058730edb79ad"}, - {file = "hiredis-3.1.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:570cbf31413c77fe5e7c157f2943ca4400493ddd9cf2184731cfcafc753becd7"}, - {file = "hiredis-3.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:b9b4da8162cf289781732d6a5ba01d820c42c05943fcdb7de307d03639961db3"}, - {file = "hiredis-3.1.0-cp38-cp38-win32.whl", hash = "sha256:bc117a04bcb461d3bb1b2c5b417aee3442e1e8aa33ebc800481431f4c09fe0c5"}, - {file = "hiredis-3.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:34f3f5f0354db2d6797a6fb08d2c036a50af62a1d919d122c1c784304ef49347"}, - {file = "hiredis-3.1.0-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:a26fa888025badb5563f283cc19594c215a413e905729e59a5f7cf3f46d66c32"}, - {file = 
"hiredis-3.1.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:f50763cd819d4a52a47b5966d4bb47dee34b637c5fa6402509800eee6ecb61e6"}, - {file = "hiredis-3.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b6d1c9e1fce5e0a94072667ae2bf0142b89ebbb1917d3531184e060a43f3ee11"}, - {file = "hiredis-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e38d7a56b1a79ed0bbb9e6fe376d82e3f4dcc646ae47472f2c858e19a597c112"}, - {file = "hiredis-3.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ef5ad8b91530e4d10a68562b0a380ea22705a60e88cecee086d7c63a38564ce"}, - {file = "hiredis-3.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cf3d2299b054e57a9f97ca08704c2843e44f29b57dc69b76a2592ecd212efe1a"}, - {file = "hiredis-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93811d60b0f73d0f049c86f4373a3833b4a38fce374ab151074d929553eb4304"}, - {file = "hiredis-3.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18e703ff860c1d83abbcf57012b309ead02b56b60e85150c6c3bfb37cbb16ebf"}, - {file = "hiredis-3.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f9ea0678806c53d96758e74c6a898f9d506a2e3367a344757f768bef9e069366"}, - {file = "hiredis-3.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:cf6844035abf47d52a1c3f4257255af3bf3b0f14d559b08eaa45885418c6c55d"}, - {file = "hiredis-3.1.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:7acf35cfa7ec9e1e7559c04e7095628f7d06049b5f24dcb58c1a55ef6dc689f8"}, - {file = "hiredis-3.1.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:b885695dce7a39b1fd9a609ed9c4cf312e53df2ec028d5a78af7a891b5fbea4d"}, - {file = "hiredis-3.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1c22fa74ddd063396b19fe8445a1ae8b4190eff755d5750dda48e860a45b2ee7"}, - {file = "hiredis-3.1.0-cp39-cp39-win32.whl", hash = "sha256:0614e16339f1784df3bbd2800322e20b4127d3f3a3509f00a5562efddb2521aa"}, - {file = "hiredis-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:c2bc713ee73ab9de4a0d68b0ab0f29612342b63173714742437b977584adb2d8"}, - {file = "hiredis-3.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:07ab990d0835f36bf358dbb84db4541ac0a8f533128ec09af8f80a576eef2e88"}, - {file = "hiredis-3.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5c54a88eb9d8ebc4e5eefaadbe2102a4f7499f9e413654172f40aefd25350959"}, - {file = "hiredis-3.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8095ef159896e5999a795b0f80e4d64281301a109e442a8d29cd750ca6bd8303"}, - {file = "hiredis-3.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f8ca13e2476ffd6d5be4763f5868133506ddcfa5ce54b4dac231ebdc19be6c6"}, - {file = "hiredis-3.1.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34d25aa25c10f966d5415795ed271da84605044dbf436c054966cea5442451b3"}, - {file = "hiredis-3.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4180dc5f646b426e5fa1212e1348c167ee2a864b3a70d56579163d64a847dd1e"}, - {file = "hiredis-3.1.0-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d92144e0cd6e6e841a6ad343e9d58631626eeb4ac96b0322649379b5d4527447"}, - {file = "hiredis-3.1.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:fcb91ba42903de637b94a1b64477f381f94ad82c0742c264f9245be76a7a3cbc"}, - {file = "hiredis-3.1.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:5ce71a797b5bc02c51da082428c00251ed6a7a67a03acbda5fbf9e8d028725f6"}, - {file = "hiredis-3.1.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e04c7feb9467e3170cd4d5bee381775783d81bbc45d6147c1c0ce3b50dc04f9"}, - {file = "hiredis-3.1.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a31806306a60f3565c04c964d6bee0e9d4a5120e1da589e41976b53972edf635"}, - {file = "hiredis-3.1.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:bc51f594c2c0863ded6501642dc96701ca8bbea9ced4fa3af0a1aeda8aa634cb"}, - {file = "hiredis-3.1.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4663a319ab7d22c597b9421e5ea384fd583e044f2f1ca9a1b98d4fef8a0fea2f"}, - {file = "hiredis-3.1.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:8060fa256862b0c3de64a73ab45bc1ccf381caca464f2647af9075b200828948"}, - {file = "hiredis-3.1.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e9445b7f117a9c8c8ccad97cb44daa55ddccff3cbc9079984eac56d982ba01f"}, - {file = "hiredis-3.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:732cf1c5cf1324f7bf3b6086976fe62a2ca98f0bf6316f31063c2c67be8797bc"}, - {file = "hiredis-3.1.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2102a94063d878c40df92f55199637a74f535e3a0b79ceba4a00538853a21be3"}, - {file = "hiredis-3.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d968dde69e3fe903bf9ef00667669dcf04a3e096e33aaf138775106ead138bc8"}, - {file = "hiredis-3.1.0.tar.gz", hash = "sha256:51d40ac3611091020d7dea6b05ed62cb152bff595fa4f931e7b6479d777acf7c"}, + {file = "hiredis-3.2.1-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:add17efcbae46c5a6a13b244ff0b4a8fa079602ceb62290095c941b42e9d5dec"}, + {file = "hiredis-3.2.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:5fe955cc4f66c57df1ae8e5caf4de2925d43b5efab4e40859662311d1bcc5f54"}, + {file = "hiredis-3.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f9ad63cd9065820a43fb1efb8ed5ae85bb78f03ef5eb53f6bde47914708f5718"}, + {file = "hiredis-3.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e7f9e5fdba08841d78d4e1450cae03a4dbed2eda8a4084673cafa5615ce24a"}, + {file = "hiredis-3.2.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1dce2508eca5d4e47ef38bc7c0724cb45abcdb0089f95a2ef49baf52882979a8"}, + {file = "hiredis-3.2.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:186428bf353e4819abae15aa2ad64c3f40499d596ede280fe328abb9e98e72ce"}, + {file = "hiredis-3.2.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:74f2500d90a0494843aba7abcdc3e77f859c502e0892112d708c02e1dcae8f90"}, + {file = "hiredis-3.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32822a94d2fdd1da96c05b22fdeef6d145d8fdbd865ba2f273f45eb949e4a805"}, + {file = "hiredis-3.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ead809fb08dd4fdb5b4b6e2999c834e78c3b0c450a07c3ed88983964432d0c64"}, + {file = "hiredis-3.2.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b90fada20301c3a257e868dd6a4694febc089b2b6d893fa96a3fc6c1f9ab4340"}, + {file = "hiredis-3.2.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:6d8bff53f526da3d9db86c8668011e4f7ca2958ee3a46c648edab6fe2cd1e709"}, + {file = "hiredis-3.2.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = 
"sha256:043d929ae262d03e1db0f08616e14504a9119c1ff3de13d66f857d85cd45caff"}, + {file = "hiredis-3.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8d470fef39d02dbe5c541ec345cc4ffd7d2baec7d6e59c92bd9d9545dc221829"}, + {file = "hiredis-3.2.1-cp310-cp310-win32.whl", hash = "sha256:efa4c76c45cc8c42228c7989b279fa974580e053b5e6a4a834098b5324b9eafa"}, + {file = "hiredis-3.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:cbac5ec3a620b095c46ef3a8f1f06da9c86c1cdc411d44a5f538876c39a2b321"}, + {file = "hiredis-3.2.1-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:e4ae0be44cab5e74e6e4c4a93d04784629a45e781ff483b136cc9e1b9c23975c"}, + {file = "hiredis-3.2.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:24647e84c9f552934eb60b7f3d2116f8b64a7020361da9369e558935ca45914d"}, + {file = "hiredis-3.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6fb3e92d1172da8decc5f836bf8b528c0fc9b6d449f1353e79ceeb9dc1801132"}, + {file = "hiredis-3.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38ba7a32e51e518b6b3e470142e52ed2674558e04d7d73d86eb19ebcb37d7d40"}, + {file = "hiredis-3.2.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4fc632be73174891d6bb71480247e57b2fd8f572059f0a1153e4d0339e919779"}, + {file = "hiredis-3.2.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f03e6839ff21379ad3c195e0700fc9c209e7f344946dea0f8a6d7b5137a2a141"}, + {file = "hiredis-3.2.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99983873e37c71bb71deb544670ff4f9d6920dab272aaf52365606d87a4d6c73"}, + {file = "hiredis-3.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffd982c419f48e3a57f592678c72474429465bb4bfc96472ec805f5d836523f0"}, + {file = "hiredis-3.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bc993f4aa4abc029347f309e722f122e05a3b8a0c279ae612849b5cc9dc69f2d"}, + {file = "hiredis-3.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:dde790d420081f18b5949227649ccb3ed991459df33279419a25fcae7f97cd92"}, + {file = "hiredis-3.2.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:b0c8cae7edbef860afcf3177b705aef43e10b5628f14d5baf0ec69668247d08d"}, + {file = "hiredis-3.2.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e8a90eaca7e1ce7f175584f07a2cdbbcab13f4863f9f355d7895c4d28805f65b"}, + {file = "hiredis-3.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:476031958fa44e245e803827e0787d49740daa4de708fe514370293ce519893a"}, + {file = "hiredis-3.2.1-cp311-cp311-win32.whl", hash = "sha256:eb3f5df2a9593b4b4b676dce3cea53b9c6969fc372875188589ddf2bafc7f624"}, + {file = "hiredis-3.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:1402e763d8a9fdfcc103bbf8b2913971c0a3f7b8a73deacbda3dfe5f3a9d1e0b"}, + {file = "hiredis-3.2.1-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:3742d8b17e73c198cabeab11da35f2e2a81999d406f52c6275234592256bf8e8"}, + {file = "hiredis-3.2.1-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9c2f3176fb617a79f6cccf22cb7d2715e590acb534af6a82b41f8196ad59375d"}, + {file = "hiredis-3.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a8bd46189c7fa46174e02670dc44dfecb60f5bd4b67ed88cb050d8f1fd842f09"}, + {file = "hiredis-3.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f86ee4488c8575b58139cdfdddeae17f91e9a893ffee20260822add443592e2f"}, + {file = "hiredis-3.2.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3717832f4a557b2fe7060b9d4a7900e5de287a15595e398c3f04df69019ca69d"}, + 
{file = "hiredis-3.2.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e5cb12c21fb9e2403d28c4e6a38120164973342d34d08120f2d7009b66785644"}, + {file = "hiredis-3.2.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:080fda1510bbd389af91f919c11a4f2aa4d92f0684afa4709236faa084a42cac"}, + {file = "hiredis-3.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1252e10a1f3273d1c6bf2021e461652c2e11b05b83e0915d6eb540ec7539afe2"}, + {file = "hiredis-3.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d9e320e99ab7d2a30dc91ff6f745ba38d39b23f43d345cdee9881329d7b511d6"}, + {file = "hiredis-3.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:641668f385f16550fdd6fdc109b0af6988b94ba2acc06770a5e06a16e88f320c"}, + {file = "hiredis-3.2.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1e1f44208c39d6c345ff451f82f21e9eeda6fe9af4ac65972cc3eeb58d41f7cb"}, + {file = "hiredis-3.2.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:f882a0d6415fffe1ffcb09e6281d0ba8b1ece470e866612bbb24425bf76cf397"}, + {file = "hiredis-3.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b4e78719a0730ebffe335528531d154bc8867a246418f74ecd88adbc4d938c49"}, + {file = "hiredis-3.2.1-cp312-cp312-win32.whl", hash = "sha256:33c4604d9f79a13b84da79950a8255433fca7edaf292bbd3364fd620864ed7b2"}, + {file = "hiredis-3.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7b9749375bf9d171aab8813694f379f2cff0330d7424000f5e92890ad4932dc9"}, + {file = "hiredis-3.2.1-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:7cabf7f1f06be221e1cbed1f34f00891a7bdfad05b23e4d315007dd42148f3d4"}, + {file = "hiredis-3.2.1-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:db85cb86f8114c314d0ec6d8de25b060a2590b4713135240d568da4f7dea97ac"}, + {file = "hiredis-3.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c9a592a49b7b8497e4e62c3ff40700d0c7f1a42d145b71e3e23c385df573c964"}, + {file = "hiredis-3.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0079ef1e03930b364556b78548e67236ab3def4e07e674f6adfc52944aa972dd"}, + {file = "hiredis-3.2.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d6a290ed45d9c14f4c50b6bda07afb60f270c69b5cb626fd23a4c2fde9e3da1"}, + {file = "hiredis-3.2.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79dd5fe8c0892769f82949adeb021342ca46871af26e26945eb55d044fcdf0d0"}, + {file = "hiredis-3.2.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:998a82281a159f4aebbfd4fb45cfe24eb111145206df2951d95bc75327983b58"}, + {file = "hiredis-3.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41fc3cd52368ffe7c8e489fb83af5e99f86008ed7f9d9ba33b35fec54f215c0a"}, + {file = "hiredis-3.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8d10df3575ce09b0fa54b8582f57039dcbdafde5de698923a33f601d2e2a246c"}, + {file = "hiredis-3.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1ab010d04be33735ad8e643a40af0d68a21d70a57b1d0bff9b6a66b28cca9dbf"}, + {file = "hiredis-3.2.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:ec3b5f9ea34f70aaba3e061cbe1fa3556fea401d41f5af321b13e326792f3017"}, + {file = "hiredis-3.2.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:158dfb505fff6bffd17f823a56effc0c2a7a8bc4fb659d79a52782f22eefc697"}, + {file = "hiredis-3.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9d632cd0ddd7895081be76748e6fb9286f81d2a51c371b516541c6324f2fdac9"}, + {file = 
"hiredis-3.2.1-cp313-cp313-win32.whl", hash = "sha256:e9726d03e7df068bf755f6d1ecc61f7fc35c6b20363c7b1b96f39a14083df940"}, + {file = "hiredis-3.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:b5b1653ad7263a001f2e907e81a957d6087625f9700fa404f1a2268c0a4f9059"}, + {file = "hiredis-3.2.1-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:ef27728a8ceaa038ef4b6efc0e4473b7643b5c873c2fff5475e2c8b9c8d2e0d5"}, + {file = "hiredis-3.2.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:1039d8d2e1d2a1528ad9f9e289e8aa8eec9bf4b4759be4d453a2ab406a70a800"}, + {file = "hiredis-3.2.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:83a8cd0eb6e535c93aad9c21e3e85bcb7dd26d3ff9b8ab095287be86e8af2f59"}, + {file = "hiredis-3.2.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6fc1e8f78bcdc7e25651b7d96d19b983b843b575904d96642f97ae157797ae4"}, + {file = "hiredis-3.2.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0ddfa9a10fda3bea985a3b371a64553731141aaa0a20cbcc62a0e659f05e6c01"}, + {file = "hiredis-3.2.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e789ee008752b9be82a7bed82e36b62053c7cc06a0179a5a403ba5b2acba5bd8"}, + {file = "hiredis-3.2.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4bf271877947a0f3eb9dc331688404a2e4cc246bca61bc5a1e2d62da9a1caad8"}, + {file = "hiredis-3.2.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9ad404fd0fdbdfe74e55ebb0592ab4169eecfe70ccf0db80eedc1d9943dd6d7"}, + {file = "hiredis-3.2.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:979572c602bdea0c3df255545c8c257f2163dd6c10d1f172268ffa7a6e1287d6"}, + {file = "hiredis-3.2.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:f74e3d899be057fb00444ea5f7ae1d7389d393bddf0f3ed698997aa05563483b"}, + {file = "hiredis-3.2.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:a015666d5fdc3ca704f68db9850d0272ddcfb27e9f26a593013383f565ed2ad7"}, + {file = "hiredis-3.2.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:259a3389dfe3390e356c2796b6bc96a778695e9d7d40c82121096a6b8a2dd3c6"}, + {file = "hiredis-3.2.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:39f469891d29f0522712265de76018ab83a64b85ac4b4f67e1f692cbd42a03f9"}, + {file = "hiredis-3.2.1-cp38-cp38-win32.whl", hash = "sha256:73aa0508f26cd6cb4dfdbe189b28fb3162fd171532e526e90a802363b88027f8"}, + {file = "hiredis-3.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:2b910f12d7bcaf5ffc056087fc7b2d23e688f166462c31b73a0799d12891378d"}, + {file = "hiredis-3.2.1-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:523a241d9f268bc0c7306792f58f9c633185f939a19abc0356c55f078d3901c5"}, + {file = "hiredis-3.2.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:fec453a038c262e18d7de4919220b2916e0b17d1eadd12e7a800f09f78f84f39"}, + {file = "hiredis-3.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e75a49c5927453c316665cfa39f4274081d00ce69b137b393823eb90c66a8371"}, + {file = "hiredis-3.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd974cbe8b3ae8d3e7f60675e6da10383da69f029147c2c93d1a7e44b36d1290"}, + {file = "hiredis-3.2.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12d3b8fff9905e44f357417159d64138a32500dbd0d5cffaddbb2600d3ce33b1"}, + {file = "hiredis-3.2.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e21985804a40cb91e69e35ae321eb4e3610cd61a2cbc0328ab73a245f608fa1c"}, + {file = "hiredis-3.2.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:4e26e2b49a9569f44a2a2d743464ff0786b46fb1124ed33d2a1bd8b1c660c25b"}, + {file = "hiredis-3.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ef1ebf9ee8e0b4a895b86a02a8b7e184b964c43758393532966ecb8a256f37c"}, + {file = "hiredis-3.2.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c936b690dd31d7af74f707fc9003c500315b4c9ad70fa564aff73d1283b3b37a"}, + {file = "hiredis-3.2.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:4909666bcb73270bb806aa00d0eee9e81f7a1aca388aafb4ba7dfcf5d344d23a"}, + {file = "hiredis-3.2.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:d74a2ad25bc91ca9639e4485099852e6263b360b2c3650fdd3cc47762c5db3fa"}, + {file = "hiredis-3.2.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:e99910088df446ee64d64b160835f592fb4d36189fcc948dd204e903d91fffa3"}, + {file = "hiredis-3.2.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:54423bd7af93a773edc6f166341cfb0e5f35ef42ca07b93f568f672a6f445e40"}, + {file = "hiredis-3.2.1-cp39-cp39-win32.whl", hash = "sha256:4a5365cb6d7be82d3c6d523b369bc0bc1a64987e88ed6ecfabadda2aa1cf4fa4"}, + {file = "hiredis-3.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:0a2eb02b6aaf4f1425a408e892c0378ba6cb6b45b1412c30dd258df1322d88c0"}, + {file = "hiredis-3.2.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:73913d2fa379e722d17ba52f21ce12dd578140941a08efd73e73b6fab1dea4d8"}, + {file = "hiredis-3.2.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:15a3dff3eca31ecbf3d7d6d104cf1b318dc2b013bad3f4bdb2839cb9ea2e1584"}, + {file = "hiredis-3.2.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c78258032c2f9fc6f39fee7b07882ce26de281e09178266ce535992572132d95"}, + {file = "hiredis-3.2.1-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:578d6a881e64e46db065256355594e680202c3bacf3270be3140057171d2c23e"}, + {file = "hiredis-3.2.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b7f34b170093c077c972b8cc0ceb15d8ff88ad0079751a8ae9733e94d77e733"}, + {file = "hiredis-3.2.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:291a18b228fc90f6720d178de2fac46522082c96330b4cc2d3dd8cb2c1cb2815"}, + {file = "hiredis-3.2.1-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:f53d2af5a7cd33a4b4d7ba632dce80c17823df6814ef5a8d328ed44c815a68e7"}, + {file = "hiredis-3.2.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:20bdf6dbdf77eb43b98bc53950f7711983042472199245d4c36448e6b4cb460f"}, + {file = "hiredis-3.2.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f43e5c50d76da15118c72b757216cf26c643d55bb1b3c86cad1ae49173971780"}, + {file = "hiredis-3.2.1-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e5bb5fe9834851d56c8543e52dcd2ac5275fb6772ebc97876e18c2e05a3300b"}, + {file = "hiredis-3.2.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53e348438b6452e3d14dddb95d071fe8eaf6f264f641cba999c10bf6359cf1d2"}, + {file = "hiredis-3.2.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e305f6c63a2abcbde6ce28958de2bb4dd0fd34c6ab3bde5a4410befd5df8c6b2"}, + {file = "hiredis-3.2.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:33f24b1152f684b54d6b9d09135d849a6df64b6982675e8cf972f8adfa2de9aa"}, + {file = "hiredis-3.2.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:01dd8ea88bf8363751857ca2eb8f13faad0c7d57a6369663d4d1160f225ab449"}, + {file = 
"hiredis-3.2.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b16946533535cbb5cc7d4b6fc009d32d22b0f9ac58e8eb6f144637b64f9a61d"}, + {file = "hiredis-3.2.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f9a03886cad1076e9f7e9e411c402826a8eac6f56ba426ee84b88e6515574b7b"}, + {file = "hiredis-3.2.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a4f6340f1c378bce17c195d46288a796fcf213dd3e2a008c2c942b33ab58993"}, + {file = "hiredis-3.2.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:9d64ddf29016d34e7e3bc4b3d36ca9ac8a94f9b2c13ac4b9d8a486862d91b95c"}, + {file = "hiredis-3.2.1.tar.gz", hash = "sha256:5a5f64479bf04dd829fe7029fad0ea043eac4023abc6e946668cbbec3493a78d"}, ] [[package]] @@ -1696,13 +1700,13 @@ pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_ve [[package]] name = "identify" -version = "2.6.10" +version = "2.6.12" description = "File identification library for Python" optional = false python-versions = ">=3.9" files = [ - {file = "identify-2.6.10-py2.py3-none-any.whl", hash = "sha256:5f34248f54136beed1a7ba6a6b5c4b6cf21ff495aac7c359e1ef831ae3b8ab25"}, - {file = "identify-2.6.10.tar.gz", hash = "sha256:45e92fd704f3da71cc3880036633f48b4b7265fd4de2b57627cb157216eb7eb8"}, + {file = "identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2"}, + {file = "identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6"}, ] [package.extras] @@ -1867,13 +1871,13 @@ files = [ [[package]] name = "jsonschema" -version = "4.23.0" +version = "4.24.0" description = "An implementation of JSON Schema validation for Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"}, - {file = "jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"}, + {file = "jsonschema-4.24.0-py3-none-any.whl", hash = "sha256:a462455f19f5faf404a7902952b6f0e3ce868f3ee09a359b05eca6673bd8412d"}, + {file = "jsonschema-4.24.0.tar.gz", hash = "sha256:0b4e8069eb12aedfa881333004bccaec24ecef5a8a6a4b6df142b2cc9599d196"}, ] [package.dependencies] @@ -2000,8 +2004,11 @@ files = [ {file = "lxml-5.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:7ce1a171ec325192c6a636b64c94418e71a1964f56d002cc28122fceff0b6121"}, {file = "lxml-5.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:795f61bcaf8770e1b37eec24edf9771b307df3af74d1d6f27d812e15a9ff3872"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29f451a4b614a7b5b6c2e043d7b64a15bd8304d7e767055e8ab68387a8cacf4e"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f7f991a68d20c75cb13c5c9142b2a3f9eb161f1f12a9489c82172d1f133c0"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4aa412a82e460571fad592d0f93ce9935a20090029ba08eca05c614f99b0cc92"}, + {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:ac7ba71f9561cd7d7b55e1ea5511543c0282e2b6450f122672a2694621d63b7e"}, {file = "lxml-5.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:c5d32f5284012deaccd37da1e2cd42f081feaa76981f0eaa474351b68df813c5"}, + {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", 
hash = "sha256:ce31158630a6ac85bddd6b830cffd46085ff90498b397bd0a259f59d27a12188"}, {file = "lxml-5.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:31e63621e073e04697c1b2d23fcb89991790eef370ec37ce4d5d469f40924ed6"}, {file = "lxml-5.4.0-cp37-cp37m-win32.whl", hash = "sha256:be2ba4c3c5b7900246a8f866580700ef0d538f2ca32535e991027bdaba944063"}, {file = "lxml-5.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:09846782b1ef650b321484ad429217f5154da4d6e786636c38e434fa32e94e49"}, @@ -2447,13 +2454,13 @@ ptyprocess = ">=0.5" [[package]] name = "platformdirs" -version = "4.3.7" +version = "4.3.8" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.9" files = [ - {file = "platformdirs-4.3.7-py3-none-any.whl", hash = "sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94"}, - {file = "platformdirs-4.3.7.tar.gz", hash = "sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351"}, + {file = "platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4"}, + {file = "platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc"}, ] [package.extras] @@ -2463,18 +2470,18 @@ type = ["mypy (>=1.14.1)"] [[package]] name = "pluggy" -version = "1.5.0" +version = "1.6.0" description = "plugin and hook calling mechanisms for python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, - {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, + {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, + {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, ] [package.extras] dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] +testing = ["coverage", "pytest", "pytest-benchmark"] [[package]] name = "port-for" @@ -2544,13 +2551,13 @@ test = ["pytest", "pytest-xdist", "setuptools"] [[package]] name = "psycopg" -version = "3.2.7" +version = "3.2.9" description = "PostgreSQL database adapter for Python" optional = false python-versions = ">=3.8" files = [ - {file = "psycopg-3.2.7-py3-none-any.whl", hash = "sha256:d39747d2d5b9658b69fa462ad21d31f1ba4a5722ad1d0cb952552bc0b4125451"}, - {file = "psycopg-3.2.7.tar.gz", hash = "sha256:9afa609c7ebf139827a38c0bf61be9c024a3ed743f56443de9d38e1efc260bf3"}, + {file = "psycopg-3.2.9-py3-none-any.whl", hash = "sha256:01a8dadccdaac2123c916208c96e06631641c0566b22005493f09663c7a8d3b6"}, + {file = "psycopg-3.2.9.tar.gz", hash = "sha256:2fbb46fcd17bc81f993f28c47f1ebea38d66ae97cc2dbc3cad73b37cefbff700"}, ] [package.dependencies] @@ -2558,8 +2565,8 @@ typing-extensions = {version = ">=4.6", markers = "python_version < \"3.13\""} tzdata = {version = "*", markers = "sys_platform == \"win32\""} [package.extras] -binary = ["psycopg-binary (==3.2.7)"] -c = ["psycopg-c (==3.2.7)"] +binary = ["psycopg-binary (==3.2.9)"] +c = ["psycopg-c (==3.2.9)"] dev = ["ast-comments (>=1.1.2)", "black (>=24.1.0)", "codespell (>=2.2)", "dnspython (>=2.1)", "flake8 (>=4.0)", "isort-psycopg", "isort[colors] (>=6.0)", "mypy (>=1.14)", "pre-commit (>=4.0.1)", "types-setuptools (>=57.4)", "types-shapely (>=2.0)", "wheel (>=0.37)"] 
docs = ["Sphinx (>=5.0)", "furo (==2022.6.21)", "sphinx-autobuild (>=2021.3.14)", "sphinx-autodoc-typehints (>=1.12)"] pool = ["psycopg-pool"] @@ -2578,7 +2585,6 @@ files = [ {file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"}, {file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"}, {file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"}, - {file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"}, {file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"}, {file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"}, {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"}, @@ -2738,42 +2744,48 @@ dev = ["build", "flake8", "mypy", "pytest", "twine"] [[package]] name = "pysam" -version = "0.23.0" +version = "0.23.3" description = "Package for reading, manipulating, and writing genomic data" optional = true -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "pysam-0.23.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ee2ef5f3452bc84834163a881269efed56d1be5045865b5af74d010aee4b44c"}, - {file = "pysam-0.23.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a061857f8bd723f0f223ec986b0b955f28ddea3b330a767f3c39daaca5908e39"}, - {file = "pysam-0.23.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:2ed8717c12580e76b9656af231c03254bb745bf1afc6d6556d0a27626443c48d"}, - {file = "pysam-0.23.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:e681d04efe21040b3888f7484f84a2636433503cfb903bbf9b91d671726ceaed"}, - {file = "pysam-0.23.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ada6096aca1f188289832d7963137d5e9ffe9454d688c57a2cca563de4601545"}, - {file = "pysam-0.23.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:caf8f1cef87663d38228b01885f8ffb13d6a9bc2ba3ff958f79d6c1af3fb84c2"}, - {file = "pysam-0.23.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:81dc7b4418d6006ff3e16e5419f5d26a25d959efc1e9807cf56190ca0f68012a"}, - {file = "pysam-0.23.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:4923c614adf642ffc7620a76faf38f57f2d834b1ba4ab567596db2ac6266038f"}, - {file = "pysam-0.23.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5a795db60b648902d1886faf9d3575dbd3f199736fda27504b8237b684b74710"}, - {file = "pysam-0.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e454f282f6ace01c5c293e3f1bc4bb2ee844f6d5b8686bffe7e02d7e0089a73e"}, - {file = "pysam-0.23.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:4f1b976bff84b99acb90276f396e7853359a8ea3a2a5fbcb69f3ceed4027761d"}, - {file = "pysam-0.23.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4fd54bae72a832317aab23ad45970f9091ac2c7c233c5a6826941095fbd7f103"}, - {file = "pysam-0.23.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b13eb322ad3726b214df3fe54249af5d91bfca6e4a64abe9f293348edae397e8"}, - {file = "pysam-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:41272520d47a428c4d17441eab88d7c5b1ad609ba729cc0cd96960b8a8589e93"}, - {file = "pysam-0.23.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = 
"sha256:e0426928e676e5d9f8320cd09741be905f90be5c7133f3ad386c7d1be84930ff"}, - {file = "pysam-0.23.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1631c173363475c409352d9bf3ad96e7ff851ba903e5b979f55330f0b41d9b5d"}, - {file = "pysam-0.23.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:2572519dc4b668e8e45d38335233e119cc93fa671fa03099e6f651be032370a6"}, - {file = "pysam-0.23.0-cp36-cp36m-manylinux_2_28_aarch64.whl", hash = "sha256:338d7e292f76d157ba2c7bb3ccdd9a071f164b44250ea89672b4946e3518146f"}, - {file = "pysam-0.23.0-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:e32afcd92a6686696147bbad7e7a8d779791e495a9f1ab814daba5e211659716"}, - {file = "pysam-0.23.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3e3616d3e0a86a87cfde1e89896315961a7a409170dca038c24a756309c8e0d5"}, - {file = "pysam-0.23.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:86c5ce45690348dcd11dfe7163624ba18dd945ba57d7a064593b78ddcb8e3e72"}, - {file = "pysam-0.23.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:45abd654f6f53e938238bb9472a7c0b992c384c0bef7e51f248b0502c99964b1"}, - {file = "pysam-0.23.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ab8d993dbbe5af916f1b5f0dac00705a1fa20185b64ed9f3dd66d15850338c65"}, - {file = "pysam-0.23.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a0ef49772d4546b2a75f0448b91cf9b9e6d4124bda8747e891383570d51b50e"}, - {file = "pysam-0.23.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:4e00a02e9d5e34902c90817d742c58862493b57ccb70144927869d28a91accfc"}, - {file = "pysam-0.23.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:3e1e12d078f3780173ee34990ba737427768fb332d3a1910aae5fb32bb2499fa"}, - {file = "pysam-0.23.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0bb793bd7fcabb416991607877fe4dfdc7109f73256ed5d2e1e7ed4a68c39167"}, - {file = "pysam-0.23.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fd936bdd2a2a7412e96a0797da052d7a50745f0254a934ee590d10485776be58"}, - {file = "pysam-0.23.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:03f50e567ccbac028aec7ce5edc1d4962489cfd5eeeee423ca276abe01dc0a24"}, - {file = "pysam-0.23.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:79201b1ed82308540e7be69de9f73719e381f45d9e0e6310a46c6e56092967b2"}, - {file = "pysam-0.23.0.tar.gz", hash = "sha256:81488b3c7e0efc614395e21acde8bdb21c7adafea31736e733173ac7afac0c3e"}, + {file = "pysam-0.23.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a0b99d875f293fad0bd9c9c923e8910c03af62d291ebb7d20e69ceaf39e383d4"}, + {file = "pysam-0.23.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:725a32970cf4ce322f4ab2a52b755163297027a0349f0d151537fe16bdf525e5"}, + {file = "pysam-0.23.3-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:5fd54146c0a5a41e37b67212e3b9b0c123b73d1dd2ba58082d21dc2236c1b290"}, + {file = "pysam-0.23.3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a7d6b3dcbf4756bd178e217fa391187edc5793f8f50c3034e585d1e4d282d29b"}, + {file = "pysam-0.23.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bc391a099ca74a1134a1cf71812c8ddf9934ab9d6675f3a97fe299466f227a1f"}, + {file = "pysam-0.23.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d98ce73c07926d400c684773ce2521f03f78247a3dd6968c8206ba31b077b503"}, + {file = "pysam-0.23.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cb4c9c4eb245d643b60c7ec750d5554ebf17c6c9646f4f54439f94a3b3de15de"}, + {file = "pysam-0.23.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3449070e0bbe716f9eccd3911d2482476478fbad63f739378d0203f470a446d6"}, + {file 
= "pysam-0.23.3-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c6cb7069dcecca3d40bbe4a6d5adea5cafe483c11854892dbabd6e10e5776049"}, + {file = "pysam-0.23.3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a7e9c835126f94ff57199e2f58e61436e12e84d47077e70aac8aa03531c4cc71"}, + {file = "pysam-0.23.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9b249367a4ad100e61afac9156bde6183c6119f2612bbd5d97ebe3153c643aed"}, + {file = "pysam-0.23.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a720cc0818aa84aca5ee4ef884fda82367598e77ec0c95d2050f670fb1fd0db5"}, + {file = "pysam-0.23.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:69f90c0867fe43f04004bcea963f6b2e68b39180afab54bf551f61f43856638b"}, + {file = "pysam-0.23.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2310d72bfae7a0980d414156267e25b57aa221a768c11c087f3f7d00ceb9fed4"}, + {file = "pysam-0.23.3-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b2e45983efea190d374fcda0b6e0c835d6e9e474e02694729f3b3a14d680fa62"}, + {file = "pysam-0.23.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4099393fc5097b5081c7efaf46b0109e4f0a8ed18f86d497219a8bf739c73992"}, + {file = "pysam-0.23.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4f04b9aa9b23d767fe36652eacb8370791e3b56816a7e50553d52c65ccdce77f"}, + {file = "pysam-0.23.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:701843e5dc67c8eb217c3265039c699a5f83cce64fbc4225268141796e972353"}, + {file = "pysam-0.23.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2d3177c5b3e102bde297f86e079d23fa385ac88f16c4252502079ef368056d55"}, + {file = "pysam-0.23.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2b6f6891684213e89ee679c5ac786b4e845e7d39d24f6ea0e4d8ed8be9c34f48"}, + {file = "pysam-0.23.3-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:735b938b809f0dc19a389cf3cee04fe7a451e21e2b20d3e45fa6bc23016ae21d"}, + {file = "pysam-0.23.3-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:b721ae4c9118e0c27e1500be278c3b62022c886eeb913ecabc0463fdf98da38f"}, + {file = "pysam-0.23.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:915bd2883eed08b16a41964a33923818e67166ca69a51086598d27287df6bb4f"}, + {file = "pysam-0.23.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b80f1092ba290b738d6ed230cc58cc75ca815fda441afe76cb4c25639aec7ee7"}, + {file = "pysam-0.23.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9bf6281fc4709125f5089b5c8f83ffcb1b911c4aa9c601a0a4f62beb1de82413"}, + {file = "pysam-0.23.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:456fb5f1a22001cb237fcc5b2ec03960979e5e18a3171c8e0a0116e02d86f31a"}, + {file = "pysam-0.23.3-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:7565c85fc636d75029ef4e133461c513a848c2d0ecd0489571f4fde1efa22d3b"}, + {file = "pysam-0.23.3-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:ad3cf30c6a48f3e2751a0b78d36c47cd4b272249cb6428be655b46473676d8f9"}, + {file = "pysam-0.23.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:15945db1483fef9760f32cfa112af3c3b7d50d586edfaf245edce52b99bb5c25"}, + {file = "pysam-0.23.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:013738cca990e235c56a7200ccfa9f105d7144ef34c2683c1ae8086ee030238b"}, + {file = "pysam-0.23.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:83f6f22995fa9b89b619f0d932a6714108d0dd1536fff684d3e02257c3f59b3a"}, + {file = "pysam-0.23.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ecf7cbc3d15c84cbc14a6c00af0f866b8f5e6b8ea3d2a496f18ad87adf55bcc5"}, + {file = 
"pysam-0.23.3-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:be2283f2ff15346d6ac10ba3b4370359ac3c1afc34b99bb0f2f39e715749cb8b"}, + {file = "pysam-0.23.3-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:513fa67af426e9e01f82653654e384d7774d81876d7dc3020ad7f72aa1d9c309"}, + {file = "pysam-0.23.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fd35287d2f8d243d6e54746e8cd5df3eb6239b016e51e20bbca1a2b6ef5899df"}, + {file = "pysam-0.23.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7ddbf573f0d3c650a03f2dcb4cdce50d536d380dbbc692f434b1cfa0cd7da4d2"}, + {file = "pysam-0.23.3.tar.gz", hash = "sha256:9ebcb1f004b296fd139b103ec6fd7e415e80f89f194eb7d0d972ac6d11bbaf24"}, ] [[package]] @@ -3050,18 +3062,18 @@ typing-extensions = {version = ">=4.4.0", markers = "python_version < \"3.13\""} [[package]] name = "requests" -version = "2.32.3" +version = "2.32.4" description = "Python HTTP for Humans." optional = false python-versions = ">=3.8" files = [ - {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, - {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, + {file = "requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c"}, + {file = "requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422"}, ] [package.dependencies] certifi = ">=2017.4.17" -charset-normalizer = ">=2,<4" +charset_normalizer = ">=2,<4" idna = ">=2.5,<4" urllib3 = ">=1.21.1,<3" @@ -3090,125 +3102,128 @@ test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "tes [[package]] name = "rpds-py" -version = "0.24.0" +version = "0.25.1" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.9" files = [ - {file = "rpds_py-0.24.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:006f4342fe729a368c6df36578d7a348c7c716be1da0a1a0f86e3021f8e98724"}, - {file = "rpds_py-0.24.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2d53747da70a4e4b17f559569d5f9506420966083a31c5fbd84e764461c4444b"}, - {file = "rpds_py-0.24.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8acd55bd5b071156bae57b555f5d33697998752673b9de554dd82f5b5352727"}, - {file = "rpds_py-0.24.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7e80d375134ddb04231a53800503752093dbb65dad8dabacce2c84cccc78e964"}, - {file = "rpds_py-0.24.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60748789e028d2a46fc1c70750454f83c6bdd0d05db50f5ae83e2db500b34da5"}, - {file = "rpds_py-0.24.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6e1daf5bf6c2be39654beae83ee6b9a12347cb5aced9a29eecf12a2d25fff664"}, - {file = "rpds_py-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b221c2457d92a1fb3c97bee9095c874144d196f47c038462ae6e4a14436f7bc"}, - {file = "rpds_py-0.24.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:66420986c9afff67ef0c5d1e4cdc2d0e5262f53ad11e4f90e5e22448df485bf0"}, - {file = "rpds_py-0.24.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:43dba99f00f1d37b2a0265a259592d05fcc8e7c19d140fe51c6e6f16faabeb1f"}, - {file = "rpds_py-0.24.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:a88c0d17d039333a41d9bf4616bd062f0bd7aa0edeb6cafe00a2fc2a804e944f"}, - {file = 
"rpds_py-0.24.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc31e13ce212e14a539d430428cd365e74f8b2d534f8bc22dd4c9c55b277b875"}, - {file = "rpds_py-0.24.0-cp310-cp310-win32.whl", hash = "sha256:fc2c1e1b00f88317d9de6b2c2b39b012ebbfe35fe5e7bef980fd2a91f6100a07"}, - {file = "rpds_py-0.24.0-cp310-cp310-win_amd64.whl", hash = "sha256:c0145295ca415668420ad142ee42189f78d27af806fcf1f32a18e51d47dd2052"}, - {file = "rpds_py-0.24.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:2d3ee4615df36ab8eb16c2507b11e764dcc11fd350bbf4da16d09cda11fcedef"}, - {file = "rpds_py-0.24.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e13ae74a8a3a0c2f22f450f773e35f893484fcfacb00bb4344a7e0f4f48e1f97"}, - {file = "rpds_py-0.24.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf86f72d705fc2ef776bb7dd9e5fbba79d7e1f3e258bf9377f8204ad0fc1c51e"}, - {file = "rpds_py-0.24.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c43583ea8517ed2e780a345dd9960896afc1327e8cf3ac8239c167530397440d"}, - {file = "rpds_py-0.24.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4cd031e63bc5f05bdcda120646a0d32f6d729486d0067f09d79c8db5368f4586"}, - {file = "rpds_py-0.24.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:34d90ad8c045df9a4259c47d2e16a3f21fdb396665c94520dbfe8766e62187a4"}, - {file = "rpds_py-0.24.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e838bf2bb0b91ee67bf2b889a1a841e5ecac06dd7a2b1ef4e6151e2ce155c7ae"}, - {file = "rpds_py-0.24.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04ecf5c1ff4d589987b4d9882872f80ba13da7d42427234fce8f22efb43133bc"}, - {file = "rpds_py-0.24.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:630d3d8ea77eabd6cbcd2ea712e1c5cecb5b558d39547ac988351195db433f6c"}, - {file = "rpds_py-0.24.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ebcb786b9ff30b994d5969213a8430cbb984cdd7ea9fd6df06663194bd3c450c"}, - {file = "rpds_py-0.24.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:174e46569968ddbbeb8a806d9922f17cd2b524aa753b468f35b97ff9c19cb718"}, - {file = "rpds_py-0.24.0-cp311-cp311-win32.whl", hash = "sha256:5ef877fa3bbfb40b388a5ae1cb00636a624690dcb9a29a65267054c9ea86d88a"}, - {file = "rpds_py-0.24.0-cp311-cp311-win_amd64.whl", hash = "sha256:e274f62cbd274359eff63e5c7e7274c913e8e09620f6a57aae66744b3df046d6"}, - {file = "rpds_py-0.24.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:d8551e733626afec514b5d15befabea0dd70a343a9f23322860c4f16a9430205"}, - {file = "rpds_py-0.24.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e374c0ce0ca82e5b67cd61fb964077d40ec177dd2c4eda67dba130de09085c7"}, - {file = "rpds_py-0.24.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d69d003296df4840bd445a5d15fa5b6ff6ac40496f956a221c4d1f6f7b4bc4d9"}, - {file = "rpds_py-0.24.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8212ff58ac6dfde49946bea57474a386cca3f7706fc72c25b772b9ca4af6b79e"}, - {file = "rpds_py-0.24.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:528927e63a70b4d5f3f5ccc1fa988a35456eb5d15f804d276709c33fc2f19bda"}, - {file = "rpds_py-0.24.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a824d2c7a703ba6daaca848f9c3d5cb93af0505be505de70e7e66829affd676e"}, - {file = "rpds_py-0.24.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:44d51febb7a114293ffd56c6cf4736cb31cd68c0fddd6aa303ed09ea5a48e029"}, - {file = "rpds_py-0.24.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3fab5f4a2c64a8fb64fc13b3d139848817a64d467dd6ed60dcdd6b479e7febc9"}, - {file = "rpds_py-0.24.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9be4f99bee42ac107870c61dfdb294d912bf81c3c6d45538aad7aecab468b6b7"}, - {file = "rpds_py-0.24.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:564c96b6076a98215af52f55efa90d8419cc2ef45d99e314fddefe816bc24f91"}, - {file = "rpds_py-0.24.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:75a810b7664c17f24bf2ffd7f92416c00ec84b49bb68e6a0d93e542406336b56"}, - {file = "rpds_py-0.24.0-cp312-cp312-win32.whl", hash = "sha256:f6016bd950be4dcd047b7475fdf55fb1e1f59fc7403f387be0e8123e4a576d30"}, - {file = "rpds_py-0.24.0-cp312-cp312-win_amd64.whl", hash = "sha256:998c01b8e71cf051c28f5d6f1187abbdf5cf45fc0efce5da6c06447cba997034"}, - {file = "rpds_py-0.24.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:3d2d8e4508e15fc05b31285c4b00ddf2e0eb94259c2dc896771966a163122a0c"}, - {file = "rpds_py-0.24.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0f00c16e089282ad68a3820fd0c831c35d3194b7cdc31d6e469511d9bffc535c"}, - {file = "rpds_py-0.24.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:951cc481c0c395c4a08639a469d53b7d4afa252529a085418b82a6b43c45c240"}, - {file = "rpds_py-0.24.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9ca89938dff18828a328af41ffdf3902405a19f4131c88e22e776a8e228c5a8"}, - {file = "rpds_py-0.24.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ed0ef550042a8dbcd657dfb284a8ee00f0ba269d3f2286b0493b15a5694f9fe8"}, - {file = "rpds_py-0.24.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b2356688e5d958c4d5cb964af865bea84db29971d3e563fb78e46e20fe1848b"}, - {file = "rpds_py-0.24.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78884d155fd15d9f64f5d6124b486f3d3f7fd7cd71a78e9670a0f6f6ca06fb2d"}, - {file = "rpds_py-0.24.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6a4a535013aeeef13c5532f802708cecae8d66c282babb5cd916379b72110cf7"}, - {file = "rpds_py-0.24.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:84e0566f15cf4d769dade9b366b7b87c959be472c92dffb70462dd0844d7cbad"}, - {file = "rpds_py-0.24.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:823e74ab6fbaa028ec89615ff6acb409e90ff45580c45920d4dfdddb069f2120"}, - {file = "rpds_py-0.24.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c61a2cb0085c8783906b2f8b1f16a7e65777823c7f4d0a6aaffe26dc0d358dd9"}, - {file = "rpds_py-0.24.0-cp313-cp313-win32.whl", hash = "sha256:60d9b630c8025b9458a9d114e3af579a2c54bd32df601c4581bd054e85258143"}, - {file = "rpds_py-0.24.0-cp313-cp313-win_amd64.whl", hash = "sha256:6eea559077d29486c68218178ea946263b87f1c41ae7f996b1f30a983c476a5a"}, - {file = "rpds_py-0.24.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:d09dc82af2d3c17e7dd17120b202a79b578d79f2b5424bda209d9966efeed114"}, - {file = "rpds_py-0.24.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5fc13b44de6419d1e7a7e592a4885b323fbc2f46e1f22151e3a8ed3b8b920405"}, - {file = "rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c347a20d79cedc0a7bd51c4d4b7dbc613ca4e65a756b5c3e57ec84bd43505b47"}, - {file = "rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:20f2712bd1cc26a3cc16c5a1bfee9ed1abc33d4cdf1aabd297fe0eb724df4272"}, - {file = "rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aad911555286884be1e427ef0dc0ba3929e6821cbeca2194b13dc415a462c7fd"}, - {file = "rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0aeb3329c1721c43c58cae274d7d2ca85c1690d89485d9c63a006cb79a85771a"}, - {file = "rpds_py-0.24.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a0f156e9509cee987283abd2296ec816225145a13ed0391df8f71bf1d789e2d"}, - {file = "rpds_py-0.24.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aa6800adc8204ce898c8a424303969b7aa6a5e4ad2789c13f8648739830323b7"}, - {file = "rpds_py-0.24.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a18fc371e900a21d7392517c6f60fe859e802547309e94313cd8181ad9db004d"}, - {file = "rpds_py-0.24.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:9168764133fd919f8dcca2ead66de0105f4ef5659cbb4fa044f7014bed9a1797"}, - {file = "rpds_py-0.24.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5f6e3cec44ba05ee5cbdebe92d052f69b63ae792e7d05f1020ac5e964394080c"}, - {file = "rpds_py-0.24.0-cp313-cp313t-win32.whl", hash = "sha256:8ebc7e65ca4b111d928b669713865f021b7773350eeac4a31d3e70144297baba"}, - {file = "rpds_py-0.24.0-cp313-cp313t-win_amd64.whl", hash = "sha256:675269d407a257b8c00a6b58205b72eec8231656506c56fd429d924ca00bb350"}, - {file = "rpds_py-0.24.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a36b452abbf29f68527cf52e181fced56685731c86b52e852053e38d8b60bc8d"}, - {file = "rpds_py-0.24.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8b3b397eefecec8e8e39fa65c630ef70a24b09141a6f9fc17b3c3a50bed6b50e"}, - {file = "rpds_py-0.24.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdabcd3beb2a6dca7027007473d8ef1c3b053347c76f685f5f060a00327b8b65"}, - {file = "rpds_py-0.24.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5db385bacd0c43f24be92b60c857cf760b7f10d8234f4bd4be67b5b20a7c0b6b"}, - {file = "rpds_py-0.24.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8097b3422d020ff1c44effc40ae58e67d93e60d540a65649d2cdaf9466030791"}, - {file = "rpds_py-0.24.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:493fe54318bed7d124ce272fc36adbf59d46729659b2c792e87c3b95649cdee9"}, - {file = "rpds_py-0.24.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8aa362811ccdc1f8dadcc916c6d47e554169ab79559319ae9fae7d7752d0d60c"}, - {file = "rpds_py-0.24.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d8f9a6e7fd5434817526815f09ea27f2746c4a51ee11bb3439065f5fc754db58"}, - {file = "rpds_py-0.24.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8205ee14463248d3349131bb8099efe15cd3ce83b8ef3ace63c7e976998e7124"}, - {file = "rpds_py-0.24.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:921ae54f9ecba3b6325df425cf72c074cd469dea843fb5743a26ca7fb2ccb149"}, - {file = "rpds_py-0.24.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:32bab0a56eac685828e00cc2f5d1200c548f8bc11f2e44abf311d6b548ce2e45"}, - {file = "rpds_py-0.24.0-cp39-cp39-win32.whl", hash = "sha256:f5c0ed12926dec1dfe7d645333ea59cf93f4d07750986a586f511c0bc61fe103"}, - {file = "rpds_py-0.24.0-cp39-cp39-win_amd64.whl", hash = "sha256:afc6e35f344490faa8276b5f2f7cbf71f88bc2cda4328e00553bd451728c571f"}, - {file = "rpds_py-0.24.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:619ca56a5468f933d940e1bf431c6f4e13bef8e688698b067ae68eb4f9b30e3a"}, - {file = "rpds_py-0.24.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:4b28e5122829181de1898c2c97f81c0b3246d49f585f22743a1246420bb8d399"}, - {file = "rpds_py-0.24.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8e5ab32cf9eb3647450bc74eb201b27c185d3857276162c101c0f8c6374e098"}, - {file = "rpds_py-0.24.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:208b3a70a98cf3710e97cabdc308a51cd4f28aa6e7bb11de3d56cd8b74bab98d"}, - {file = "rpds_py-0.24.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bbc4362e06f950c62cad3d4abf1191021b2ffaf0b31ac230fbf0526453eee75e"}, - {file = "rpds_py-0.24.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ebea2821cdb5f9fef44933617be76185b80150632736f3d76e54829ab4a3b4d1"}, - {file = "rpds_py-0.24.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9a4df06c35465ef4d81799999bba810c68d29972bf1c31db61bfdb81dd9d5bb"}, - {file = "rpds_py-0.24.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d3aa13bdf38630da298f2e0d77aca967b200b8cc1473ea05248f6c5e9c9bdb44"}, - {file = "rpds_py-0.24.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:041f00419e1da7a03c46042453598479f45be3d787eb837af382bfc169c0db33"}, - {file = "rpds_py-0.24.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:d8754d872a5dfc3c5bf9c0e059e8107451364a30d9fd50f1f1a85c4fb9481164"}, - {file = "rpds_py-0.24.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:896c41007931217a343eff197c34513c154267636c8056fb409eafd494c3dcdc"}, - {file = "rpds_py-0.24.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:92558d37d872e808944c3c96d0423b8604879a3d1c86fdad508d7ed91ea547d5"}, - {file = "rpds_py-0.24.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f9e0057a509e096e47c87f753136c9b10d7a91842d8042c2ee6866899a717c0d"}, - {file = "rpds_py-0.24.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d6e109a454412ab82979c5b1b3aee0604eca4bbf9a02693bb9df027af2bfa91a"}, - {file = "rpds_py-0.24.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc1c892b1ec1f8cbd5da8de287577b455e388d9c328ad592eabbdcb6fc93bee5"}, - {file = "rpds_py-0.24.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9c39438c55983d48f4bb3487734d040e22dad200dab22c41e331cee145e7a50d"}, - {file = "rpds_py-0.24.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d7e8ce990ae17dda686f7e82fd41a055c668e13ddcf058e7fb5e9da20b57793"}, - {file = "rpds_py-0.24.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9ea7f4174d2e4194289cb0c4e172d83e79a6404297ff95f2875cf9ac9bced8ba"}, - {file = "rpds_py-0.24.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb2954155bb8f63bb19d56d80e5e5320b61d71084617ed89efedb861a684baea"}, - {file = "rpds_py-0.24.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04f2b712a2206e13800a8136b07aaedc23af3facab84918e7aa89e4be0260032"}, - {file = "rpds_py-0.24.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:eda5c1e2a715a4cbbca2d6d304988460942551e4e5e3b7457b50943cd741626d"}, - {file = "rpds_py-0.24.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = 
"sha256:9abc80fe8c1f87218db116016de575a7998ab1629078c90840e8d11ab423ee25"}, - {file = "rpds_py-0.24.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:6a727fd083009bc83eb83d6950f0c32b3c94c8b80a9b667c87f4bd1274ca30ba"}, - {file = "rpds_py-0.24.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e0f3ef95795efcd3b2ec3fe0a5bcfb5dadf5e3996ea2117427e524d4fbf309c6"}, - {file = "rpds_py-0.24.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:2c13777ecdbbba2077670285dd1fe50828c8742f6a4119dbef6f83ea13ad10fb"}, - {file = "rpds_py-0.24.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79e8d804c2ccd618417e96720ad5cd076a86fa3f8cb310ea386a3e6229bae7d1"}, - {file = "rpds_py-0.24.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fd822f019ccccd75c832deb7aa040bb02d70a92eb15a2f16c7987b7ad4ee8d83"}, - {file = "rpds_py-0.24.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0047638c3aa0dbcd0ab99ed1e549bbf0e142c9ecc173b6492868432d8989a046"}, - {file = "rpds_py-0.24.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a5b66d1b201cc71bc3081bc2f1fc36b0c1f268b773e03bbc39066651b9e18391"}, - {file = "rpds_py-0.24.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbcbb6db5582ea33ce46a5d20a5793134b5365110d84df4e30b9d37c6fd40ad3"}, - {file = "rpds_py-0.24.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:63981feca3f110ed132fd217bf7768ee8ed738a55549883628ee3da75bb9cb78"}, - {file = "rpds_py-0.24.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:3a55fc10fdcbf1a4bd3c018eea422c52cf08700cf99c28b5cb10fe97ab77a0d3"}, - {file = "rpds_py-0.24.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:c30ff468163a48535ee7e9bf21bd14c7a81147c0e58a36c1078289a8ca7af0bd"}, - {file = "rpds_py-0.24.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:369d9c6d4c714e36d4a03957b4783217a3ccd1e222cdd67d464a3a479fc17796"}, - {file = "rpds_py-0.24.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:24795c099453e3721fda5d8ddd45f5dfcc8e5a547ce7b8e9da06fecc3832e26f"}, - {file = "rpds_py-0.24.0.tar.gz", hash = "sha256:772cc1b2cd963e7e17e6cc55fe0371fb9c704d63e44cacec7b9b7f523b78919e"}, + {file = "rpds_py-0.25.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:f4ad628b5174d5315761b67f212774a32f5bad5e61396d38108bd801c0a8f5d9"}, + {file = "rpds_py-0.25.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c742af695f7525e559c16f1562cf2323db0e3f0fbdcabdf6865b095256b2d40"}, + {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:605ffe7769e24b1800b4d024d24034405d9404f0bc2f55b6db3362cd34145a6f"}, + {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ccc6f3ddef93243538be76f8e47045b4aad7a66a212cd3a0f23e34469473d36b"}, + {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f70316f760174ca04492b5ab01be631a8ae30cadab1d1081035136ba12738cfa"}, + {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1dafef8df605fdb46edcc0bf1573dea0d6d7b01ba87f85cd04dc855b2b4479e"}, + {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0701942049095741a8aeb298a31b203e735d1c61f4423511d2b1a41dcd8a16da"}, + {file = "rpds_py-0.25.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:e87798852ae0b37c88babb7f7bbbb3e3fecc562a1c340195b44c7e24d403e380"}, + {file = "rpds_py-0.25.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3bcce0edc1488906c2d4c75c94c70a0417e83920dd4c88fec1078c94843a6ce9"}, + {file = "rpds_py-0.25.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e2f6a2347d3440ae789505693a02836383426249d5293541cd712e07e7aecf54"}, + {file = "rpds_py-0.25.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4fd52d3455a0aa997734f3835cbc4c9f32571345143960e7d7ebfe7b5fbfa3b2"}, + {file = "rpds_py-0.25.1-cp310-cp310-win32.whl", hash = "sha256:3f0b1798cae2bbbc9b9db44ee068c556d4737911ad53a4e5093d09d04b3bbc24"}, + {file = "rpds_py-0.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:3ebd879ab996537fc510a2be58c59915b5dd63bccb06d1ef514fee787e05984a"}, + {file = "rpds_py-0.25.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5f048bbf18b1f9120685c6d6bb70cc1a52c8cc11bdd04e643d28d3be0baf666d"}, + {file = "rpds_py-0.25.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4fbb0dbba559959fcb5d0735a0f87cdbca9e95dac87982e9b95c0f8f7ad10255"}, + {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4ca54b9cf9d80b4016a67a0193ebe0bcf29f6b0a96f09db942087e294d3d4c2"}, + {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1ee3e26eb83d39b886d2cb6e06ea701bba82ef30a0de044d34626ede51ec98b0"}, + {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89706d0683c73a26f76a5315d893c051324d771196ae8b13e6ffa1ffaf5e574f"}, + {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2013ee878c76269c7b557a9a9c042335d732e89d482606990b70a839635feb7"}, + {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45e484db65e5380804afbec784522de84fa95e6bb92ef1bd3325d33d13efaebd"}, + {file = "rpds_py-0.25.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:48d64155d02127c249695abb87d39f0faf410733428d499867606be138161d65"}, + {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:048893e902132fd6548a2e661fb38bf4896a89eea95ac5816cf443524a85556f"}, + {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0317177b1e8691ab5879f4f33f4b6dc55ad3b344399e23df2e499de7b10a548d"}, + {file = "rpds_py-0.25.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bffcf57826d77a4151962bf1701374e0fc87f536e56ec46f1abdd6a903354042"}, + {file = "rpds_py-0.25.1-cp311-cp311-win32.whl", hash = "sha256:cda776f1967cb304816173b30994faaf2fd5bcb37e73118a47964a02c348e1bc"}, + {file = "rpds_py-0.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:dc3c1ff0abc91444cd20ec643d0f805df9a3661fcacf9c95000329f3ddf268a4"}, + {file = "rpds_py-0.25.1-cp311-cp311-win_arm64.whl", hash = "sha256:5a3ddb74b0985c4387719fc536faced33cadf2172769540c62e2a94b7b9be1c4"}, + {file = "rpds_py-0.25.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b5ffe453cde61f73fea9430223c81d29e2fbf412a6073951102146c84e19e34c"}, + {file = "rpds_py-0.25.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:115874ae5e2fdcfc16b2aedc95b5eef4aebe91b28e7e21951eda8a5dc0d3461b"}, + {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a714bf6e5e81b0e570d01f56e0c89c6375101b8463999ead3a93a5d2a4af91fa"}, + {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:35634369325906bcd01577da4c19e3b9541a15e99f31e91a02d010816b49bfda"}, + {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4cb2b3ddc16710548801c6fcc0cfcdeeff9dafbc983f77265877793f2660309"}, + {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9ceca1cf097ed77e1a51f1dbc8d174d10cb5931c188a4505ff9f3e119dfe519b"}, + {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c2cd1a4b0c2b8c5e31ffff50d09f39906fe351389ba143c195566056c13a7ea"}, + {file = "rpds_py-0.25.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1de336a4b164c9188cb23f3703adb74a7623ab32d20090d0e9bf499a2203ad65"}, + {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9fca84a15333e925dd59ce01da0ffe2ffe0d6e5d29a9eeba2148916d1824948c"}, + {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:88ec04afe0c59fa64e2f6ea0dd9657e04fc83e38de90f6de201954b4d4eb59bd"}, + {file = "rpds_py-0.25.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a8bd2f19e312ce3e1d2c635618e8a8d8132892bb746a7cf74780a489f0f6cdcb"}, + {file = "rpds_py-0.25.1-cp312-cp312-win32.whl", hash = "sha256:e5e2f7280d8d0d3ef06f3ec1b4fd598d386cc6f0721e54f09109a8132182fbfe"}, + {file = "rpds_py-0.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:db58483f71c5db67d643857404da360dce3573031586034b7d59f245144cc192"}, + {file = "rpds_py-0.25.1-cp312-cp312-win_arm64.whl", hash = "sha256:6d50841c425d16faf3206ddbba44c21aa3310a0cebc3c1cdfc3e3f4f9f6f5728"}, + {file = "rpds_py-0.25.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:659d87430a8c8c704d52d094f5ba6fa72ef13b4d385b7e542a08fc240cb4a559"}, + {file = "rpds_py-0.25.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:68f6f060f0bbdfb0245267da014d3a6da9be127fe3e8cc4a68c6f833f8a23bb1"}, + {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:083a9513a33e0b92cf6e7a6366036c6bb43ea595332c1ab5c8ae329e4bcc0a9c"}, + {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:816568614ecb22b18a010c7a12559c19f6fe993526af88e95a76d5a60b8b75fb"}, + {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c6564c0947a7f52e4792983f8e6cf9bac140438ebf81f527a21d944f2fd0a40"}, + {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c4a128527fe415d73cf1f70a9a688d06130d5810be69f3b553bf7b45e8acf79"}, + {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a49e1d7a4978ed554f095430b89ecc23f42014a50ac385eb0c4d163ce213c325"}, + {file = "rpds_py-0.25.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d74ec9bc0e2feb81d3f16946b005748119c0f52a153f6db6a29e8cd68636f295"}, + {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3af5b4cc10fa41e5bc64e5c198a1b2d2864337f8fcbb9a67e747e34002ce812b"}, + {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:79dc317a5f1c51fd9c6a0c4f48209c6b8526d0524a6904fc1076476e79b00f98"}, + {file = "rpds_py-0.25.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1521031351865e0181bc585147624d66b3b00a84109b57fcb7a779c3ec3772cd"}, + {file = "rpds_py-0.25.1-cp313-cp313-win32.whl", hash = "sha256:5d473be2b13600b93a5675d78f59e63b51b1ba2d0476893415dfbb5477e65b31"}, + {file = "rpds_py-0.25.1-cp313-cp313-win_amd64.whl", hash = 
"sha256:a7b74e92a3b212390bdce1d93da9f6488c3878c1d434c5e751cbc202c5e09500"}, + {file = "rpds_py-0.25.1-cp313-cp313-win_arm64.whl", hash = "sha256:dd326a81afe332ede08eb39ab75b301d5676802cdffd3a8f287a5f0b694dc3f5"}, + {file = "rpds_py-0.25.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:a58d1ed49a94d4183483a3ce0af22f20318d4a1434acee255d683ad90bf78129"}, + {file = "rpds_py-0.25.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f251bf23deb8332823aef1da169d5d89fa84c89f67bdfb566c49dea1fccfd50d"}, + {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dbd586bfa270c1103ece2109314dd423df1fa3d9719928b5d09e4840cec0d72"}, + {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6d273f136e912aa101a9274c3145dcbddbe4bac560e77e6d5b3c9f6e0ed06d34"}, + {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:666fa7b1bd0a3810a7f18f6d3a25ccd8866291fbbc3c9b912b917a6715874bb9"}, + {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:921954d7fbf3fccc7de8f717799304b14b6d9a45bbeec5a8d7408ccbf531faf5"}, + {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3d86373ff19ca0441ebeb696ef64cb58b8b5cbacffcda5a0ec2f3911732a194"}, + {file = "rpds_py-0.25.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c8980cde3bb8575e7c956a530f2c217c1d6aac453474bf3ea0f9c89868b531b6"}, + {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8eb8c84ecea987a2523e057c0d950bcb3f789696c0499290b8d7b3107a719d78"}, + {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:e43a005671a9ed5a650f3bc39e4dbccd6d4326b24fb5ea8be5f3a43a6f576c72"}, + {file = "rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:58f77c60956501a4a627749a6dcb78dac522f249dd96b5c9f1c6af29bfacfb66"}, + {file = "rpds_py-0.25.1-cp313-cp313t-win32.whl", hash = "sha256:2cb9e5b5e26fc02c8a4345048cd9998c2aca7c2712bd1b36da0c72ee969a3523"}, + {file = "rpds_py-0.25.1-cp313-cp313t-win_amd64.whl", hash = "sha256:401ca1c4a20cc0510d3435d89c069fe0a9ae2ee6495135ac46bdd49ec0495763"}, + {file = "rpds_py-0.25.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ce4c8e485a3c59593f1a6f683cf0ea5ab1c1dc94d11eea5619e4fb5228b40fbd"}, + {file = "rpds_py-0.25.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d8222acdb51a22929c3b2ddb236b69c59c72af4019d2cba961e2f9add9b6e634"}, + {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4593c4eae9b27d22df41cde518b4b9e4464d139e4322e2127daa9b5b981b76be"}, + {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd035756830c712b64725a76327ce80e82ed12ebab361d3a1cdc0f51ea21acb0"}, + {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:114a07e85f32b125404f28f2ed0ba431685151c037a26032b213c882f26eb908"}, + {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dec21e02e6cc932538b5203d3a8bd6aa1480c98c4914cb88eea064ecdbc6396a"}, + {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:09eab132f41bf792c7a0ea1578e55df3f3e7f61888e340779b06050a9a3f16e9"}, + {file = "rpds_py-0.25.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c98f126c4fc697b84c423e387337d5b07e4a61e9feac494362a59fd7a2d9ed80"}, + {file = 
"rpds_py-0.25.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0e6a327af8ebf6baba1c10fadd04964c1965d375d318f4435d5f3f9651550f4a"}, + {file = "rpds_py-0.25.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:bc120d1132cff853ff617754196d0ac0ae63befe7c8498bd67731ba368abe451"}, + {file = "rpds_py-0.25.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:140f61d9bed7839446bdd44852e30195c8e520f81329b4201ceead4d64eb3a9f"}, + {file = "rpds_py-0.25.1-cp39-cp39-win32.whl", hash = "sha256:9c006f3aadeda131b438c3092124bd196b66312f0caa5823ef09585a669cf449"}, + {file = "rpds_py-0.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:a61d0b2c7c9a0ae45732a77844917b427ff16ad5464b4d4f5e4adb955f582890"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b24bf3cd93d5b6ecfbedec73b15f143596c88ee249fa98cefa9a9dc9d92c6f28"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:0eb90e94f43e5085623932b68840b6f379f26db7b5c2e6bcef3179bd83c9330f"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d50e4864498a9ab639d6d8854b25e80642bd362ff104312d9770b05d66e5fb13"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7c9409b47ba0650544b0bb3c188243b83654dfe55dcc173a86832314e1a6a35d"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:796ad874c89127c91970652a4ee8b00d56368b7e00d3477f4415fe78164c8000"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:85608eb70a659bf4c1142b2781083d4b7c0c4e2c90eff11856a9754e965b2540"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4feb9211d15d9160bc85fa72fed46432cdc143eb9cf6d5ca377335a921ac37b"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ccfa689b9246c48947d31dd9d8b16d89a0ecc8e0e26ea5253068efb6c542b76e"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:3c5b317ecbd8226887994852e85de562f7177add602514d4ac40f87de3ae45a8"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:454601988aab2c6e8fd49e7634c65476b2b919647626208e376afcd22019eeb8"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:1c0c434a53714358532d13539272db75a5ed9df75a4a090a753ac7173ec14e11"}, + {file = "rpds_py-0.25.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f73ce1512e04fbe2bc97836e89830d6b4314c171587a99688082d090f934d20a"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ee86d81551ec68a5c25373c5643d343150cc54672b5e9a0cafc93c1870a53954"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:89c24300cd4a8e4a51e55c31a8ff3918e6651b241ee8876a42cc2b2a078533ba"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:771c16060ff4e79584dc48902a91ba79fd93eade3aa3a12d6d2a4aadaf7d542b"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:785ffacd0ee61c3e60bdfde93baa6d7c10d86f15655bd706c89da08068dc5038"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a40046a529cc15cef88ac5ab589f83f739e2d332cb4d7399072242400ed68c9"}, + {file = 
"rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:85fc223d9c76cabe5d0bff82214459189720dc135db45f9f66aa7cffbf9ff6c1"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0be9965f93c222fb9b4cc254235b3b2b215796c03ef5ee64f995b1b69af0762"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8378fa4a940f3fb509c081e06cb7f7f2adae8cf46ef258b0e0ed7519facd573e"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:33358883a4490287e67a2c391dfaea4d9359860281db3292b6886bf0be3d8692"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:1d1fadd539298e70cac2f2cb36f5b8a65f742b9b9f1014dd4ea1f7785e2470bf"}, + {file = "rpds_py-0.25.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:9a46c2fb2545e21181445515960006e85d22025bd2fe6db23e76daec6eb689fe"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:50f2c501a89c9a5f4e454b126193c5495b9fb441a75b298c60591d8a2eb92e1b"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d779b325cc8238227c47fbc53964c8cc9a941d5dbae87aa007a1f08f2f77b23"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:036ded36bedb727beeabc16dc1dad7cb154b3fa444e936a03b67a86dc6a5066e"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:245550f5a1ac98504147cba96ffec8fabc22b610742e9150138e5d60774686d7"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ff7c23ba0a88cb7b104281a99476cccadf29de2a0ef5ce864959a52675b1ca83"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e37caa8cdb3b7cf24786451a0bdb853f6347b8b92005eeb64225ae1db54d1c2b"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f2f48ab00181600ee266a095fe815134eb456163f7d6699f525dee471f312cf"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9e5fc7484fa7dce57e25063b0ec9638ff02a908304f861d81ea49273e43838c1"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:d3c10228d6cf6fe2b63d2e7985e94f6916fa46940df46b70449e9ff9297bd3d1"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:5d9e40f32745db28c1ef7aad23f6fc458dc1e29945bd6781060f0d15628b8ddf"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:35a8d1a24b5936b35c5003313bc177403d8bdef0f8b24f28b1c4a255f94ea992"}, + {file = "rpds_py-0.25.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:6099263f526efff9cf3883dfef505518730f7a7a93049b1d90d42e50a22b4793"}, + {file = "rpds_py-0.25.1.tar.gz", hash = "sha256:8960b6dac09b62dac26e75d7e2c4a22efb835d827a7278c34f72b2b84fa160e3"}, ] [[package]] @@ -3271,13 +3286,13 @@ crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] [[package]] name = "setuptools" -version = "80.3.1" +version = "80.9.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.9" files = [ - {file = "setuptools-80.3.1-py3-none-any.whl", hash = "sha256:ea8e00d7992054c4c592aeb892f6ad51fe1b4d90cc6947cc45c45717c40ec537"}, - {file = "setuptools-80.3.1.tar.gz", hash = 
"sha256:31e2c58dbb67c99c289f51c16d899afedae292b978f8051efaf6262d8212f927"}, + {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"}, + {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"}, ] [package.extras] @@ -3469,68 +3484,68 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.40" +version = "2.0.41" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.40-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ae9597cab738e7cc823f04a704fb754a9249f0b6695a6aeb63b74055cd417a96"}, - {file = "SQLAlchemy-2.0.40-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37a5c21ab099a83d669ebb251fddf8f5cee4d75ea40a5a1653d9c43d60e20867"}, - {file = "SQLAlchemy-2.0.40-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bece9527f5a98466d67fb5d34dc560c4da964240d8b09024bb21c1246545e04e"}, - {file = "SQLAlchemy-2.0.40-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:8bb131ffd2165fae48162c7bbd0d97c84ab961deea9b8bab16366543deeab625"}, - {file = "SQLAlchemy-2.0.40-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:9408fd453d5f8990405cc9def9af46bfbe3183e6110401b407c2d073c3388f47"}, - {file = "SQLAlchemy-2.0.40-cp37-cp37m-win32.whl", hash = "sha256:00a494ea6f42a44c326477b5bee4e0fc75f6a80c01570a32b57e89cf0fbef85a"}, - {file = "SQLAlchemy-2.0.40-cp37-cp37m-win_amd64.whl", hash = "sha256:c7b927155112ac858357ccf9d255dd8c044fd9ad2dc6ce4c4149527c901fa4c3"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f1ea21bef99c703f44444ad29c2c1b6bd55d202750b6de8e06a955380f4725d7"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:afe63b208153f3a7a2d1a5b9df452b0673082588933e54e7c8aac457cf35e758"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8aae085ea549a1eddbc9298b113cffb75e514eadbb542133dd2b99b5fb3b6af"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ea9181284754d37db15156eb7be09c86e16e50fbe77610e9e7bee09291771a1"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5434223b795be5c5ef8244e5ac98056e290d3a99bdcc539b916e282b160dda00"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:15d08d5ef1b779af6a0909b97be6c1fd4298057504eb6461be88bd1696cb438e"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-win32.whl", hash = "sha256:cd2f75598ae70bcfca9117d9e51a3b06fe29edd972fdd7fd57cc97b4dbf3b08a"}, - {file = "sqlalchemy-2.0.40-cp310-cp310-win_amd64.whl", hash = "sha256:2cbafc8d39ff1abdfdda96435f38fab141892dc759a2165947d1a8fffa7ef596"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f6bacab7514de6146a1976bc56e1545bee247242fab030b89e5f70336fc0003e"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5654d1ac34e922b6c5711631f2da497d3a7bffd6f9f87ac23b35feea56098011"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35904d63412db21088739510216e9349e335f142ce4a04b69e2528020ee19ed4"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c7a80ed86d6aaacb8160a1caef6680d4ddd03c944d985aecee940d168c411d1"}, - {file = 
"sqlalchemy-2.0.40-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:519624685a51525ddaa7d8ba8265a1540442a2ec71476f0e75241eb8263d6f51"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2ee5f9999a5b0e9689bed96e60ee53c3384f1a05c2dd8068cc2e8361b0df5b7a"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-win32.whl", hash = "sha256:c0cae71e20e3c02c52f6b9e9722bca70e4a90a466d59477822739dc31ac18b4b"}, - {file = "sqlalchemy-2.0.40-cp311-cp311-win_amd64.whl", hash = "sha256:574aea2c54d8f1dd1699449f332c7d9b71c339e04ae50163a3eb5ce4c4325ee4"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9d3b31d0a1c44b74d3ae27a3de422dfccd2b8f0b75e51ecb2faa2bf65ab1ba0d"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:37f7a0f506cf78c80450ed1e816978643d3969f99c4ac6b01104a6fe95c5490a"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bb933a650323e476a2e4fbef8997a10d0003d4da996aad3fd7873e962fdde4d"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6959738971b4745eea16f818a2cd086fb35081383b078272c35ece2b07012716"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:110179728e442dae85dd39591beb74072ae4ad55a44eda2acc6ec98ead80d5f2"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8040680eaacdce4d635f12c55c714f3d4c7f57da2bc47a01229d115bd319191"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-win32.whl", hash = "sha256:650490653b110905c10adac69408380688cefc1f536a137d0d69aca1069dc1d1"}, - {file = "sqlalchemy-2.0.40-cp312-cp312-win_amd64.whl", hash = "sha256:2be94d75ee06548d2fc591a3513422b873490efb124048f50556369a834853b0"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:915866fd50dd868fdcc18d61d8258db1bf9ed7fbd6dfec960ba43365952f3b01"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a4c5a2905a9ccdc67a8963e24abd2f7afcd4348829412483695c59e0af9a705"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55028d7a3ebdf7ace492fab9895cbc5270153f75442a0472d8516e03159ab364"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6cfedff6878b0e0d1d0a50666a817ecd85051d12d56b43d9d425455e608b5ba0"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bb19e30fdae77d357ce92192a3504579abe48a66877f476880238a962e5b96db"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:16d325ea898f74b26ffcd1cf8c593b0beed8714f0317df2bed0d8d1de05a8f26"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-win32.whl", hash = "sha256:a669cbe5be3c63f75bcbee0b266779706f1a54bcb1000f302685b87d1b8c1500"}, - {file = "sqlalchemy-2.0.40-cp313-cp313-win_amd64.whl", hash = "sha256:641ee2e0834812d657862f3a7de95e0048bdcb6c55496f39c6fa3d435f6ac6ad"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:50f5885bbed261fc97e2e66c5156244f9704083a674b8d17f24c72217d29baf5"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cf0e99cdb600eabcd1d65cdba0d3c91418fee21c4aa1d28db47d095b1064a7d8"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe147fcd85aaed53ce90645c91ed5fca0cc88a797314c70dfd9d35925bd5d106"}, - {file = 
"sqlalchemy-2.0.40-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baf7cee56bd552385c1ee39af360772fbfc2f43be005c78d1140204ad6148438"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:4aeb939bcac234b88e2d25d5381655e8353fe06b4e50b1c55ecffe56951d18c2"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c268b5100cfeaa222c40f55e169d484efa1384b44bf9ca415eae6d556f02cb08"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-win32.whl", hash = "sha256:46628ebcec4f23a1584fb52f2abe12ddb00f3bb3b7b337618b80fc1b51177aff"}, - {file = "sqlalchemy-2.0.40-cp38-cp38-win_amd64.whl", hash = "sha256:7e0505719939e52a7b0c65d20e84a6044eb3712bb6f239c6b1db77ba8e173a37"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c884de19528e0fcd9dc34ee94c810581dd6e74aef75437ff17e696c2bfefae3e"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1abb387710283fc5983d8a1209d9696a4eae9db8d7ac94b402981fe2fe2e39ad"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cfa124eda500ba4b0d3afc3e91ea27ed4754e727c7f025f293a22f512bcd4c9"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b6b28d303b9d57c17a5164eb1fd2d5119bb6ff4413d5894e74873280483eeb5"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:b5a5bbe29c10c5bfd63893747a1bf6f8049df607638c786252cb9243b86b6706"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f0fda83e113bb0fb27dc003685f32a5dcb99c9c4f41f4fa0838ac35265c23b5c"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-win32.whl", hash = "sha256:957f8d85d5e834397ef78a6109550aeb0d27a53b5032f7a57f2451e1adc37e98"}, - {file = "sqlalchemy-2.0.40-cp39-cp39-win_amd64.whl", hash = "sha256:1ffdf9c91428e59744f8e6f98190516f8e1d05eec90e936eb08b257332c5e870"}, - {file = "sqlalchemy-2.0.40-py3-none-any.whl", hash = "sha256:32587e2e1e359276957e6fe5dad089758bc042a971a8a09ae8ecf7a8fe23d07a"}, - {file = "sqlalchemy-2.0.40.tar.gz", hash = "sha256:d827099289c64589418ebbcaead0145cd19f4e3e8a93919a0100247af245fa00"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6854175807af57bdb6425e47adbce7d20a4d79bbfd6f6d6519cd10bb7109a7f8"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05132c906066142103b83d9c250b60508af556982a385d96c4eaa9fb9720ac2b"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b4af17bda11e907c51d10686eda89049f9ce5669b08fbe71a29747f1e876036"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:c0b0e5e1b5d9f3586601048dd68f392dc0cc99a59bb5faf18aab057ce00d00b2"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:0b3dbf1e7e9bc95f4bac5e2fb6d3fb2f083254c3fdd20a1789af965caf2d2348"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-win32.whl", hash = "sha256:1e3f196a0c59b0cae9a0cd332eb1a4bda4696e863f4f1cf84ab0347992c548c2"}, + {file = "SQLAlchemy-2.0.41-cp37-cp37m-win_amd64.whl", hash = "sha256:6ab60a5089a8f02009f127806f777fca82581c49e127f08413a66056bd9166dd"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b1f09b6821406ea1f94053f346f28f8215e293344209129a9c0fcc3578598d7b"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:1936af879e3db023601196a1684d28e12f19ccf93af01bf3280a3262c4b6b4e5"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2ac41acfc8d965fb0c464eb8f44995770239668956dc4cdf502d1b1ffe0d747"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81c24e0c0fde47a9723c81d5806569cddef103aebbf79dbc9fcbb617153dea30"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:23a8825495d8b195c4aa9ff1c430c28f2c821e8c5e2d98089228af887e5d7e29"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:60c578c45c949f909a4026b7807044e7e564adf793537fc762b2489d522f3d11"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-win32.whl", hash = "sha256:118c16cd3f1b00c76d69343e38602006c9cfb9998fa4f798606d28d63f23beda"}, + {file = "sqlalchemy-2.0.41-cp310-cp310-win_amd64.whl", hash = "sha256:7492967c3386df69f80cf67efd665c0f667cee67032090fe01d7d74b0e19bb08"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6375cd674fe82d7aa9816d1cb96ec592bac1726c11e0cafbf40eeee9a4516b5f"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f8c9fdd15a55d9465e590a402f42082705d66b05afc3ffd2d2eb3c6ba919560"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32f9dc8c44acdee06c8fc6440db9eae8b4af8b01e4b1aee7bdd7241c22edff4f"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90c11ceb9a1f482c752a71f203a81858625d8df5746d787a4786bca4ffdf71c6"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:911cc493ebd60de5f285bcae0491a60b4f2a9f0f5c270edd1c4dbaef7a38fc04"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03968a349db483936c249f4d9cd14ff2c296adfa1290b660ba6516f973139582"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-win32.whl", hash = "sha256:293cd444d82b18da48c9f71cd7005844dbbd06ca19be1ccf6779154439eec0b8"}, + {file = "sqlalchemy-2.0.41-cp311-cp311-win_amd64.whl", hash = "sha256:3d3549fc3e40667ec7199033a4e40a2f669898a00a7b18a931d3efb4c7900504"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:81f413674d85cfd0dfcd6512e10e0f33c19c21860342a4890c3a2b59479929f9"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:598d9ebc1e796431bbd068e41e4de4dc34312b7aa3292571bb3674a0cb415dd1"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a104c5694dfd2d864a6f91b0956eb5d5883234119cb40010115fd45a16da5e70"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6145afea51ff0af7f2564a05fa95eb46f542919e6523729663a5d285ecb3cf5e"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b46fa6eae1cd1c20e6e6f44e19984d438b6b2d8616d21d783d150df714f44078"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41836fe661cc98abfae476e14ba1906220f92c4e528771a8a3ae6a151242d2ae"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-win32.whl", hash = "sha256:a8808d5cf866c781150d36a3c8eb3adccfa41a8105d031bf27e92c251e3969d6"}, + {file = "sqlalchemy-2.0.41-cp312-cp312-win_amd64.whl", hash = "sha256:5b14e97886199c1f52c14629c11d90c11fbb09e9334fa7bb5f6d068d9ced0ce0"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:4eeb195cdedaf17aab6b247894ff2734dcead6c08f748e617bfe05bd5a218443"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d4ae769b9c1c7757e4ccce94b0641bc203bbdf43ba7a2413ab2523d8d047d8dc"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a62448526dd9ed3e3beedc93df9bb6b55a436ed1474db31a2af13b313a70a7e1"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc56c9788617b8964ad02e8fcfeed4001c1f8ba91a9e1f31483c0dffb207002a"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c153265408d18de4cc5ded1941dcd8315894572cddd3c58df5d5b5705b3fa28d"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f67766965996e63bb46cfbf2ce5355fc32d9dd3b8ad7e536a920ff9ee422e23"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-win32.whl", hash = "sha256:bfc9064f6658a3d1cadeaa0ba07570b83ce6801a1314985bf98ec9b95d74e15f"}, + {file = "sqlalchemy-2.0.41-cp313-cp313-win_amd64.whl", hash = "sha256:82ca366a844eb551daff9d2e6e7a9e5e76d2612c8564f58db6c19a726869c1df"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:90144d3b0c8b139408da50196c5cad2a6909b51b23df1f0538411cd23ffa45d3"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:023b3ee6169969beea3bb72312e44d8b7c27c75b347942d943cf49397b7edeb5"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:725875a63abf7c399d4548e686debb65cdc2549e1825437096a0af1f7e374814"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81965cc20848ab06583506ef54e37cf15c83c7e619df2ad16807c03100745dea"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:dd5ec3aa6ae6e4d5b5de9357d2133c07be1aff6405b136dad753a16afb6717dd"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:ff8e80c4c4932c10493ff97028decfdb622de69cae87e0f127a7ebe32b4069c6"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-win32.whl", hash = "sha256:4d44522480e0bf34c3d63167b8cfa7289c1c54264c2950cc5fc26e7850967e45"}, + {file = "sqlalchemy-2.0.41-cp38-cp38-win_amd64.whl", hash = "sha256:81eedafa609917040d39aa9332e25881a8e7a0862495fcdf2023a9667209deda"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9a420a91913092d1e20c86a2f5f1fc85c1a8924dbcaf5e0586df8aceb09c9cc2"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:906e6b0d7d452e9a98e5ab8507c0da791856b2380fdee61b765632bb8698026f"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a373a400f3e9bac95ba2a06372c4fd1412a7cee53c37fc6c05f829bf672b8769"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:087b6b52de812741c27231b5a3586384d60c353fbd0e2f81405a814b5591dc8b"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:34ea30ab3ec98355235972dadc497bb659cc75f8292b760394824fab9cf39826"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8280856dd7c6a68ab3a164b4a4b1c51f7691f6d04af4d4ca23d6ecf2261b7923"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-win32.whl", hash = "sha256:b50eab9994d64f4a823ff99a0ed28a6903224ddbe7fef56a6dd865eec9243440"}, + {file = "sqlalchemy-2.0.41-cp39-cp39-win_amd64.whl", hash = 
"sha256:5e22575d169529ac3e0a120cf050ec9daa94b6a9597993d1702884f6954a7d71"}, + {file = "sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576"}, + {file = "sqlalchemy-2.0.41.tar.gz", hash = "sha256:edba70118c4be3c2b1f90754d308d0b79c6fe2c0fdc52d8ddf603916f83f4db9"}, ] [package.dependencies] @@ -3722,24 +3737,24 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "types-awscrt" -version = "0.26.1" +version = "0.27.2" description = "Type annotations and code completion for awscrt" optional = false python-versions = ">=3.8" files = [ - {file = "types_awscrt-0.26.1-py3-none-any.whl", hash = "sha256:176d320a26990efc057d4bf71396e05be027c142252ac48cc0d87aaea0704280"}, - {file = "types_awscrt-0.26.1.tar.gz", hash = "sha256:aca96f889b3745c0e74f42f08f277fed3bf6e9baa2cf9b06a36f78d77720e504"}, + {file = "types_awscrt-0.27.2-py3-none-any.whl", hash = "sha256:49a045f25bbd5ad2865f314512afced933aed35ddbafc252e2268efa8a787e4e"}, + {file = "types_awscrt-0.27.2.tar.gz", hash = "sha256:acd04f57119eb15626ab0ba9157fc24672421de56e7bd7b9f61681fedee44e91"}, ] [[package]] name = "types-pyasn1" -version = "0.6.0.20250208" +version = "0.6.0.20250516" description = "Typing stubs for pyasn1" optional = false python-versions = ">=3.9" files = [ - {file = "types_pyasn1-0.6.0.20250208-py3-none-any.whl", hash = "sha256:4a4fef8801efa983a88a1db52f6d7ac70cb1ad48abf25fad59134237603d5334"}, - {file = "types_pyasn1-0.6.0.20250208.tar.gz", hash = "sha256:14c6a0463555fb74ec81aca67ad2eb6d8409895359238511a3eceda7b5426235"}, + {file = "types_pyasn1-0.6.0.20250516-py3-none-any.whl", hash = "sha256:b9925e4e22e09eed758b93b6f2a7881b89d842c2373dd11c09b173567d170142"}, + {file = "types_pyasn1-0.6.0.20250516.tar.gz", hash = "sha256:1a9b35a4f033cd70c384a5043a3407b2cc07afc95900732b66e0d38426c7541d"}, ] [[package]] @@ -3758,24 +3773,24 @@ types-pyasn1 = "*" [[package]] name = "types-pytz" -version = "2025.2.0.20250326" +version = "2025.2.0.20250516" description = "Typing stubs for pytz" optional = false python-versions = ">=3.9" files = [ - {file = "types_pytz-2025.2.0.20250326-py3-none-any.whl", hash = "sha256:3c397fd1b845cd2b3adc9398607764ced9e578a98a5d1fbb4a9bc9253edfb162"}, - {file = "types_pytz-2025.2.0.20250326.tar.gz", hash = "sha256:deda02de24f527066fc8d6a19e284ab3f3ae716a42b4adb6b40e75e408c08d36"}, + {file = "types_pytz-2025.2.0.20250516-py3-none-any.whl", hash = "sha256:e0e0c8a57e2791c19f718ed99ab2ba623856b11620cb6b637e5f62ce285a7451"}, + {file = "types_pytz-2025.2.0.20250516.tar.gz", hash = "sha256:e1216306f8c0d5da6dafd6492e72eb080c9a166171fa80dd7a1990fd8be7a7b3"}, ] [[package]] name = "types-pyyaml" -version = "6.0.12.20250402" +version = "6.0.12.20250516" description = "Typing stubs for PyYAML" optional = false python-versions = ">=3.9" files = [ - {file = "types_pyyaml-6.0.12.20250402-py3-none-any.whl", hash = "sha256:652348fa9e7a203d4b0d21066dfb00760d3cbd5a15ebb7cf8d33c88a49546681"}, - {file = "types_pyyaml-6.0.12.20250402.tar.gz", hash = "sha256:d7c13c3e6d335b6af4b0122a01ff1d270aba84ab96d1a1a1063ecba3e13ec075"}, + {file = "types_pyyaml-6.0.12.20250516-py3-none-any.whl", hash = "sha256:8478208feaeb53a34cb5d970c56a7cd76b72659442e733e268a94dc72b2d0530"}, + {file = "types_pyyaml-6.0.12.20250516.tar.gz", hash = "sha256:9f21a70216fc0fa1b216a8176db5f9e0af6eb35d2f2932acb87689d03a5bf6ba"}, ] [[package]] @@ -3808,13 +3823,13 @@ urllib3 = ">=2" [[package]] name = "types-s3transfer" -version = "0.12.0" +version = "0.13.0" 
description = "Type annotations and code completion for s3transfer" optional = false python-versions = ">=3.8" files = [ - {file = "types_s3transfer-0.12.0-py3-none-any.whl", hash = "sha256:101bbc5b7f00b71512374df881f480fc6bf63c948b5098ab024bf3370fbfb0e8"}, - {file = "types_s3transfer-0.12.0.tar.gz", hash = "sha256:f8f59201481e904362873bf0be3267f259d60ad946ebdfcb847d092a1fa26f98"}, + {file = "types_s3transfer-0.13.0-py3-none-any.whl", hash = "sha256:79c8375cbf48a64bff7654c02df1ec4b20d74f8c5672fc13e382f593ca5565b3"}, + {file = "types_s3transfer-0.13.0.tar.gz", hash = "sha256:203dadcb9865c2f68fb44bc0440e1dc05b79197ba4a641c0976c26c9af75ef52"}, ] [[package]] @@ -3830,13 +3845,13 @@ files = [ [[package]] name = "typing-extensions" -version = "4.13.2" -description = "Backported and Experimental Type Hints for Python 3.8+" +version = "4.14.0" +description = "Backported and Experimental Type Hints for Python 3.9+" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"}, - {file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"}, + {file = "typing_extensions-4.14.0-py3-none-any.whl", hash = "sha256:a1514509136dd0b477638fc68d6a91497af5076466ad0fa6c338e44e359944af"}, + {file = "typing_extensions-4.14.0.tar.gz", hash = "sha256:8676b788e32f02ab42d9e7c61324048ae4c6d844a399eebace3d4979d75ceef4"}, ] [[package]] @@ -3885,13 +3900,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "uvicorn" -version = "0.34.2" +version = "0.34.3" description = "The lightning-fast ASGI server." optional = true python-versions = ">=3.9" files = [ - {file = "uvicorn-0.34.2-py3-none-any.whl", hash = "sha256:deb49af569084536d269fe0a6d67e3754f104cf03aba7c11c40f01aadf33c403"}, - {file = "uvicorn-0.34.2.tar.gz", hash = "sha256:0e929828f6186353a80b58ea719861d2629d766293b6d19baf086ba31d4f3328"}, + {file = "uvicorn-0.34.3-py3-none-any.whl", hash = "sha256:16246631db62bdfbf069b0645177d6e8a77ba950cfedbfd093acef9444e4d885"}, + {file = "uvicorn-0.34.3.tar.gz", hash = "sha256:35919a9a979d7a59334b6b10e05d77c1d0d574c50e0fc98b8b1a0f165708b55a"}, ] [package.dependencies] @@ -3902,12 +3917,12 @@ httptools = {version = ">=0.6.3", optional = true, markers = "extra == \"standar python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} -uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +uvloop = {version = ">=0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} [package.extras] -standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] +standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop 
(>=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] [[package]] name = "uvloop" @@ -3962,13 +3977,13 @@ test = ["aiohttp (>=3.10.5)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", [[package]] name = "virtualenv" -version = "20.31.1" +version = "20.31.2" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" files = [ - {file = "virtualenv-20.31.1-py3-none-any.whl", hash = "sha256:f448cd2f1604c831afb9ea238021060be2c0edbcad8eb0a4e8b4e14ff11a5482"}, - {file = "virtualenv-20.31.1.tar.gz", hash = "sha256:65442939608aeebb9284cd30baca5865fcd9f12b58bb740a24b220030df46d26"}, + {file = "virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11"}, + {file = "virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af"}, ] [package.dependencies] @@ -4203,13 +4218,13 @@ pyodbc = ["pyodbc"] [[package]] name = "zipp" -version = "3.21.0" +version = "3.23.0" description = "Backport of pathlib-compatible object wrapper for zip files" optional = true python-versions = ">=3.9" files = [ - {file = "zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"}, - {file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"}, + {file = "zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e"}, + {file = "zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166"}, ] [package.extras] @@ -4217,7 +4232,7 @@ check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=2.2)"] -test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] +test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more_itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] type = ["pytest-mypy"] [extras] @@ -4226,4 +4241,4 @@ server = ["alembic", "alembic-utils", "arq", "authlib", "biocommons", "boto3", " [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "47c32224ef18a3d012cb690d45dff501ba1a413d2281c079c4a68761fcf229f0" +content-hash = "3d97dec0debc662e3b1464b9204a253aa7651c17cb8addccf36089b4ed448726" diff --git a/pyproject.toml b/pyproject.toml index b6d708ad..4c6530db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ orcid = { version = "~1.0.3", optional = true } psycopg2 = { version = "~2.9.3", optional = true } python-jose = { extras = ["cryptography"], version = "~3.4.0", optional = true } python-multipart = { version = "~0.0.5", optional = true } -requests = { version = "~2.32.0", optional = true } +requests = { version = "~2.32.2", optional = true } starlette = { version = "~0.27.0", optional = true } starlette-context = { version = "^0.3.6", optional = true } slack-sdk = { version = "~3.21.3", optional = true } From d8f3b37df39339e454829269e3a6ddb9ce86148f Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 11 Jun 2025 12:49:10 -0700 Subject: [PATCH 166/166] Bump version to 2025.2.0 in pyproject.toml and __init__.py --- pyproject.toml | 2 +- src/mavedb/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/pyproject.toml b/pyproject.toml index 4c6530db..6bb60440 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "mavedb" -version = "2025.1.2" +version = "2025.2.0" description = "API for MaveDB, the database of Multiplexed Assays of Variant Effect." license = "AGPL-3.0-only" readme = "README.md" diff --git a/src/mavedb/__init__.py b/src/mavedb/__init__.py index 73ad9014..087f29ba 100644 --- a/src/mavedb/__init__.py +++ b/src/mavedb/__init__.py @@ -6,6 +6,6 @@ logger = module_logging.getLogger(__name__) __project__ = "mavedb-api" -__version__ = "2025.1.2" +__version__ = "2025.2.0" logger.info(f"MaveDB {__version__}")
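PATCH 166 keeps two version strings in lockstep: the `version` field in pyproject.toml and `mavedb.__version__` in src/mavedb/__init__.py. A minimal sanity-check sketch follows (hypothetical, not part of the patch series; it assumes the mavedb package is installed in the active environment) to confirm the declared distribution version and the module constant agree after the bump:

# Hypothetical post-release check; not part of PATCH 166.
from importlib.metadata import version

import mavedb

# __version__ is assigned in src/mavedb/__init__.py (bumped above to 2025.2.0).
assert mavedb.__version__ == "2025.2.0"

# The installed distribution metadata reflects the version declared in
# pyproject.toml at build time, so the two values should match exactly.
assert version("mavedb") == mavedb.__version__
print(f"MaveDB version check passed: {mavedb.__version__}")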