diff --git a/.gitmodules b/.gitmodules index ba6b9cb..7bc1704 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "submodules/cat_vrs"] path = submodules/cat_vrs url = https://github.com/ga4gh/cat-vrs - branch = 1.x + branch = 1.0.0-ballot.2024-11 diff --git a/pyproject.toml b/pyproject.toml index b95c9bb..796b17c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ keywords = [ requires-python = ">=3.10" dynamic = ["version"] dependencies = [ - "ga4gh.vrs~=2.0.0a12", + "ga4gh.vrs==2.0.0a13", "pydantic==2.*", ] @@ -137,7 +137,8 @@ ignore = [ # S101 - assert # B011 - assert-false "tests/*" = ["ANN001", "ANN2", "ANN102", "S101", "B011"] -"src/ga4gh/cat_vrs/*models.py" = ["ANN102"] +"src/ga4gh/cat_vrs/models.py" = ["ANN102", "N815"] +"src/ga4gh/cat_vrs/recipes.py" = ["ANN102", "N815"] [tool.setuptools.packages.find] where = ["src"] diff --git a/src/ga4gh/cat_vrs/__init__.py b/src/ga4gh/cat_vrs/__init__.py index cd05ec7..5c586b5 100644 --- a/src/ga4gh/cat_vrs/__init__.py +++ b/src/ga4gh/cat_vrs/__init__.py @@ -1,5 +1,5 @@ """Package for Cat-VRS Python implementation""" -from . import profile_models as cat_vrs_models +from . import models, recipes -__all__ = ["cat_vrs_models"] +__all__ = ["models", "recipes"] diff --git a/src/ga4gh/cat_vrs/core_models.py b/src/ga4gh/cat_vrs/core_models.py deleted file mode 100644 index fb8623a..0000000 --- a/src/ga4gh/cat_vrs/core_models.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Define Pydantic models for GA4GH categorical variation objects. - -See the `CatVar page `_ on -the GA4GH website for more information. -""" - -from enum import Enum -from typing import Literal - -from ga4gh.core.entity_models import IRI, Coding, DomainEntity -from ga4gh.vrs.models import CopyChange, Location, Range, Variation -from pydantic import BaseModel, Field, RootModel, field_validator - - -class Relation(str, Enum): - """Defined relationships between members of the categorical variant and the defining - context. 
``sequence_liftover`` refers to variants or locations that represent a - congruent concept on a differing assembly of a human genome (e.g. "GRCh37" and - "GRCh38") or gene (e.g. Locus Reference Genomic) sequence. ``transcript_projection`` - refers to variants or locations that occur on transcripts projected from the defined - genomic concept. ``codon_translation`` refers to variants or locations that - translate from the codon(s) represented by the defined concept. - """ - - SEQUENCE_LIFTOVER = "sequence_liftover" - TRANSCRIPT_PROJECTION = "transcript_projection" - CODON_TRANSLATION = "codon_translation" - - -class DefiningContextConstraint(BaseModel): - """The location or location-state, congruent with other reference sequences, about - which categorical variation is being described. - """ - - type: Literal["DefiningContextConstraint"] = Field( - "DefiningContextConstraint", description="MUST be 'DefiningContextConstraint'" - ) - definingContext: Variation | Location | IRI # noqa: N815 - relations: list[Relation] | None = Field( - None, - description="Defined relationships between members of the categorical variant and the defining context. ``sequence_liftover`` refers to variants or locations that represent a congruent concept on a differing assembly of a human genome (e.g. 'GRCh37' and 'GRCh38') or gene (e.g. Locus Reference Genomic) sequence. ``transcript_projection`` refers to variants or locations that occur on transcripts projected from the defined genomic concept. 
``codon_translation`` refers to variants or locations that translate from the codon(s) represented by the defined concept.", - ) - - -class CopyCountConstraint(BaseModel): - """The absolute number of copies in a system""" - - type: Literal["CopyCountConstraint"] = Field( - "CopyCountConstraint", description="MUST be 'CopyCountConstraint'" - ) - copies: int | Range - - -class CopyChangeConstraint(BaseModel): - """A representation of copy number change""" - - type: Literal["CopyChangeConstraint"] = Field( - "CopyChangeConstraint", description="MUST be 'CopyChangeConstraint'" - ) - copyChange: Coding # noqa: N815 - - @field_validator("copyChange") - @classmethod - def validate_copy_change(cls, v: Coding) -> Coding: - """Validate copyChange property - - :param v: copyChange value - :raises ValueError: If ``copyChange.code`` is not a valid CopyChange - :return: copyChange property - """ - try: - CopyChange(v.code.root) - except ValueError as e: - err_msg = f"copyChange, {v.code.root}, not one of {[cc.value for cc in CopyChange]}" - raise ValueError(err_msg) from e - return v - - -class Constraint(RootModel): - """Constraints are used to construct an intensional semantics of categorical variant types.""" - - root: DefiningContextConstraint | CopyCountConstraint | CopyChangeConstraint = ( - Field(..., discriminator="type") - ) - - -class CategoricalVariant(DomainEntity): - """A representation of a categorically-defined domain for variation, in which - individual contextual variation instances may be members of the domain. 
- """ - - type: Literal["CategoricalVariant"] = Field( - "CategoricalVariant", description="MUST be 'CategoricalVariant'" - ) - members: list[Variation | IRI] | None = Field( - None, - description="A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", - ) - constraints: list[Constraint] | None = None diff --git a/src/ga4gh/cat_vrs/models.py b/src/ga4gh/cat_vrs/models.py new file mode 100644 index 0000000..d350e58 --- /dev/null +++ b/src/ga4gh/cat_vrs/models.py @@ -0,0 +1,133 @@ +"""Define Pydantic models for GA4GH categorical variation objects. + +See the `CatVar page `_ on +the GA4GH website for more information. +""" + +from enum import Enum +from typing import Literal + +from ga4gh.core.models import ( + ConceptMapping, + Entity, + MappableConcept, + iriReference, +) +from ga4gh.vrs.models import Allele, CopyChange, Range, SequenceLocation, Variation +from pydantic import BaseModel, Field, RootModel, field_validator + + +class Relation(str, Enum): + """Defined relationships between members of the categorical variant and the defining + context. + """ + + TRANSLATES_FROM = "translates_from" + LIFTOVER_TO = "liftover_to" + TRANSCRIBES_TO = "transcribes_to" + + +class DefiningAlleleConstraint(BaseModel): + """The defining allele and its associated relationships that are congruent with + member variants. + """ + + type: Literal["DefiningAlleleConstraint"] = Field( + "DefiningAlleleConstraint", description="MUST be 'DefiningAlleleConstraint'" + ) + allele: Allele | iriReference + relations: list[MappableConcept] | None = Field( + None, + description="Defined relationships from which members relate to the defining allele.", + ) + + +class DefiningLocationConstraint(BaseModel): + """The defining location and its associated relationships that are congruent with + member locations. 
+ """ + + type: Literal["DefiningLocationConstraint"] = Field( + "DefiningLocationConstraint", description="MUST be 'DefiningLocationConstraint'" + ) + location: SequenceLocation | iriReference + relations: list[MappableConcept] | None = Field( + None, + description="Defined relationships from which members relate to the defining location.", + ) + matchCharacteristic: MappableConcept = Field( + ..., + description="A characteristic of the location that is used to match the defining location to member locations.", + ) + + +class CopyCountConstraint(BaseModel): + """The exact or range of copies that members of this categorical variant must + satisfy. + """ + + type: Literal["CopyCountConstraint"] = Field( + "CopyCountConstraint", description="MUST be 'CopyCountConstraint'" + ) + copies: int | Range = Field( + ..., + description="The precise value or range of copies members of this categorical variant must satisfy.", + ) + + +class CopyChangeConstraint(BaseModel): + """A representation of copy number change""" + + type: Literal["CopyChangeConstraint"] = Field( + "CopyChangeConstraint", description="MUST be 'CopyChangeConstraint'" + ) + copyChange: str = Field( + ..., + description="The relative assessment of the change in copies that members of this categorical variant satisfies.", + ) + + @field_validator("copyChange") + @classmethod + def validate_copy_change(cls, v: str) -> str: + """Validate copyChange property + + :param v: copyChange value + :raises ValueError: If ``copyChange`` is not a valid CopyChange + :return: copyChange property + """ + try: + CopyChange(v) + except ValueError as e: + err_msg = f"copyChange, {v}, not one of {[cc.value for cc in CopyChange]}" + raise ValueError(err_msg) from e + return v + + +class Constraint(RootModel): + """Constraints are used to construct an intensional semantics of categorical variant types.""" + + root: ( + DefiningAlleleConstraint + | DefiningLocationConstraint + | CopyCountConstraint + | CopyChangeConstraint + ) 
= Field(..., discriminator="type") + + +class CategoricalVariant(Entity): + """A representation of a categorically-defined domain for variation, in which + individual contextual variation instances may be members of the domain. + """ + + type: Literal["CategoricalVariant"] = Field( + "CategoricalVariant", description="MUST be 'CategoricalVariant'" + ) + members: list[Variation | iriReference] | None = Field( + None, + description="A non-exhaustive list of VRS variation Constraints that satisfy the constraints of this categorical variant.", + ) + constraints: list[Constraint] | None = None + mappings: list[ConceptMapping] | None = Field( + None, + description="A list of mappings to concepts in terminologies or code systems. Each mapping should include a coding and a relation.", + ) diff --git a/src/ga4gh/cat_vrs/profile_models.py b/src/ga4gh/cat_vrs/profile_models.py deleted file mode 100644 index c18b612..0000000 --- a/src/ga4gh/cat_vrs/profile_models.py +++ /dev/null @@ -1,154 +0,0 @@ -"""Define Pydantic models for GA4GH categorical variation objects. - -See the `CatVar page `_ on -the GA4GH website for more information. 
-""" - -from enum import Enum - -from ga4gh.cat_vrs.core_models import ( - CategoricalVariant, - Constraint, - CopyChangeConstraint, - CopyCountConstraint, - DefiningContextConstraint, - Relation, -) -from pydantic import BaseModel, Field, field_validator - - -class CatVrsType(str, Enum): - """Define CatVRS types""" - - PROTEIN_SEQ_CONS = "ProteinSequenceConsequence" - CANONICAL_ALLELE = "CanonicalAllele" - CATEGORICAL_CNV = "CategoricalCnv" - DESCRIBED_VAR = "DescribedVariation" - NUMBER_COUNT = "NumberCount" - NUMBER_CHANGE = "NumberChange" - QUANTITY_VARIANCE = "QuantityVariance" - - -class ProteinSequenceConsequenceProperties(BaseModel): - """Cat-VRS Constraints found in Protein Sequence Consequences.""" - - constraints: list[Constraint] = Field(..., min_length=1) - - @field_validator("constraints") - @classmethod - def validate_constraints(cls, v: list[Constraint]) -> list[Constraint]: - """Validate constraints property - - :param v: Constraints property to validate - :raises ValueError: If none of the ``relations`` contains - ``Relation.CODON_TRANSLATION.value`` exactly once. - :return: Constraints property - """ - if not any( - constraint.relations.count(Relation.CODON_TRANSLATION) == 1 - for constraint in v - ): - err_msg = f"At least one `relations` in `constraints` must contain `{Relation.CODON_TRANSLATION.value}` exactly once." - raise ValueError(err_msg) - - return v - - -class ProteinSequenceConsequence( - ProteinSequenceConsequenceProperties, CategoricalVariant -): - """A change that occurs in a protein sequence as a result of genomic changes. Due to - the degenerate nature of the genetic code, there are often several genomic changes - that can cause a protein sequence consequence. - The protein sequence consequence, like a :ref:`CanonicalAllele`, is defined by an - `Allele `_ - that is representative of a collection of congruent Protein Alleles that share the - same altered codon(s). 
- """ - - -class CanonicalAlleleProperties(BaseModel): - """Cat-VRS Constraints found in Canonical Alleles.""" - - constraints: list[Constraint] = Field(..., min_length=1) - - @field_validator("constraints") - @classmethod - def validate_constraints(cls, v: list[Constraint]) -> list[Constraint]: - """Validate constraints property - - :param v: Constraints property to validate - :raises ValueError: If none of the ``relations`` contains both - ``Relation.SEQUENCE_LIFTOVER`` and ``Relation.TRANSCRIPT_PROJECTION`` - exactly once. - :return: Constraints property - """ - if not any( - ( - constraint.relations.count(Relation.SEQUENCE_LIFTOVER) == 1 - and constraint.relations.count(Relation.TRANSCRIPT_PROJECTION) == 1 - ) - for constraint in v - ): - err_msg = f"At least one `relations` in `constraints` must contain {Relation.SEQUENCE_LIFTOVER} and {Relation.TRANSCRIPT_PROJECTION} exactly once." - raise ValueError(err_msg) - - return v - - -class CanonicalAllele(CanonicalAlleleProperties, CategoricalVariant): - """A canonical allele is defined by an - `Allele `_ - that is representative of a collection of congruent Alleles, each of which depict - the same nucleic acid change on different underlying reference sequences. Congruent - representations of an Allele often exist across different genome assemblies and - associated cDNA transcript representations. - """ - - -class CategoricalCnvProperties(BaseModel): - """Cat-VRS Constraints found in CategoricalCnvs.""" - - constraints: list[Constraint] = Field(..., min_length=1) - - @field_validator("constraints") - @classmethod - def validate_constraints(cls, v: list[Constraint]) -> list[Constraint]: - """Validate constraints property - - :param v: Constraints property to validate - :raises ValueError: If no ``DefiningContextConstraint`` with - ``Relation.SEQUENCE_LIFTOVER`` in ``relations`` is found in ``constraints`` - or if neither ``CopyCountConstraint`` nor ``CopyChangeConstraint`` is found - in ``constraints``. 
- :return: Constraints property - """ - defining_context_found = False - copy_found = False - - for constraint in v: - if not defining_context_found: - defining_context_found = ( - isinstance(constraint, DefiningContextConstraint) - and constraint.relations - and Relation.SEQUENCE_LIFTOVER in constraint.relations - ) - - if not copy_found: - copy_found = isinstance( - constraint, CopyChangeConstraint | CopyCountConstraint - ) - - if not defining_context_found: - err_msg = f"At least one item in `constraints` must be a `DefiningContextConstraint`` and contain ``{Relation.SEQUENCE_LIFTOVER}` in `relations`." - raise ValueError(err_msg) - - if not copy_found: - err_msg = "At least one item in `constraints` must be a `CopyCountConstraint` or a `CopyChangeConstraint`." - raise ValueError(err_msg) - - return v - - -class CategoricalCnv(CategoricalCnvProperties, CategoricalVariant): - """A representation of the constraints for matching knowledge about CNVs.""" diff --git a/src/ga4gh/cat_vrs/recipes.py b/src/ga4gh/cat_vrs/recipes.py new file mode 100644 index 0000000..b8ada7b --- /dev/null +++ b/src/ga4gh/cat_vrs/recipes.py @@ -0,0 +1,190 @@ +"""Define Pydantic models for GA4GH categorical variation objects. + +See the `CatVar page `_ on +the GA4GH website for more information. +""" + +from ga4gh.cat_vrs.models import ( + CategoricalVariant, + Constraint, + CopyChangeConstraint, + CopyCountConstraint, + DefiningAlleleConstraint, + DefiningLocationConstraint, + Relation, +) +from pydantic import Field, field_validator + + +class ProteinSequenceConsequence(CategoricalVariant): + """A change that occurs in a protein sequence as a result of genomic changes. Due to + the degenerate nature of the genetic code, there are often several genomic changes + that can cause a protein sequence consequence. 
The protein sequence consequence, + like a :ref:`CanonicalAllele`, is defined by an + `Allele `_ + that is representative of a collection of congruent Protein Alleles that share the + same altered codon(s). + """ + + constraints: list[Constraint] = Field(..., min_length=1) + + @field_validator("constraints") + @classmethod + def validate_constraints(cls, v: list[Constraint]) -> list[Constraint]: + """Validate constraints property + + At least one constraint in ``constraints`` must satisfy ALL of the following + requirements: + 1. Must be a ``DefiningAlleleConstraint`` + 2. Must have ``relations`` property that meets ALL of the following + requirements: + a. Must contain exactly one relation where ``primaryCode = translates_from`` + + :param v: Constraints property to validate + :raises ValueError: If constraints property does not satisfy the requirements + :return: Constraints property + """ + if not any( + isinstance(constraint.root, DefiningAlleleConstraint) + and constraint.root.relations + and sum( + 1 + for r in constraint.root.relations + if r.primaryCode + and r.primaryCode.root == Relation.TRANSLATES_FROM.value + ) + == 1 + for constraint in v + ): + err_msg = f"Unable to find at least one constraint that is a `DefiningAlleleConstraint` and has exactly one `relation` where the `primaryCode` is '{Relation.TRANSLATES_FROM.value}'." + raise ValueError(err_msg) + + return v + + +class CanonicalAllele(CategoricalVariant): + """A canonical allele is defined by an + `Allele `_ + that is representative of a collection of congruent Alleles, each of which depict + the same nucleic acid change on different underlying reference sequences. Congruent + representations of an Allele often exist across different genome assemblies and + associated cDNA transcript representations. 
+ """ + + constraints: list[Constraint] = Field(..., min_length=1, max_length=1) + + @field_validator("constraints") + @classmethod + def validate_constraints(cls, v: list[Constraint]) -> list[Constraint]: + """Validate constraints property + + Exactly one constraint in ``constraints`` must satisfy ALL of the following + requirements: + 1. Must be a ``DefiningAlleleConstraint`` + 2. Must have ``relations`` property that meets ALL of the following + requirements: + a. Must contain exactly one relation where ``primaryCode = liftover_to`` + b. Must contain exactly one relation where ``primaryCode = transcribes_to`` + + :param v: Constraints property to validate + :raises ValueError: If constraints property does not satisfy the requirements + :return: Constraints property + """ + constraint = v[0] + + if not isinstance(constraint.root, DefiningAlleleConstraint): + err_msg = "Constraint must be a `DefiningAlleleConstraint`." + raise ValueError(err_msg) + + if not constraint.root.relations: + err_msg = "`relations` is required." + raise ValueError(err_msg) + + if ( + sum( + 1 + for r in constraint.root.relations + if r.primaryCode and r.primaryCode.root == Relation.LIFTOVER_TO.value + ) + != 1 + ): + err_msg = f"Must contain exactly one relation where `primaryCode` is '{Relation.LIFTOVER_TO.value}'." + raise ValueError(err_msg) + + if ( + sum( + 1 + for r in constraint.root.relations + if r.primaryCode and r.primaryCode.root == Relation.TRANSCRIBES_TO.value + ) + != 1 + ): + err_msg = f"Must contain exactly one relation where `primaryCode` is '{Relation.TRANSCRIBES_TO.value}'." 
+ raise ValueError(err_msg) + + return v + + +class CategoricalCnv(CategoricalVariant): + """A representation of the constraints for matching knowledge about CNVs.""" + + constraints: list[Constraint] = Field( + ..., + min_length=2, + max_length=2, + description="The constraints array must contain exactly two items: a DefiningLocationConstraint and either a CopyChangeConstraint or CopyCountConstraint.", + ) + + @field_validator("constraints") + @classmethod + def validate_constraints(cls, v: list[Constraint]) -> list[Constraint]: + """Validate constraints property + + ``constraints`` must contain two constraints: + 1. ``DefiningLocationConstraint`` where the ``relations`` property contains + at least one relation where ``primaryCode = liftover_to`` + 2. Either a ``CopyCountConstraint`` or ``CopyChangeConstraint`` + + :param v: Constraints property to validate + :raises ValueError: If constraints property does not satisfy the requirements + :return: Constraints property + """ + def_loc_constr_found = False + def_loc_constr_valid = False + copy_constr_found = False + + for constraint in v: + constraint = constraint.root + if not def_loc_constr_valid and isinstance( + constraint, DefiningLocationConstraint + ): + def_loc_constr_found = True + + for r in constraint.relations or []: + if ( + r.primaryCode + and r.primaryCode.root == Relation.LIFTOVER_TO.value + ): + def_loc_constr_valid = True + continue + + if not copy_constr_found: + copy_constr_found = isinstance( + constraint, CopyCountConstraint | CopyChangeConstraint + ) + + if not def_loc_constr_found: + err_msg = f"Must contain a `DefiningLocationConstraint` with at least one relation where `primaryCode` is '{Relation.LIFTOVER_TO.value}'." 
+ raise ValueError(err_msg) + + if not def_loc_constr_valid: + err_msg = f"`DefiningLocationConstraint` found, but must contain at least one relation where `primaryCode` is '{Relation.LIFTOVER_TO.value}'" + raise ValueError(err_msg) + + if not copy_constr_found: + err_msg = ( + "Must contain either a `CopyCountConstraint` or `CopyChangeConstraint`." + ) + raise ValueError(err_msg) + + return v diff --git a/submodules/cat_vrs b/submodules/cat_vrs index 1458c87..b8f4aaf 160000 --- a/submodules/cat_vrs +++ b/submodules/cat_vrs @@ -1 +1 @@ -Subproject commit 1458c878dc8da73f26aced068b99781acb4e6867 +Subproject commit b8f4aaf812e36de749222c8d2d00fcae20137a47 diff --git a/tests/validation/test_cat_vrs_models.py b/tests/validation/test_cat_vrs_models.py new file mode 100644 index 0000000..439918d --- /dev/null +++ b/tests/validation/test_cat_vrs_models.py @@ -0,0 +1,365 @@ +"""Test Cat VRS Pydantic models""" + +from copy import deepcopy + +import pytest +from ga4gh.cat_vrs import models, recipes +from ga4gh.core.models import ( + MappableConcept, + code, +) +from ga4gh.vrs.models import CopyChange + +DUMMY_ALLELE_IRI = "allele.json#/1" # Valid IRI but does not reference anything + + +def def_allele_constr_empty_relations( + is_empty_list=True, +) -> models.DefiningAlleleConstraint: + """Return DefiningAlleleConstraint with either empty relations or null relations + + :param is_empty_list: ``True`` if relations should be empty list. 
Otherwise, + relations will be null + """ + return models.DefiningAlleleConstraint( + relations=[] if is_empty_list else None, allele="allele.json#/1" + ) + + +@pytest.fixture(scope="module") +def copy_change_constr(): + """Create test fixture for copy change constraint""" + return models.CopyChangeConstraint(copyChange=CopyChange.EFO_0030069.value) + + +@pytest.fixture(scope="module") +def members(): + """Create test fixture for members""" + return {"members": ["variation.json#/1"]} + + +@pytest.fixture(scope="module") +def defining_loc_constr(): + """Create test fixture for defining location constraint""" + return models.DefiningLocationConstraint( + relations=[ + MappableConcept(primaryCode=code(models.Relation.LIFTOVER_TO.value)) + ], + location="location.json#/1", + matchCharacteristic=MappableConcept(label="test"), + ) + + +def test_copy_count_constraint(): + """Test the CopyCountConstraint validator""" + # Valid Copy Count Constraint + assert models.CopyCountConstraint(copies=2) + + # Invalid Copy Count Constraint + with pytest.raises( + ValueError, + match="The first integer must be less than or equal to the second integer.", + ): + models.CopyCountConstraint(copies=[3, 2]) + + +def test_copy_change_constraint(): + """Test the CopyChangeConstraint validator""" + # Valid Copy Change + assert models.CopyChangeConstraint(copyChange=CopyChange.EFO_0030069.value) + + # Invalid Copy Change + with pytest.raises(ValueError, match="copyChange, 0030069, not one of"): + models.CopyChangeConstraint(copyChange="0030069") + + +def test_protein_sequence_consequence(defining_loc_constr, members): + """Test the ProteinSequenceConsequence validator""" + # Valid PSC + valid_params = deepcopy(members) + valid_params["constraints"] = [ + models.Constraint( + root=models.DefiningAlleleConstraint( + relations=[ + MappableConcept( + primaryCode=code(models.Relation.TRANSLATES_FROM.value) + ) + ], + allele=DUMMY_ALLELE_IRI, + ) + ) + ] + assert 
recipes.ProteinSequenceConsequence(**valid_params) + + # Invalid PSC: Constraint is NOT DefiningAlleleContext + err_msg = "Unable to find at least one constraint that is a" + invalid_params = deepcopy(members) + invalid_params["constraints"] = [models.Constraint(root=defining_loc_constr)] + with pytest.raises(ValueError, match=err_msg): + assert recipes.ProteinSequenceConsequence(**invalid_params) + + # Invalid PSC: No relations defined + invalid_params = deepcopy(members) + invalid_params["constraints"] = [ + models.Constraint(root=def_allele_constr_empty_relations(is_empty_list=False)) + ] + with pytest.raises(ValueError, match=err_msg): + recipes.ProteinSequenceConsequence(**invalid_params) + + # Invalid PSC: Empty list of relations + invalid_params = deepcopy(members) + invalid_params["constraints"] = [ + models.Constraint(root=def_allele_constr_empty_relations(is_empty_list=True)) + ] + with pytest.raises(ValueError, match=err_msg): + recipes.ProteinSequenceConsequence(**invalid_params) + + # Invalid PSC: relations does not use primaryCode + invalid_params = deepcopy(members) + invalid_params["constraints"] = [ + models.Constraint( + root=models.DefiningAlleleConstraint( + relations=[MappableConcept(label=models.Relation.LIFTOVER_TO.value)], + allele=DUMMY_ALLELE_IRI, + ) + ) + ] + with pytest.raises(ValueError, match=err_msg): + recipes.ProteinSequenceConsequence(**invalid_params) + + # Invalid PSC: relations has 0 'translates_from' + invalid_params = deepcopy(members) + invalid_params["constraints"] = [ + models.Constraint( + root=models.DefiningAlleleConstraint( + relations=[ + MappableConcept(primaryCode=code(models.Relation.LIFTOVER_TO.value)) + ], + allele=DUMMY_ALLELE_IRI, + ) + ) + ] + with pytest.raises(ValueError, match=err_msg): + recipes.ProteinSequenceConsequence(**invalid_params) + + # Invalid PSC: relations has > 1 'translates_from' + invalid_params = deepcopy(members) + invalid_params["constraints"] = [ + models.Constraint( + 
root=models.DefiningAlleleConstraint( + relations=[ + MappableConcept( + primaryCode=code(models.Relation.TRANSLATES_FROM.value) + ), + MappableConcept( + primaryCode=code(models.Relation.TRANSLATES_FROM.value) + ), + ], + allele=DUMMY_ALLELE_IRI, + ) + ) + ] + with pytest.raises(ValueError, match=err_msg): + recipes.ProteinSequenceConsequence(**invalid_params) + + +def test_canonical_allele(defining_loc_constr, members): + """Test the CanonicalAllele validator""" + # Valid CanonicalAllele + valid_params = deepcopy(members) + valid_params["constraints"] = [ + models.Constraint( + root=models.DefiningAlleleConstraint( + relations=[ + MappableConcept( + primaryCode=code(models.Relation.LIFTOVER_TO.value) + ), + MappableConcept( + primaryCode=code(models.Relation.TRANSCRIBES_TO.value) + ), + ], + allele=DUMMY_ALLELE_IRI, + ) + ) + ] + assert recipes.CanonicalAllele(**valid_params) + + # Invalid CanonicalAllele: Constraint is NOT DefiningAlleleContext + valid_params = deepcopy(members) + valid_params["constraints"] = [models.Constraint(root=defining_loc_constr)] + with pytest.raises( + ValueError, match="Constraint must be a `DefiningAlleleConstraint`." 
+ ): + recipes.CanonicalAllele(**valid_params) + + # Invalid CanonicalAllele: No relations defined + valid_params = deepcopy(members) + valid_params["constraints"] = [ + models.Constraint(root=def_allele_constr_empty_relations(is_empty_list=False)) + ] + with pytest.raises(ValueError, match="`relations` is required."): + recipes.CanonicalAllele(**valid_params) + + # Invalid CanonicalAllele: Empty list of relations + valid_params = deepcopy(members) + valid_params["constraints"] = [ + models.Constraint(root=def_allele_constr_empty_relations(is_empty_list=True)) + ] + with pytest.raises(ValueError, match="`relations` is required."): + recipes.CanonicalAllele(**valid_params) + + # Invalid CanonicalAllele: No 'liftover_to' + valid_params = deepcopy(members) + valid_params["constraints"] = [ + models.Constraint( + root=models.DefiningAlleleConstraint( + relations=[ + MappableConcept( + primaryCode=code(models.Relation.TRANSCRIBES_TO.value) + ), + MappableConcept( + primaryCode=code(models.Relation.TRANSCRIBES_TO.value) + ), + ], + allele=DUMMY_ALLELE_IRI, + ) + ) + ] + with pytest.raises( + ValueError, + match="Must contain exactly one relation where `primaryCode` is 'liftover_to'.", + ): + recipes.CanonicalAllele(**valid_params) + + # Invalid CanonicalAllele: > 1 'liftover_to' + valid_params = deepcopy(members) + valid_params["constraints"] = [ + models.Constraint( + root=models.DefiningAlleleConstraint( + relations=[ + MappableConcept( + primaryCode=code(models.Relation.LIFTOVER_TO.value) + ), + MappableConcept( + primaryCode=code(models.Relation.LIFTOVER_TO.value) + ), + MappableConcept( + primaryCode=code(models.Relation.TRANSCRIBES_TO.value) + ), + ], + allele=DUMMY_ALLELE_IRI, + ) + ) + ] + with pytest.raises( + ValueError, + match="Must contain exactly one relation where `primaryCode` is 'liftover_to'.", + ): + recipes.CanonicalAllele(**valid_params) + + # Invalid CanonicalAllele: No 'transcribes_to' + valid_params = deepcopy(members) + 
valid_params["constraints"] = [ + models.Constraint( + root=models.DefiningAlleleConstraint( + relations=[ + MappableConcept( + primaryCode=code(models.Relation.LIFTOVER_TO.value) + ), + MappableConcept( + primaryCode=code(models.Relation.TRANSLATES_FROM.value) + ), + ], + allele=DUMMY_ALLELE_IRI, + ) + ) + ] + with pytest.raises( + ValueError, + match="Must contain exactly one relation where `primaryCode` is 'transcribes_to'.", + ): + recipes.CanonicalAllele(**valid_params) + + # Invalid CanonicalAllele: > 1 'transcribes_to' + valid_params = deepcopy(members) + valid_params["constraints"] = [ + models.Constraint( + root=models.DefiningAlleleConstraint( + relations=[ + MappableConcept( + primaryCode=code(models.Relation.LIFTOVER_TO.value) + ), + MappableConcept( + primaryCode=code(models.Relation.TRANSCRIBES_TO.value) + ), + MappableConcept( + primaryCode=code(models.Relation.TRANSCRIBES_TO.value) + ), + ], + allele=DUMMY_ALLELE_IRI, + ) + ) + ] + with pytest.raises( + ValueError, + match="Must contain exactly one relation where `primaryCode` is 'transcribes_to'.", + ): + recipes.CanonicalAllele(**valid_params) + + +def test_categorical_cnv(members, defining_loc_constr, copy_change_constr): + """Test the CategoricalCnv validator""" + # Valid CategoricalCnv with CopyChangeConstraint + valid_params = deepcopy(members) + valid_params["constraints"] = [ + models.Constraint(root=defining_loc_constr), + models.Constraint(root=copy_change_constr), + ] + assert recipes.CategoricalCnv(**valid_params) + + # Valid CategoricalCnv with CopyCountConstraint + valid_params = deepcopy(members) + valid_params["constraints"] = [ + models.Constraint(root=defining_loc_constr), + models.Constraint(root=models.CopyCountConstraint(copies=[1, 2])), + ] + assert recipes.CategoricalCnv(**valid_params) + + # Invalid CategoricalCnv: No DefiningLocationConstraint + invalid_params = deepcopy(members) + invalid_params["constraints"] = [ + 
models.Constraint(root=def_allele_constr_empty_relations()), + copy_change_constr, + ] + with pytest.raises( + ValueError, + match="Must contain a `DefiningLocationConstraint` with at least one relation where `primaryCode` is 'liftover_to'.", + ): + recipes.CategoricalCnv(**invalid_params) + + # Invalid CategoricalCnv: DefiningLocationConstraint does not have 'liftover_to' + invalid_params = deepcopy(members) + invalid_defining_loc_constr = defining_loc_constr.copy(deep=True) + invalid_defining_loc_constr.relations = [ + MappableConcept(primaryCode=code(models.Relation.TRANSCRIBES_TO.value)) + ] + invalid_params["constraints"] = [ + invalid_defining_loc_constr, + copy_change_constr, + ] + with pytest.raises( + ValueError, + match="`DefiningLocationConstraint` found, but must contain at least one relation where `primaryCode` is 'liftover_to'.", + ): + recipes.CategoricalCnv(**invalid_params) + + # Invalid No CopyCountConstraint and no CopyChangeConstraint provided + invalid_params["constraints"] = [ + invalid_defining_loc_constr, + defining_loc_constr, + ] + with pytest.raises( + ValueError, + match="Must contain either a `CopyCountConstraint` or `CopyChangeConstraint`.", + ): + recipes.CategoricalCnv(**invalid_params) diff --git a/tests/validation/test_cat_vrs_schema.py b/tests/validation/test_cat_vrs_schema.py index 830b528..1ca82cd 100644 --- a/tests/validation/test_cat_vrs_schema.py +++ b/tests/validation/test_cat_vrs_schema.py @@ -1,19 +1,19 @@ -"""Test that Cat VRS-Python Pydantic models match corresponding schemas""" +"""Test that Cat VRS-Python Pydantic models match corresponding JSON schemas""" import json from enum import Enum from pathlib import Path import pytest -from ga4gh.cat_vrs import core_models, profile_models +from ga4gh.cat_vrs import models, recipes from pydantic import BaseModel class CatVrsSchema(str, Enum): """Enum for Cat VRS schema""" - CORE = "core" - PROFILES = "profiles" + CAT_VRS = "cat_vrs" + RECIPES = "recipes" class 
CatVrsSchemaMapping(BaseModel): @@ -54,10 +54,10 @@ def _update_cat_vrs_schema_mapping( # Get core + profiles classes for child in SUBMODULES_DIR.iterdir(): child_str = str(child) - if child_str.endswith(CatVrsSchema.CORE): - mapping_key = CatVrsSchema.CORE - elif child_str.endswith(CatVrsSchema.PROFILES): - mapping_key = CatVrsSchema.PROFILES + if child_str.endswith(CatVrsSchema.CAT_VRS): + mapping_key = CatVrsSchema.CAT_VRS + elif child_str.endswith(CatVrsSchema.RECIPES): + mapping_key = CatVrsSchema.RECIPES else: continue @@ -69,8 +69,8 @@ def _update_cat_vrs_schema_mapping( @pytest.mark.parametrize( ("cat_vrs_schema", "pydantic_models"), [ - (CatVrsSchema.CORE, core_models), - (CatVrsSchema.PROFILES, profile_models), + (CatVrsSchema.CAT_VRS, models), + (CatVrsSchema.RECIPES, recipes), ], ) def test_schema_models_in_pydantic(cat_vrs_schema, pydantic_models): @@ -85,8 +85,8 @@ def test_schema_models_in_pydantic(cat_vrs_schema, pydantic_models): @pytest.mark.parametrize( ("cat_vrs_schema", "pydantic_models"), [ - (CatVrsSchema.CORE, core_models), - (CatVrsSchema.PROFILES, profile_models), + (CatVrsSchema.CAT_VRS, models), + (CatVrsSchema.RECIPES, recipes), ], ) def test_schema_class_fields(cat_vrs_schema, pydantic_models):