diff --git a/pyproject.toml b/pyproject.toml index 97bc739..1c12371 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ requires-python = ">=3.10" dynamic = ["version"] dependencies = [ "ga4gh.vrs==2.*", - "ga4gh.cat_vrs~=0.5.0", + "ga4gh.cat_vrs~=0.6.0", "pydantic==2.*" ] diff --git a/src/ga4gh/va_spec/aac_2017/models.py b/src/ga4gh/va_spec/aac_2017/models.py index 320edcd..7de1af6 100644 --- a/src/ga4gh/va_spec/aac_2017/models.py +++ b/src/ga4gh/va_spec/aac_2017/models.py @@ -9,7 +9,7 @@ from ga4gh.core.models import MappableConcept, iriReference from ga4gh.va_spec.base.core import ( Method, - StatementValidatorMixin, + Statement, VariantDiagnosticProposition, VariantPrognosticProposition, VariantTherapeuticResponseProposition, @@ -17,7 +17,6 @@ from ga4gh.va_spec.base.enums import System from ga4gh.va_spec.base.validators import validate_mappable_concept from pydantic import ( - BaseModel, Field, field_validator, ) @@ -47,10 +46,10 @@ class Classification(str, Enum): AMP_ASCO_CAP_TIERS = [v.value for v in Classification.__members__.values()] -class AmpAscoCapValidatorMixin(StatementValidatorMixin): +class AmpAscoCapValidatorMixin: """Mixin class for reusable AMP/ASCO/CAP field validators - Should be used with classes that inherit from Pydantic BaseModel + Should be used with classes that inherit from Statement """ @field_validator("strength") @@ -63,7 +62,10 @@ def validate_strength(cls, v: MappableConcept | None) -> MappableConcept | None: :return: Validated strength value """ return validate_mappable_concept( - v, System.AMP_ASCO_CAP, AMP_ASCO_CAP_LEVELS, mc_is_required=False + v, + System.AMP_ASCO_CAP, + valid_codes=AMP_ASCO_CAP_LEVELS, + mc_is_required=False, ) @field_validator("classification") @@ -75,10 +77,12 @@ def validate_classification(cls, v: MappableConcept) -> MappableConcept: :raises ValueError: If invalid classification values are provided :return: Validated classification value """ - return validate_mappable_concept(v, System.AMP_ASCO_CAP, AMP_ASCO_CAP_TIERS) + return validate_mappable_concept( + v, System.AMP_ASCO_CAP, valid_codes=AMP_ASCO_CAP_TIERS + ) -class VariantDiagnosticStudyStatement(BaseModel, AmpAscoCapValidatorMixin): +class VariantDiagnosticStudyStatement(Statement, AmpAscoCapValidatorMixin): """A statement reporting a conclusion from a single study about whether a variant is associated with a disease (a diagnostic inclusion criterion), or absence of a disease (diagnostic exclusion criterion) - based on interpretation of the study's @@ -103,7 +107,7 @@ class VariantDiagnosticStudyStatement(BaseModel, AmpAscoCapValidatorMixin): ) -class VariantPrognosticStudyStatement(BaseModel, AmpAscoCapValidatorMixin): +class VariantPrognosticStudyStatement(Statement, AmpAscoCapValidatorMixin): """A statement reporting a conclusion from a single study about whether a variant is associated with a disease prognosis - based on interpretation of the study's results. @@ -127,7 +131,7 @@ class VariantPrognosticStudyStatement(BaseModel, AmpAscoCapValidatorMixin): ) -class VariantTherapeuticResponseStudyStatement(BaseModel, AmpAscoCapValidatorMixin): +class VariantTherapeuticResponseStudyStatement(Statement, AmpAscoCapValidatorMixin): """A statement reporting a conclusion from a single study about whether a variant is associated with a therapeutic response (positive or negative) - based on interpretation of the study's results. diff --git a/src/ga4gh/va_spec/acmg_2015/__init__.py b/src/ga4gh/va_spec/acmg_2015/__init__.py index 4853d76..0228946 100644 --- a/src/ga4gh/va_spec/acmg_2015/__init__.py +++ b/src/ga4gh/va_spec/acmg_2015/__init__.py @@ -2,18 +2,14 @@ from .models import ( ACMG_CLASSIFICATIONS, - EVIDENCE_OUTCOME_VALUES, AcmgClassification, - EvidenceOutcome, - VariantPathogenicityFunctionalImpactEvidenceLine, + VariantPathogenicityEvidenceLine, VariantPathogenicityStatement, ) __all__ = [ "ACMG_CLASSIFICATIONS", - "EVIDENCE_OUTCOME_VALUES", "AcmgClassification", - "EvidenceOutcome", - "VariantPathogenicityFunctionalImpactEvidenceLine", + "VariantPathogenicityEvidenceLine", "VariantPathogenicityStatement", ] diff --git a/src/ga4gh/va_spec/acmg_2015/models.py b/src/ga4gh/va_spec/acmg_2015/models.py index e923bb1..e55d3f8 100644 --- a/src/ga4gh/va_spec/acmg_2015/models.py +++ b/src/ga4gh/va_spec/acmg_2015/models.py @@ -7,9 +7,9 @@ from ga4gh.core.models import MappableConcept, iriReference from ga4gh.va_spec.base.core import ( - EvidenceLineValidatorMixin, + EvidenceLine, Method, - StatementValidatorMixin, + Statement, VariantPathogenicityProposition, ) from ga4gh.va_spec.base.enums import ( @@ -21,23 +21,7 @@ from ga4gh.va_spec.base.validators import ( validate_mappable_concept, ) -from pydantic import BaseModel, Field, field_validator, model_validator - - -class EvidenceOutcome(str, Enum): - """Define constraints for evidence outcome values""" - - PS3 = "PS3" - PS3_MODERATE = "PS3_moderate" - PS3_SUPPORTING = "PS3_supporting" - PS3_NOT_MET = "PS3_not_met" - BS3 = "BS3" - BS3_MODERATE = "BS3_moderate" - BS3_SUPPORTING = "BS3_supporting" - BS3_NOT_MET = "BS3_not_met" - - -EVIDENCE_OUTCOME_VALUES = [v.value for v in EvidenceOutcome.__members__.values()] +from pydantic import Field, field_validator, model_validator class AcmgClassification(str, Enum): @@ -53,17 +37,15 @@ class AcmgClassification(str, Enum): ACMG_CLASSIFICATIONS = [v.value for v in AcmgClassification.__members__.values()] -class VariantPathogenicityFunctionalImpactEvidenceLine( - BaseModel, EvidenceLineValidatorMixin -): - """An Evidence Line that describes how information about the functional impact of a - variant on a gene or gene product was interpreted as evidence for or against the - variant's pathogenicity. +class VariantPathogenicityEvidenceLine(EvidenceLine): + """An Evidence Line that describes how information about the specific criterion + evidence for the variant was assessed as evidence for or against the variant's + pathogenicity. """ targetProposition: VariantPathogenicityProposition | None = Field( None, - description="A Variant Pathogenicity Proposition against which functional impact information was assessed, in determining the strength and direction of support this information provides as evidence.", + description="A Variant Pathogenicity Proposition against which specific information was assessed, in determining the strength and direction of support this information provides as evidence.", ) strengthOfEvidenceProvided: MappableConcept | None = Field( None, @@ -71,9 +53,41 @@ class VariantPathogenicityFunctionalImpactEvidenceLine( ) specifiedBy: Method | iriReference = Field( ..., - description="The guidelines that were followed to interpret variant functional impact information as evidence for or against the assessed variant's pathogenicity.", + description="The guidelines that were followed to assess variant information as evidence for or against the assessed variant's pathogenicity.", ) + class Criterion(str, Enum): + """Define ACMG 2015 criterion values""" + + PVS1 = "PVS1" + PS1 = "PS1" + PS2 = "PS2" + PS3 = "PS3" + PS4 = "PS4" + PM1 = "PM1" + PM2 = "PM2" + PM3 = "PM3" + PM4 = "PM4" + PM5 = "PM5" + PM6 = "PM6" + PP1 = "PP1" + PP2 = "PP2" + PP3 = "PP3" + PP4 = "PP4" + PP5 = "PP5" + BA1 = "BA1" + BS1 = "BS1" + BS2 = "BS2" + BS3 = "BS3" + BS4 = "BS4" + BP1 = "BP1" + BP2 = "BP2" + BP3 = "BP3" + BP4 = "BP4" + BP5 = "BP5" + BP6 = "BP6" + BP7 = "BP7" + @field_validator("strengthOfEvidenceProvided") @classmethod def validate_strength_of_evidence_provided( @@ -86,43 +100,35 @@ def validate_strength_of_evidence_provided( :return: Validated strengthOfEvidenceProvided value """ return validate_mappable_concept( - v, System.ACMG, STRENGTH_OF_EVIDENCE_PROVIDED_VALUES, mc_is_required=False + v, + System.ACMG, + valid_codes=STRENGTH_OF_EVIDENCE_PROVIDED_VALUES, + mc_is_required=False, ) - @field_validator("specifiedBy") - @classmethod - def validate_specified_by(cls, v: Method | iriReference) -> Method | iriReference: - """Validate specifiedBy - - :param v: specifiedBy - :raises ValueError: If invalid specifiedBy values are provided - :return: Validated specifiedBy value - """ - if isinstance(v, Method) and not v.reportedIn: - err_msg = "`reportedIn` is required." - raise ValueError(err_msg) - - return v - @model_validator(mode="before") - def validate_evidence_outcome(cls, values: dict) -> dict: # noqa: N805 - """Validate ``evidenceOutcome`` property if it exists + def validate_model(cls, values: dict) -> dict: # noqa: N805 + """Validate ``evidenceOutcome`` and ``directionOfEvidenceProvided`` properties :param values: Input values :raises ValueError: If ``evidenceOutcome`` exists and is invalid :return: Validated input values. If ``evidenceOutcome`` exists, then it will be - validated and converted to a ``MappableConcept`` + validated and converted to a ``MappableConcept``. + Or if ``strengthOfEvidenceProvided`` is not provided when + ``directionOfEvidenceProvided`` is supports or disputes or if + ``strengthOfEvidenceProvided`` is provided when + ``directionOfEvidenceProvided`` is neutral """ - return cls._validate_evidence_outcome( - values, System.ACMG, EVIDENCE_OUTCOME_VALUES - ) + cls._validate_direction_of_evidence_provided(values) + acmg_code_pattern = r"^((?:PVS1)(?:_(?:not_met|(?:strong|moderate|supporting)))?|(?:PS[1-4]|BS[1-4])(?:_(?:not_met|(?:very_strong|moderate|supporting)))?|BA1(?:_not_met)?|(?:PM[1-6])(?:_(?:not_met|(?:very_strong|strong|supporting)))?|(PP[1-5]|BP[1-7])(?:_(?:not_met|very_strong|strong|moderate))?)$" + return cls._validate_evidence_outcome(values, System.ACMG, acmg_code_pattern) -class VariantPathogenicityStatement(BaseModel, StatementValidatorMixin): +class VariantPathogenicityStatement(Statement): """A Statement describing the role of a variant in causing an inherited condition.""" - proposition: VariantPathogenicityProposition | None = Field( - None, + proposition: VariantPathogenicityProposition = Field( + ..., description="A proposition about the pathogenicity of a varaint, the validity of which is assessed and reported by the Statement. A Statement can put forth the proposition as being true, false, or uncertain, and may provide an assessment of the level of confidence/evidence supporting this claim.", ) strength: MappableConcept | None = Field( @@ -148,7 +154,7 @@ def validate_strength(cls, v: MappableConcept | None) -> MappableConcept | None: :return: Validated strength value """ return validate_mappable_concept( - v, System.ACMG, STRENGTHS, mc_is_required=False + v, System.ACMG, valid_codes=STRENGTHS, mc_is_required=False ) @field_validator("classification") diff --git a/src/ga4gh/va_spec/base/__init__.py b/src/ga4gh/va_spec/base/__init__.py index cf9183f..79d710f 100644 --- a/src/ga4gh/va_spec/base/__init__.py +++ b/src/ga4gh/va_spec/base/__init__.py @@ -19,6 +19,7 @@ StudyGroup, StudyResult, SubjectVariantProposition, + TumorVariantFrequencyStudyResult, VariantDiagnosticProposition, VariantOncogenicityProposition, VariantPathogenicityProposition, @@ -85,4 +86,5 @@ "VariantPathogenicityProposition", "VariantPrognosticProposition", "VariantTherapeuticResponseProposition", + "TumorVariantFrequencyStudyResult", ] diff --git a/src/ga4gh/va_spec/base/core.py b/src/ga4gh/va_spec/base/core.py index 27c59e6..9f9caa5 100644 --- a/src/ga4gh/va_spec/base/core.py +++ b/src/ga4gh/va_spec/base/core.py @@ -26,22 +26,85 @@ from ga4gh.va_spec.base.validators import validate_mappable_concept from ga4gh.vrs.models import Allele, MolecularVariation from pydantic import ( - BaseModel, ConfigDict, Field, RootModel, StringConstraints, ValidationError, field_validator, - model_validator, ) StatementType = TypeVar("StatementType") EvidenceLineType = TypeVar("EvidenceLineType") -######################################### -# Abstract Core Classes -######################################### + +class CoreType(str, Enum): + """Define VA Spec Base Core Types""" + + METHOD = "Method" + CONTRIBUTION = "Contribution" + DOCUMENT = "Document" + AGENT = "Agent" + STATEMENT = "Statement" + EVIDENCE_LINE = "EvidenceLine" + DATA_SET = "DataSet" + STUDY_GROUP = "StudyGroup" + + +class Contribution(Entity, BaseModelForbidExtra): + """An action taken by an agent in contributing to the creation, modification, + assessment, or deprecation of a particular entity (e.g. a Statement, EvidenceLine, + DataSet, Publication, etc.) + """ + + type: Literal["Contribution"] = Field( + CoreType.CONTRIBUTION.value, + description=f"MUST be '{CoreType.CONTRIBUTION.value}'.", + ) + contributor: Agent | None = Field( + None, description="The agent that made the contribution." + ) + activityType: str | None = Field( + None, + description="The specific type of activity performed or role played by an agent in making the contribution (e.g. for a publication, agents may contribute as a primary author, editor, figure designer, data generator, etc.). Values of this property may be framed as activities, or as contribution roles (e.g. using terms from the Contribution Role Ontology (CRO)).", + ) + date: datetime | None = Field( + None, description="When the contributing activity was completed." + ) + + +class Document(Entity, BaseModelForbidExtra): + """A collection of information, usually in a text-based or graphic human-readable + form, intended to be read and understood together as a whole. + """ + + type: Literal["Document"] = Field( + CoreType.DOCUMENT.value, description=f"Must be '{CoreType.DOCUMENT.value}'" + ) + documentType: str | None = Field( + None, + description="A specific type of document that a Document instance represents (e.g. 'publication', 'patent', 'pathology report')", + ) + title: str | None = Field( + None, description="The official title given to the document by its authors." + ) + urls: ( + list[Annotated[str, StringConstraints(pattern=r"^(https?|s?ftp)://")]] | None + ) = Field( + None, + description="One or more URLs from which the content of the Document can be retrieved.", + ) + doi: ( + Annotated[str, StringConstraints(pattern=r"^10\.(\d+)(\.\d+)*\/[\w\-\.]+")] + | None + ) = Field( + None, + description="A [Digital Object Identifier](https://www.doi.org/the-identifier/what-is-a-doi/) for the document.", + ) + pmid: int | None = Field( + None, + description="A [PubMed unique identifier](https://en.wikipedia.org/wiki/PubMed#PubMed_identifier) for the document.", + ) class InformationEntity(Entity): @@ -116,6 +179,45 @@ class CohortAlleleFrequencyStudyResult(_StudyResult, BaseModelForbidExtra): ) +class TumorVariantFrequencyStudyResult(_StudyResult, BaseModelForbidExtra): + """A Study Result that reports measures related to the frequency of an variant + across different tumor types. + """ + + type: Literal["TumorVariantFrequencyStudyResult"] = Field( + "TumorVariantFrequencyStudyResult", + description="MUST be 'TumorVariantFrequencyStudyResult'.", + ) + sourceDataSet: DataSet | None = Field( + None, + description="The dataset from which data in the Tumor Variant Frequency Study Result was taken.", + ) + focusVariant: Allele | CategoricalVariant | iriReference = Field( + ..., + description="The variant for which frequency data is reported in the Study Result.", + ) + affectedSampleCount: int = Field( + ..., + description="The number of tumor samples in the sample group that contain the focus variant.", + ) + totalSampleCount: int = Field( + ..., + description="The total number of tumor samples in the sample group.", + ) + affectedFrequency: float = Field( + ..., + description="The frequency of tumor samples that include the focus variant in the sample group.", + ) + sampleGroup: StudyGroup | None = Field( + None, + description="The set of samples about which the frequency data was generated.", + ) + subGroupFrequency: list[TumorVariantFrequencyStudyResult] | None = Field( + None, + description="A list of Tumor Variant Frequency Study Result objects describing variant frequency in different subsets of larger sample group described in the root Study Result. Subgroups can be further subdivided into more subgroups. This enables, for example, further breakdown of frequency measures in sample groups with a narrower categorical variant than the root focus variant, or sample groups with a more specific tumor type.", + ) + + class ExperimentalVariantFunctionalImpactStudyResult( _StudyResult, BaseModelForbidExtra ): @@ -328,31 +430,13 @@ class VariantTherapeuticResponseProposition( ) -######################################### -# Concrete Core Classes -######################################### - - -class CoreType(str, Enum): - """Define VA Spec Base Core Types""" - - METHOD = "Method" - CONTRIBUTION = "Contribution" - DOCUMENT = "Document" - AGENT = "Agent" - STATEMENT = "Statement" - EVIDENCE_LINE = "EvidenceLine" - DATA_SET = "DataSet" - STUDY_GROUP = "StudyGroup" - - class Method(Entity, BaseModelForbidExtra): """A set of instructions that specify how to achieve some objective.""" type: Literal["Method"] = Field( CoreType.METHOD.value, description=f"MUST be '{CoreType.METHOD.value}'." ) - subtype: MappableConcept | None = Field( + methodType: str | None = Field( None, description="A specific type of method that a Method instance represents (e.g. 'Variant Interpretation Guideline', or 'Experimental Protocol').", ) @@ -361,62 +445,6 @@ class Method(Entity, BaseModelForbidExtra): ) -class Contribution(Entity, BaseModelForbidExtra): - """An action taken by an agent in contributing to the creation, modification, - assessment, or deprecation of a particular entity (e.g. a Statement, EvidenceLine, - DataSet, Publication, etc.) - """ - - type: Literal["Contribution"] = Field( - CoreType.CONTRIBUTION.value, - description=f"MUST be '{CoreType.CONTRIBUTION.value}'.", - ) - contributor: Agent | None = Field( - None, description="The agent that made the contribution." - ) - activityType: MappableConcept | None = Field( - None, - description="The specific type of activity performed or role played by an agent in making the contribution (e.g. for a publication, agents may contribute as a primary author, editor, figure designer, data generator, etc.). Values of this property may be framed as activities, or as contribution roles (e.g. using terms from the Contribution Role Ontology (CRO)).", - ) - date: datetime | None = Field( - None, description="When the contributing activity was completed." - ) - - -class Document(Entity, BaseModelForbidExtra): - """A collection of information, usually in a text-based or graphic human-readable - form, intended to be read and understood together as a whole. - """ - - type: Literal["Document"] = Field( - CoreType.DOCUMENT.value, description=f"Must be '{CoreType.DOCUMENT.value}'" - ) - subtype: MappableConcept | None = Field( - None, - description="A specific type of document that a Document instance represents (e.g. 'publication', 'patent', 'pathology report')", - ) - title: str | None = Field( - None, description="The official title given to the document by its authors." - ) - urls: ( - list[Annotated[str, StringConstraints(pattern=r"^(https?|s?ftp)://")]] | None - ) = Field( - None, - description="One or more URLs from which the content of the Document can be retrieved.", - ) - doi: ( - Annotated[str, StringConstraints(pattern=r"^10\.(\d+)(\.\d+)*\/[\w\-\.]+")] - | None - ) = Field( - None, - description="A [Digital Object Identifier](https://www.doi.org/the-identifier/what-is-a-doi/) for the document.", - ) - pmid: int | None = Field( - None, - description="A [PubMed unique identifier](https://en.wikipedia.org/wiki/PubMed#PubMed_identifier) for the document.", - ) - - class Agent(Entity, BaseModelForbidExtra): """An autonomous actor (person, organization, or software agent) that bears some form of responsibility for an activity taking place, for the existence of an entity, @@ -427,7 +455,7 @@ class Agent(Entity, BaseModelForbidExtra): CoreType.AGENT.value, description=f"MUST be '{CoreType.AGENT.value}'." ) name: str | None = Field(None, description="The given name of the Agent.") - subtype: MappableConcept | None = Field( + agentType: str | None = Field( None, description="A specific type of agent the Agent object represents. Recommended subtypes include codes for `person`, `organization`, or `software`.", ) @@ -451,7 +479,7 @@ class DataSet(Entity, BaseModelForbidExtra): type: Literal["DataSet"] = Field( CoreType.DATA_SET.value, description=f"MUST be '{CoreType.DATA_SET.value}'." ) - subtype: MappableConcept | None = Field( + datasetType: str | None = Field( None, description="A specific type of data set the DataSet instance represents (e.g. a 'clinical data set', a 'sequencing data set', a 'gene expression data set', a 'genome annotation data set')", ) @@ -544,12 +572,16 @@ def validate_has_evidence_items( obj_ for _, obj_ in vars(imported_module).items() if inspect.isclass(obj_) - and issubclass(obj_, BaseModel) + and issubclass(obj_, Statement) and obj_.__name__.endswith(("Statement", "EvidenceLine")) + and obj_ not in (Statement, EvidenceLine) ] ) - has_evidence_items_models.extend([Statement, StudyResult, EvidenceLine]) + has_evidence_items_models.extend( + [Statement, StudyResult, EvidenceLine, iriReference] + ) + for evidence_item in v: if isinstance(evidence_item, dict): found_model = False @@ -567,11 +599,80 @@ def validate_has_evidence_items( raise ValueError(err_msg) elif isinstance(evidence_item, str): evidence_items.append(iriReference(root=evidence_item)) + elif isinstance(evidence_item, tuple(has_evidence_items_models)): + evidence_items.append(evidence_item) else: err_msg = "Unable to find valid model for `hasEvidenceItems`" raise ValueError(err_msg) return evidence_items + @staticmethod + def _validate_evidence_outcome( + values: dict, system: System, code_pattern: str + ) -> dict: + """Validate ``evidenceOutcome`` property if it exists + + :param values: Input values + :param system: System that should be used for ``primaryCoding.system`` + :param code_pattern: The regex pattern that should be used for + ``primaryCoding.code`` + :raises ValueError: If ``evidenceOutcome`` exists and is invalid + :return: Validated input values. If ``evidenceOutcome`` exists, then it will be + validated and converted to a ``MappableConcept`` + """ + if "evidenceOutcome" in values: + mc = MappableConcept(**values["evidenceOutcome"]) + values["evidenceOutcome"] = mc + validate_mappable_concept( + mc, system, code_pattern=code_pattern, mc_is_required=False + ) + return values + + @staticmethod + def _validate_direction_of_evidence_provided(values: dict) -> dict: + """Validate conditional requirements for ``directionOfEvidenceProvided`` + + :param values: Input values + :raises ValueError: If ``strengthOfEvidenceProvided`` is not provided when + ``directionOfEvidenceProvided`` is supports or disputes or if + ``strengthOfEvidenceProvided`` is provided when + ``directionOfEvidenceProvided`` is neutral + :return: Validated input values + """ + direction_of_evidence_provided = values.get("directionOfEvidenceProvided") + if ( + direction_of_evidence_provided in (Direction.SUPPORTS, Direction.DISPUTES) + and values.get("strengthOfEvidenceProvided") is None + ): + err_msg = f"`strengthOfEvidenceProvided` is required when `directionOfEvidenceProvided` is '{Direction.SUPPORTS.value}' or '{Direction.DISPUTES.value}'." + raise ValueError(err_msg) + + if direction_of_evidence_provided == Direction.NEUTRAL and values.get( + "strengthOfEvidenceProvided" + ): + err_msg = f"`strengthOfEvidenceProvided` is not allowed when `directionOfEvidenceProvided` is '{Direction.NEUTRAL.value}'." + raise ValueError(err_msg) + + return values + + @field_validator("specifiedBy") + @classmethod + def validate_specified_by(cls, v: Method | iriReference) -> Method | iriReference: + """Validate specifiedBy + + :param v: specifiedBy + :raises ValueError: If invalid specifiedBy values are provided + :return: Validated specifiedBy value + """ + if hasattr(cls, "Criterion") and isinstance(v, Method): + if not v.reportedIn: + err_msg = "`reportedIn` is required." + raise ValueError(err_msg) + + cls.Criterion(v.methodType) + + return v + class Statement(InformationEntity, BaseModelForbidExtra): """A claim of purported truth as made by a particular agent, on a particular @@ -637,70 +738,3 @@ class StudyGroup(Entity, BaseModelForbidExtra): None, description="A feature or role shared by all members of the StudyGroup, representing a criterion for membership in the group.", ) - - -class StatementValidatorMixin: - """Mixin class for reusable Statement model validators - - Should be used with classes that inherit from Pydantic BaseModel - """ - - model_config = ConfigDict(extra="allow") - - @model_validator(mode="after") - def statement_validator(cls, model: BaseModel) -> BaseModel: # noqa: N805 - """Validate that the model is a ``Statement``. - - :param model: Pydantic BaseModel to validate - :raises ValueError: If ``model`` does not validate against a ``Statement`` - :return: Validated model - """ - try: - Statement(**model.model_dump()) - except ValidationError as e: - err_msg = f"Must be a `Statement`: {e}" - raise ValueError(err_msg) from e - return model - - -class EvidenceLineValidatorMixin: - """Mixin class for reusable EvidenceLine model validators - - Should be used with classes that inherit from Pydantic BaseModel - """ - - model_config = ConfigDict(extra="allow") - - @staticmethod - def _validate_evidence_outcome( - values: dict, system: System, codes: list[str] - ) -> dict: - """Validate ``evidenceOutcome`` property if it exists - - :param values: Input values - :param system: System that should be used in ``MappableConcept`` - :param codes: Codes that should be used in ``MappableConcept`` - :raises ValueError: If ``evidenceOutcome`` exists and is invalid - :return: Validated input values. If ``evidenceOutcome`` exists, then it will be - validated and converted to a ``MappableConcept`` - """ - if "evidenceOutcome" in values: - mc = MappableConcept(**values["evidenceOutcome"]) - values["evidenceOutcome"] = mc - validate_mappable_concept(mc, system, codes, mc_is_required=False) - return values - - @model_validator(mode="after") - def evidence_line_validator(cls, model: BaseModel) -> BaseModel: # noqa: N805 - """Validate that the model is a ``EvidenceLine``. - - :param model: Pydantic BaseModel to validate - :raises ValueError: If ``model`` does not validate against a ``EvidenceLine`` - :return: Validated model - """ - try: - EvidenceLine(**model.model_dump()) - except ValidationError as e: - err_msg = f"Must be an `EvidenceLine`: {e}" - raise ValueError(err_msg) from e - return model diff --git a/src/ga4gh/va_spec/base/validators.py b/src/ga4gh/va_spec/base/validators.py index 8e10efe..25bc6c3 100644 --- a/src/ga4gh/va_spec/base/validators.py +++ b/src/ga4gh/va_spec/base/validators.py @@ -1,5 +1,7 @@ """Shared validator functions""" +import re + from ga4gh.core.models import MappableConcept from ga4gh.va_spec.base.enums import System @@ -7,14 +9,17 @@ def validate_mappable_concept( mc: MappableConcept | None, valid_system: System, - valid_codes: list[str], + valid_codes: list[str] | None = None, + code_pattern: str | None = None, mc_is_required: bool = False, ) -> MappableConcept | None: """Validate GKS Core Mappable Concept object :param mc: Mappable Concept object :param valid_system: The system that should be used - :param valid_codes: The codes that should be used + :param valid_codes: The codes that should be used for ``primaryCoding.code`` + :param code_pattern: The regex pattern that should be used for + ``primaryCoding.code`` :param mc_is_required: Whether or not `mc` is required :raises ValueError: If `mc` is invalid :return: Validated mappable concept @@ -30,8 +35,14 @@ def validate_mappable_concept( err_msg = f"`primaryCoding.system` must be '{valid_system.value}'." raise ValueError(err_msg) - if mc.primaryCoding.code.root not in valid_codes: + if valid_codes is not None and mc.primaryCoding.code.root not in valid_codes: err_msg = f"`primaryCoding.code` must be one of {valid_codes}." raise ValueError(err_msg) + if code_pattern is not None and not re.match( + code_pattern, mc.primaryCoding.code.root + ): + err_msg = f"`primaryCoding.code` does not match regex pattern {code_pattern}." + raise ValueError(err_msg) + return mc diff --git a/src/ga4gh/va_spec/ccv_2022/__init__.py b/src/ga4gh/va_spec/ccv_2022/__init__.py index 27bd06d..8c6c7a9 100644 --- a/src/ga4gh/va_spec/ccv_2022/__init__.py +++ b/src/ga4gh/va_spec/ccv_2022/__init__.py @@ -1,15 +1,11 @@ """Module to load and init namespace at package level.""" from .models import ( - EVIDENCE_OUTCOME_VALUES, - EvidenceOutcome, - VariantOncogenicityFunctionalImpactEvidenceLine, + VariantOncogenicityEvidenceLine, VariantOncogenicityStudyStatement, ) __all__ = [ - "EVIDENCE_OUTCOME_VALUES", - "EvidenceOutcome", - "VariantOncogenicityFunctionalImpactEvidenceLine", + "VariantOncogenicityEvidenceLine", "VariantOncogenicityStudyStatement", ] diff --git a/src/ga4gh/va_spec/ccv_2022/models.py b/src/ga4gh/va_spec/ccv_2022/models.py index 21ee9fd..4732fe5 100644 --- a/src/ga4gh/va_spec/ccv_2022/models.py +++ b/src/ga4gh/va_spec/ccv_2022/models.py @@ -7,9 +7,9 @@ from ga4gh.core.models import MappableConcept, iriReference from ga4gh.va_spec.base.core import ( - EvidenceLineValidatorMixin, + EvidenceLine, Method, - StatementValidatorMixin, + Statement, VariantOncogenicityProposition, ) from ga4gh.va_spec.base.enums import ( @@ -19,36 +19,17 @@ System, ) from ga4gh.va_spec.base.validators import validate_mappable_concept -from pydantic import BaseModel, Field, field_validator, model_validator +from pydantic import Field, field_validator, model_validator -class EvidenceOutcome(str, Enum): - """Define constraints for evidence outcome values""" - - OS2 = "OS2" - OS2_MODERATE = "OS2_moderate" - OS2_SUPPORTING = "OS2_supporting" - OS2_NOT_MET = "OS2_not_met" - SBS2 = "SBS2" - SBS2_MODERATE = "SBS2_moderate" - SBS2_SUPPORTING = "SBS2_supporting" - SBS2_NOT_MET = "SBS2_not_met" - - -EVIDENCE_OUTCOME_VALUES = [v.value for v in EvidenceOutcome.__members__.values()] - - -class VariantOncogenicityFunctionalImpactEvidenceLine( - BaseModel, EvidenceLineValidatorMixin -): - """An Evidence Line that describes how information about the functional impact of a - variant on a gene or gene product was interpreted as evidence for or against the - variant's oncogenicity. +class VariantOncogenicityEvidenceLine(EvidenceLine): + """An Evidence Line that describes how information about the specific evidence of a + variant was interpreted as evidence for or against the variant's oncogenicity. """ targetProposition: VariantOncogenicityProposition | None = Field( None, - description="A Variant Oncogenicity Proposition against which functional impact information was assessed, in determining the strength and direction of support this information provides as evidence.", + description="A Variant Oncogenicity Proposition against which evidence information was assessed, in determining the strength and direction of support this information provides as evidence.", ) strengthOfEvidenceProvided: MappableConcept | None = Field( None, @@ -56,9 +37,30 @@ class VariantOncogenicityFunctionalImpactEvidenceLine( ) specifiedBy: Method | iriReference = Field( ..., - description="The Clingen/CGC/VICC 2022 criterion that was applied to interpret variant functional impact information as evidence for or against the assessed variant's oncogenicity.", + description="The guidelines that were followed to assess the variant information as evidence for or against the assessed variant's oncogenicity.", ) + class Criterion(str, Enum): + """Define CCV 2022 criterion values""" + + OVS1 = "OVS1" + OS1 = "OS1" + OS2 = "OS2" + OS3 = "OS3" + OM1 = "OM1" + OM2 = "OM2" + OM3 = "OM3" + OM4 = "OM4" + OP1 = "OP1" + OP2 = "OP2" + OP3 = "OP3" + OP4 = "OP4" + SBVS1 = "SBVS1" + SBS1 = "SBS1" + SBS2 = "SBS2" + SBP1 = "SBP1" + SBP2 = "SBP2" + @field_validator("strengthOfEvidenceProvided") @classmethod def validate_strength_of_evidence_provided( @@ -71,31 +73,38 @@ def validate_strength_of_evidence_provided( :return: Validated strengthOfEvidenceProvided value """ return validate_mappable_concept( - v, System.CCV, STRENGTH_OF_EVIDENCE_PROVIDED_VALUES, mc_is_required=False + v, + System.CCV, + valid_codes=STRENGTH_OF_EVIDENCE_PROVIDED_VALUES, + mc_is_required=False, ) @model_validator(mode="before") - def validate_evidence_outcome(cls, values: dict) -> dict: # noqa: N805 - """Validate ``evidenceOutcome`` property if it exists + def validate_model(cls, values: dict) -> dict: # noqa: N805 + """Validate ``evidenceOutcome`` and ``directionOfEvidenceProvided`` properties :param values: Input values :raises ValueError: If ``evidenceOutcome`` exists and is invalid :return: Validated input values. If ``evidenceOutcome`` exists, then it will be - validated and converted to a ``MappableConcept`` + validated and converted to a ``MappableConcept``. + Or if ``strengthOfEvidenceProvided`` is not provided when + ``directionOfEvidenceProvided`` is supports or disputes or if + ``strengthOfEvidenceProvided`` is provided when + ``directionOfEvidenceProvided`` is neutral """ - return cls._validate_evidence_outcome( - values, System.CCV, EVIDENCE_OUTCOME_VALUES - ) + cls._validate_direction_of_evidence_provided(values) + ccv_code_pattern = r"^((?:OVS1|SBVS1)(?:_(?:not_met|(?:strong|moderate|supporting)))?|(?:OS[1-3]|SBS[1-2])(?:_(?:not_met|(?:very_strong|moderate|supporting)))?|(?:OM[1-4])(?:_(?:not_met|(?:very_strong|strong|supporting)))?|(OP[1-4]|SBP[1-2])(?:_(?:not_met|very_strong|strong|moderate))?)$" + return cls._validate_evidence_outcome(values, System.CCV, ccv_code_pattern) -class VariantOncogenicityStudyStatement(BaseModel, StatementValidatorMixin): +class VariantOncogenicityStudyStatement(Statement): """A statement reporting a conclusion from a single study about whether a variant is associated with oncogenicity (positive or negative) - based on interpretation of the study's results. """ - proposition: VariantOncogenicityProposition | None = Field( - None, + proposition: VariantOncogenicityProposition = Field( + ..., description="A proposition about the oncogenicity of a variant, for which the study provides evidence. The validity of this proposition, and the level of confidence/evidence supporting it, may be assessed and reported by the Statement.", ) strength: MappableConcept | None = Field( @@ -119,7 +128,9 @@ def validate_strength(cls, v: MappableConcept | None) -> MappableConcept | None: :raises ValueError: If invalid strength values are provided :return: Validated strength value """ - return validate_mappable_concept(v, System.CCV, STRENGTHS, mc_is_required=False) + return validate_mappable_concept( + v, System.CCV, valid_codes=STRENGTHS, mc_is_required=False + ) @field_validator("classification") @classmethod @@ -131,5 +142,5 @@ def validate_classification(cls, v: MappableConcept) -> MappableConcept: :return: Validated classification value """ return validate_mappable_concept( - v, System.CCV, CCV_CLASSIFICATIONS, mc_is_required=True + v, System.CCV, valid_codes=CCV_CLASSIFICATIONS, mc_is_required=True ) diff --git a/submodules/va_spec b/submodules/va_spec index 1ab0ef7..9081afb 160000 --- a/submodules/va_spec +++ b/submodules/va_spec @@ -1 +1 @@ -Subproject commit 1ab0ef7fadd08fd1687007e59d24c5f7813aa749 +Subproject commit 9081afb966d1ff62b4289546b6f481baf17e4ad2 diff --git a/tests/validation/test_va_spec_models.py b/tests/validation/test_va_spec_models.py index 93eae0e..5d2adc5 100644 --- a/tests/validation/test_va_spec_models.py +++ b/tests/validation/test_va_spec_models.py @@ -9,7 +9,7 @@ from ga4gh.va_spec import acmg_2015, base, ccv_2022 from ga4gh.va_spec.aac_2017.models import VariantTherapeuticResponseStudyStatement from ga4gh.va_spec.acmg_2015.models import ( - VariantPathogenicityFunctionalImpactEvidenceLine, + VariantPathogenicityEvidenceLine, VariantPathogenicityStatement, ) from ga4gh.va_spec.base import ( @@ -19,7 +19,7 @@ ) from ga4gh.va_spec.base.core import EvidenceLine, Method, StudyGroup, StudyResult from ga4gh.va_spec.ccv_2022.models import ( - VariantOncogenicityFunctionalImpactEvidenceLine, + VariantOncogenicityEvidenceLine, VariantOncogenicityStudyStatement, ) from pydantic import ValidationError @@ -121,6 +121,7 @@ def test_evidence_line(caf): el_dict = { "type": "EvidenceLine", "hasEvidenceItems": [ + iriReference(root="evidence.json#/1"), { "id": "civic.eid:2997", "type": "Statement", @@ -174,12 +175,13 @@ def test_evidence_line(caf): "type": "Method", }, "direction": "supports", - } + }, ], "directionOfEvidenceProvided": "disputes", } el = EvidenceLine(**el_dict) - assert isinstance(el.hasEvidenceItems[0], VariantTherapeuticResponseStudyStatement) + assert isinstance(el.hasEvidenceItems[0], iriReference) + assert isinstance(el.hasEvidenceItems[1], VariantTherapeuticResponseStudyStatement) el_dict = { "type": "EvidenceLine", @@ -289,12 +291,12 @@ def test_variant_pathogenicity_stmt(): invalid_params = deepcopy(params) del invalid_params["proposition"] # proposition is required for statement - with pytest.raises(ValueError, match="Must be a `Statement`"): + with pytest.raises(ValueError, match="Field required"): VariantPathogenicityStatement(**invalid_params) def test_variant_pathogenicity_el(): - """Ensure VariantPathogenicityFunctionalImpactEvidenceLine model works as expected""" + """Ensure VariantPathogenicityEvidenceLine model works as expected""" params = { "type": "EvidenceLine", "specifiedBy": { @@ -306,6 +308,7 @@ def test_variant_pathogenicity_el(): "pmid": 25741868, "name": "ACMG Guidelines, 2015", }, + "methodType": "PS3", }, "directionOfEvidenceProvided": "supports", "evidenceOutcome": { @@ -315,8 +318,14 @@ def test_variant_pathogenicity_el(): }, "name": "ACMG 2015 PS3 Supporting Criterion Met", }, + "strengthOfEvidenceProvided": { + "primaryCoding": { + "system": "ACMG Guidelines, 2015", + "code": "supporting", + } + }, } - vp = VariantPathogenicityFunctionalImpactEvidenceLine(**params) + vp = VariantPathogenicityEvidenceLine(**params) assert isinstance(vp.specifiedBy, Method) assert vp.evidenceOutcome == MappableConcept( @@ -326,26 +335,43 @@ def test_variant_pathogenicity_el(): name="ACMG 2015 PS3 Supporting Criterion Met", ) - valid_params = deepcopy(params) - valid_params["strengthOfEvidenceProvided"] = None - assert VariantPathogenicityFunctionalImpactEvidenceLine(**valid_params) + invalid_params = deepcopy(params) + invalid_params["evidenceOutcome"]["primaryCoding"]["code"] = "PS3 supporting" + with pytest.raises( + ValueError, + match="`primaryCoding.code` does not match regex pattern", + ): + VariantPathogenicityEvidenceLine(**invalid_params) + + invalid_params = deepcopy(params) + invalid_params["strengthOfEvidenceProvided"] = None + with pytest.raises( + ValueError, + match="`strengthOfEvidenceProvided` is required when `directionOfEvidenceProvided` is 'supports' or 'disputes'.", + ): + VariantPathogenicityEvidenceLine(**invalid_params) + + invalid_params = deepcopy(params) + invalid_params["strengthOfEvidenceProvided"]["primaryCoding"]["code"] = "definitive" + with pytest.raises(ValueError, match="`primaryCoding.code` must be one of"): + VariantPathogenicityEvidenceLine(**invalid_params) invalid_params = deepcopy(params) del invalid_params["specifiedBy"]["reportedIn"] with pytest.raises(ValueError, match="`reportedIn` is required"): - VariantPathogenicityFunctionalImpactEvidenceLine(**invalid_params) + VariantPathogenicityEvidenceLine(**invalid_params) invalid_params = deepcopy(params) del invalid_params[ "directionOfEvidenceProvided" ] # directionOfEvidenceProvided is required for statement - with pytest.raises(ValueError, match="Must be an `EvidenceLine`"): - VariantPathogenicityFunctionalImpactEvidenceLine(**invalid_params) + with pytest.raises(ValueError, match="Field required"): + VariantPathogenicityEvidenceLine(**invalid_params) invalid_params = deepcopy(params) invalid_params["strengthOfEvidenceProvided"] = {"name": "test"} with pytest.raises(ValueError, match="`primaryCoding` is required."): - VariantPathogenicityFunctionalImpactEvidenceLine(**invalid_params) + VariantPathogenicityEvidenceLine(**invalid_params) invalid_params = deepcopy(params) invalid_params["strengthOfEvidenceProvided"] = { @@ -355,14 +381,30 @@ def test_variant_pathogenicity_el(): } } with pytest.raises(ValueError, match="`primaryCoding.system` must be"): - VariantPathogenicityFunctionalImpactEvidenceLine(**invalid_params) + VariantPathogenicityEvidenceLine(**invalid_params) invalid_params = deepcopy(params) invalid_params["strengthOfEvidenceProvided"] = { "primaryCoding": {"system": "ACMG Guidelines, 2015", "code": "PS3"} } with pytest.raises(ValueError, match="`primaryCoding.code` must be"): - VariantPathogenicityFunctionalImpactEvidenceLine(**invalid_params) + VariantPathogenicityEvidenceLine(**invalid_params) + + invalid_params = deepcopy(params) + invalid_params["specifiedBy"]["methodType"] = "OS1" + with pytest.raises( + ValueError, + match="'OS1' is not a valid VariantPathogenicityEvidenceLine.Criterion", + ): + VariantPathogenicityEvidenceLine(**invalid_params) + + invalid_params = deepcopy(params) + invalid_params["directionOfEvidenceProvided"] = "neutral" + with pytest.raises( + ValueError, + match="`strengthOfEvidenceProvided` is not allowed when `directionOfEvidenceProvided` is 'neutral'.", + ): + VariantPathogenicityEvidenceLine(**invalid_params) def test_variant_onco_stmt(): @@ -391,6 +433,10 @@ def test_variant_onco_stmt(): } assert VariantOncogenicityStudyStatement(**params) + valid_params = deepcopy(params) + valid_params["strength"] = None + assert VariantOncogenicityStudyStatement(**valid_params) + invalid_params = deepcopy(params) invalid_params["strength"]["primaryCoding"]["code"] = "oncogenic" with pytest.raises(ValueError, match="`primaryCoding.code` must be one of"): @@ -415,8 +461,8 @@ def test_variant_onco_stmt(): def test_variant_onco_el(): - """Ensure VariantOncogenicityFunctionalImpactEvidenceLine model works as expected""" - vo = VariantOncogenicityFunctionalImpactEvidenceLine( + """Ensure VariantOncogenicityEvidenceLine model works as expected""" + vo = VariantOncogenicityEvidenceLine( type="EvidenceLine", specifiedBy={ "type": "Method", @@ -425,6 +471,7 @@ def test_variant_onco_el(): "pmid": 35101336, "name": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", }, + "methodType": "OS2", }, directionOfEvidenceProvided="supports", scoreOfEvidenceProvided=1, @@ -434,6 +481,12 @@ def test_variant_onco_el(): "system": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", }, }, + strengthOfEvidenceProvided={ + "primaryCoding": { + "code": "supporting", + "system": "ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022", + } + }, ) assert isinstance(vo.specifiedBy, Method) assert vo.evidenceOutcome == MappableConcept( @@ -443,6 +496,37 @@ def test_variant_onco_el(): ), ) + vo_invalid_params = vo.model_copy(deep=True).model_dump() + vo_invalid_params["specifiedBy"]["methodType"] = "PS1" + with pytest.raises( + ValueError, + match="'PS1' is not a valid VariantOncogenicityEvidenceLine.Criterion", + ): + VariantOncogenicityEvidenceLine(**vo_invalid_params) + + invalid_params = vo.model_copy(deep=True).model_dump() + invalid_params["strengthOfEvidenceProvided"]["primaryCoding"]["code"] = "definitive" + with pytest.raises(ValueError, match="`primaryCoding.code` must be one of"): + VariantOncogenicityEvidenceLine(**invalid_params) + + invalid_params = vo.model_copy(deep=True).model_dump() + invalid_params["strengthOfEvidenceProvided"]["primaryCoding"]["system"] = ( + "ACMG Guidelines, 2015" + ) + with pytest.raises( + ValueError, + match="`primaryCoding.system` must be 'ClinGen/CGC/VICC Guidelines for Oncogenicity, 2022'.", + ): + VariantOncogenicityEvidenceLine(**invalid_params) + + invalid_params = vo.model_copy(deep=True).model_dump() + invalid_params["directionOfEvidenceProvided"] = "neutral" + with pytest.raises( + ValueError, + match="`strengthOfEvidenceProvided` is not allowed when `directionOfEvidenceProvided` is 'neutral'.", + ): + VariantOncogenicityEvidenceLine(**invalid_params) + def test_examples(test_definitions): """Test VA Spec examples"""