From 59d48e3fbfbf5a0abed0bd9c8ceb87564ad1d346 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Sun, 20 Jul 2025 11:56:20 -0400 Subject: [PATCH 1/4] feat: update va spec models to 1.0.1 close #34 * Make ConditionSet nestable to represent complex relationships --- src/ga4gh/va_spec/base/domain_entities.py | 4 +- tests/validation/test_va_spec_models.py | 81 +++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/src/ga4gh/va_spec/base/domain_entities.py b/src/ga4gh/va_spec/base/domain_entities.py index cba1641..a18f5d2 100644 --- a/src/ga4gh/va_spec/base/domain_entities.py +++ b/src/ga4gh/va_spec/base/domain_entities.py @@ -1,5 +1,7 @@ """VA Spec Shared Domain Entity Data Structures""" +from __future__ import annotations + from ga4gh.core.models import BaseModelForbidExtra, Element, MappableConcept from ga4gh.va_spec.base.enums import MembershipOperator from pydantic import ConfigDict, Field, RootModel @@ -13,7 +15,7 @@ class ConditionSet(Element, BaseModelForbidExtra): model_config = ConfigDict(use_enum_values=True) - conditions: list[MappableConcept] = Field( + conditions: list[MappableConcept | ConditionSet] = Field( ..., min_length=2, description="A list of conditions (diseases, phenotypes, traits) that are co-occurring.", diff --git a/tests/validation/test_va_spec_models.py b/tests/validation/test_va_spec_models.py index 24dd980..43744c3 100644 --- a/tests/validation/test_va_spec_models.py +++ b/tests/validation/test_va_spec_models.py @@ -18,6 +18,7 @@ ExperimentalVariantFunctionalImpactStudyResult, ) from ga4gh.va_spec.base.core import EvidenceLine, Method, StudyGroup, StudyResult +from ga4gh.va_spec.base.domain_entities import ConditionSet from ga4gh.va_spec.ccv_2022.models import ( VariantOncogenicityEvidenceLine, VariantOncogenicityStudyStatement, @@ -49,6 +50,86 @@ def caf(): ) +def test_condition_set(): + """Ensure ConditionSet model works as expected""" + condition_set_dict = { + "membershipOperator": "AND", + "conditions": [ + 
{ + "conceptType": "Disease", + "id": "civic.did:3387", + "mappings": [ + { + "coding": { + "code": "DOID:0081279", + "system": "https://disease-ontology.org/?id=", + }, + "relation": "exactMatch", + } + ], + "name": "Diffuse Astrocytoma, MYB- Or MYBL1-altered", + }, + { + "conditions": [ + { + "conceptType": "Phenotype", + "id": "civic.phenotype:8121", + "mappings": [ + { + "coding": { + "code": "HP:0011463", + "system": "https://hpo.jax.org/browse/term/", + }, + "relation": "exactMatch", + } + ], + "name": "Childhood onset", + }, + { + "conceptType": "Phenotype", + "id": "civic.phenotype:2656", + "mappings": [ + { + "coding": { + "code": "HP:0003621", + "id": "HP:0003621", + "system": "https://hpo.jax.org/browse/term/", + }, + "relation": "exactMatch", + } + ], + "name": "Juvenile onset", + }, + { + "conceptType": "Phenotype", + "id": "civic.phenotype:2643", + "mappings": [ + { + "coding": { + "code": "HP:0003581", + "system": "https://hpo.jax.org/browse/term/", + }, + "relation": "exactMatch", + } + ], + "name": "Adult onset", + }, + ], + "membershipOperator": "OR", + }, + ], + } + assert ConditionSet(**condition_set_dict) + + invalid_params = deepcopy(condition_set_dict) + invalid_params["conditions"].pop() + + with pytest.raises( + ValidationError, match="List should have at least 2 items after validation" + ): + ConditionSet(**invalid_params) + + def test_agent(): """Ensure Agent model works as expected""" agent = Agent(name="Joe") From e847820a326b313781ce5721b20b8378fbf863a2 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Thu, 24 Jul 2025 08:57:57 -0400 Subject: [PATCH 2/4] update submodules --- submodules/va_spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodules/va_spec b/submodules/va_spec index 6263d7e..da35aa0 160000 --- a/submodules/va_spec +++ b/submodules/va_spec @@ -1 +1 @@ -Subproject commit 6263d7e50ac15947a50217ea0aa99761e1a914a9 +Subproject commit da35aa0286aa209b24e8d3a827ddd5a491ab5350 From 
245a2c7914625228c9041bff3a81121a89cce3e6 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Thu, 24 Jul 2025 09:00:36 -0400 Subject: [PATCH 3/4] update docstrings --- src/ga4gh/va_spec/base/domain_entities.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/ga4gh/va_spec/base/domain_entities.py b/src/ga4gh/va_spec/base/domain_entities.py index a18f5d2..5753043 100644 --- a/src/ga4gh/va_spec/base/domain_entities.py +++ b/src/ga4gh/va_spec/base/domain_entities.py @@ -8,9 +8,10 @@ class ConditionSet(Element, BaseModelForbidExtra): - """A set of conditions (diseases, phenotypes, traits). - A set of two or more conditions that co-occur in the same patient/subject, or are - manifest individually in a different subset of participants in a research study. + """A set of conditions (diseases, phenotypes, traits) that occur together or are + related, depending on the membership operator, and may manifest together in the + same patient or individually in a different subset of participants in a research + study. 
""" model_config = ConfigDict(use_enum_values=True) @@ -18,7 +19,7 @@ class ConditionSet(Element, BaseModelForbidExtra): conditions: list[MappableConcept | ConditionSet] = Field( ..., min_length=2, - description="A list of conditions (diseases, phenotypes, traits) that are co-occurring.", + description="A list of conditions (diseases, phenotypes, traits) that are co-occurring or related, depending on the membership operator.", ) membershipOperator: MembershipOperator = Field( ..., From 085b11111c40890d9e18bc9c09c28f08d85c13af Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Thu, 24 Jul 2025 11:25:56 -0400 Subject: [PATCH 4/4] test: add tests for validating va-spec test fixtures against pydantic models close #33 --- tests/conftest.py | 29 +++++++ .../test_va_spec_fixtures_validation.py | 42 ++++++++++ tests/validation/test_va_spec_models.py | 81 ------------------- tests/validation/test_va_spec_schema.py | 27 ++----- 4 files changed, 78 insertions(+), 101 deletions(-) create mode 100644 tests/validation/test_va_spec_fixtures_validation.py diff --git a/tests/conftest.py b/tests/conftest.py index e2a5639..369e19f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,34 @@ """Provide utilities for test cases.""" +from enum import Enum from pathlib import Path SUBMODULES_DIR = Path(__file__).parents[1] / "submodules" / "va_spec" + + +class VaSpecSchema(str, Enum): + """Enum for VA-Spec schema""" + + AAC_2017 = "aac-2017" + ACMG_2015 = "acmg-2015" + BASE = "base" + CCV_2022 = "ccv-2022" + + +def get_va_spec_schema(label: str) -> str | None: + """Get VA-Spec schema given label + + :param label: String whose suffix identifies the schema (e.g. a namespace or directory path) + :return: VA-Spec schema whose value ``label`` ends with, or ``None`` if no schema matches + """ + if label.endswith(VaSpecSchema.AAC_2017): + schema = VaSpecSchema.AAC_2017 + elif label.endswith(VaSpecSchema.ACMG_2015): + schema = VaSpecSchema.ACMG_2015 + elif label.endswith(VaSpecSchema.BASE): + schema = VaSpecSchema.BASE + elif label.endswith(VaSpecSchema.CCV_2022): + schema = VaSpecSchema.CCV_2022 + else: + schema = None + 
return schema diff --git a/tests/validation/test_va_spec_fixtures_validation.py b/tests/validation/test_va_spec_fixtures_validation.py new file mode 100644 index 0000000..0c94451 --- /dev/null +++ b/tests/validation/test_va_spec_fixtures_validation.py @@ -0,0 +1,42 @@ +"""Ensure that VA-Spec test fixtures validate against Pydantic models""" + +import yaml +from ga4gh.va_spec import aac_2017, acmg_2015, base, ccv_2022 + +from tests.conftest import SUBMODULES_DIR, VaSpecSchema, get_va_spec_schema + +VA_SPEC_TESTS_DIR = SUBMODULES_DIR / "tests" + + +with (VA_SPEC_TESTS_DIR / "test_definitions.yaml").open() as f: + data = yaml.load(f, Loader=yaml.SafeLoader) + test_definitions = data["tests"] + +SCHEMA_TO_PYDANTIC_MODULE = { + VaSpecSchema.AAC_2017: aac_2017, + VaSpecSchema.ACMG_2015: acmg_2015, + VaSpecSchema.CCV_2022: ccv_2022, + VaSpecSchema.BASE: base, +} +VA_SPEC_TEST_DEFINITIONS = {schema: [] for schema in VaSpecSchema} + + +for test_def in test_definitions: + if test_def["namespace"].startswith("va-spec."): + schema = get_va_spec_schema(test_def["namespace"].split("va-spec.")[-1]) + VA_SPEC_TEST_DEFINITIONS[schema].append(test_def) + + +def test_va_spec_fixtures(): + """Test that VA-Spec test fixtures validate against Pydantic models""" + for va_spec_schema, schema_test_defs in VA_SPEC_TEST_DEFINITIONS.items(): + pydantic_module = SCHEMA_TO_PYDANTIC_MODULE[va_spec_schema] + + for schema_test_def in schema_test_defs: + with ( + VA_SPEC_TESTS_DIR / "fixtures" / schema_test_def["test_file"] + ).open() as f: + test_fixture_dict = yaml.load(f, Loader=yaml.SafeLoader) + va_spec_class = schema_test_def["definition"] + pydantic_model = getattr(pydantic_module, va_spec_class) + assert pydantic_model(**test_fixture_dict) diff --git a/tests/validation/test_va_spec_models.py b/tests/validation/test_va_spec_models.py index 43744c3..24dd980 100644 --- a/tests/validation/test_va_spec_models.py +++ b/tests/validation/test_va_spec_models.py @@ -18,7 +18,6 @@ 
ExperimentalVariantFunctionalImpactStudyResult, ) from ga4gh.va_spec.base.core import EvidenceLine, Method, StudyGroup, StudyResult -from ga4gh.va_spec.base.domain_entities import ConditionSet from ga4gh.va_spec.ccv_2022.models import ( VariantOncogenicityEvidenceLine, VariantOncogenicityStudyStatement, @@ -50,86 +49,6 @@ def caf(): ) -def test_condition_set(): - """Ensure ConditionSet model works as expected""" - condition_set_dict = { - "membershipOperator": "AND", - "conditions": [ - { - "conceptType": "Disease", - "id": "civic.did:3387", - "mappings": [ - { - "coding": { - "code": "DOID:0081279", - "system": "https://disease-ontology.org/?id=", - }, - "relation": "exactMatch", - } - ], - "name": "Diffuse Astrocytoma, MYB- Or MYBL1-altered", - }, - { - "conditions": [ - { - "conceptType": "Phenotype", - "id": "civic.phenotype:8121", - "mappings": [ - { - "coding": { - "code": "HP:0011463", - "system": "https://hpo.jax.org/browse/term/", - }, - "relation": "exactMatch", - } - ], - "name": "Childhood onset", - }, - { - "conceptType": "Phenotype", - "id": "civic.phenotype:2656", - "mappings": [ - { - "coding": { - "code": "HP:0003621", - "id": "HP:0003621", - "system": "https://hpo.jax.org/browse/term/", - }, - "relation": "exactMatch", - } - ], - "name": "Juvenile onset", - }, - { - "conceptType": "Phenotype", - "id": "civic.phenotype:2643", - "mappings": [ - { - "coding": { - "code": "HP:0003581", - "system": "https://hpo.jax.org/browse/term/", - }, - "relation": "exactMatch", - } - ], - "name": "Adult onset", - }, - ], - "membershipOperator": "OR", - }, - ], - } - assert ConditionSet(**condition_set_dict) - - invalid_params = deepcopy(condition_set_dict) - invalid_params["conditions"].pop() - - with pytest.raises( - ValidationError, match="List should have at least 2 items after validation" - ): - ConditionSet(**invalid_params) - - def test_agent(): """Ensure Agent model works as expected""" agent = Agent(name="Joe") diff --git 
a/tests/validation/test_va_spec_schema.py b/tests/validation/test_va_spec_schema.py index 94777cb..192f737 100644 --- a/tests/validation/test_va_spec_schema.py +++ b/tests/validation/test_va_spec_schema.py @@ -1,27 +1,21 @@ """Test that VA-Spec Python Pydantic models match corresponding JSON schemas""" import json -from enum import Enum from pathlib import Path import pytest from ga4gh.va_spec import aac_2017, acmg_2015, base, ccv_2022 from pydantic import BaseModel -from tests.conftest import SUBMODULES_DIR +from tests.conftest import ( + SUBMODULES_DIR, + VaSpecSchema, + get_va_spec_schema, +) VA_SCHEMA_DIR = SUBMODULES_DIR / "schema" / "va-spec" -class VaSpecSchema(str, Enum): - """Enum for VA-Spec schema""" - - AAC_2017 = "aac-2017" - ACMG_2015 = "acmg-2015" - BASE = "base" - CCV_2022 = "ccv-2022" - - class VaSpecSchemaMapping(BaseModel): """Model for representing VA-Spec Schema concrete classes, primitives, and schema""" @@ -59,15 +53,8 @@ def _update_va_spec_schema_mapping( # Get core + profiles classes for child in VA_SCHEMA_DIR.iterdir(): child_str = str(child) - if child_str.endswith(VaSpecSchema.AAC_2017): - mapping_key = VaSpecSchema.AAC_2017 - elif child_str.endswith(VaSpecSchema.ACMG_2015): - mapping_key = VaSpecSchema.ACMG_2015 - elif child_str.endswith(VaSpecSchema.BASE): - mapping_key = VaSpecSchema.BASE - elif child_str.endswith(VaSpecSchema.CCV_2022): - mapping_key = VaSpecSchema.CCV_2022 - else: + mapping_key = get_va_spec_schema(child_str) + if not mapping_key: continue mapping = VA_SPEC_SCHEMA_MAPPING[mapping_key]