Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,6 @@ exclude = [
"INP001",
"SLF001",
]
"src/ga4gh/vrs/utils/hgvs_tools.py" = [
"ANN001",
"ANN201",
"ANN202",
"D102",
]
"src/ga4gh/vrs/normalize.py" = [
"ANN001",
"ANN201",
Expand All @@ -203,11 +197,6 @@ exclude = [
"src/ga4gh/vrs/extras/vcf_annotation.py" = [
"PTH123", # see https://github.com/ga4gh/vrs-python/issues/482
]
"src/ga4gh/vrs/extras/translator.py" = [
"ANN001",
"ANN101",
"ANN2",
]
"src/ga4gh/vrs/extras/object_store.py" = [
"ANN",
"D",
Expand Down
2 changes: 1 addition & 1 deletion src/ga4gh/vrs/extras/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from collections.abc import Callable


def lazy_property(fn: Callable): # noqa: ANN201
def lazy_property(fn: Callable) -> property:
"""Provide a decorator that makes a property lazy-evaluated.

[mv]
Expand Down
59 changes: 40 additions & 19 deletions src/ga4gh/vrs/extras/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ class _Translator(ABC): # noqa: B024
def __init__(
self,
data_proxy: _DataProxy,
default_assembly_name="GRCh38",
identify=True,
default_assembly_name: str = "GRCh38",
identify: bool = True,
rle_seq_limit: int | None = 50,
):
self.default_assembly_name = default_assembly_name
Expand All @@ -56,9 +56,10 @@ def __init__(
self.rle_seq_limit = rle_seq_limit
self.from_translators = {}
self.to_translators = {}
return

def translate_from(self, var, fmt=None, **kwargs):
def translate_from(
self, var: str, fmt: str | None = None, **kwargs
) -> models._VariationBase:
"""Translate variation `var` to VRS object

If `fmt` is None, guess the appropriate format and return the variant.
Expand Down Expand Up @@ -109,7 +110,7 @@ def translate_from(self, var, fmt=None, **kwargs):
msg = f"Unable to parse data as {', '.join(formats)}"
raise ValueError(msg)

def translate_to(self, vo, fmt):
def translate_to(self, vo: models._VariationBase, fmt: str) -> str:
"""Translate vrs object `vo` to named format `fmt`"""
t = self.to_translators[fmt]
return t(vo)
Expand All @@ -118,11 +119,11 @@ def translate_to(self, vo, fmt):
# INTERNAL

@lazy_property
def hgvs_tools(self):
def hgvs_tools(self) -> HgvsTools:
"""Instantiate and return an HgvsTools instance"""
return HgvsTools(self.data_proxy)

def _from_vrs(self, var):
def _from_vrs(self, var: dict) -> models._VariationBase | None:
"""Convert from dict representation of VRS JSON to VRS object"""
if not isinstance(var, Mapping):
return None
Expand All @@ -138,7 +139,12 @@ def _from_vrs(self, var):
class AlleleTranslator(_Translator):
"""Class for translating formats to and from VRS Alleles"""

def __init__(self, data_proxy, default_assembly_name="GRCh38", identify=True):
def __init__(
self,
data_proxy: _DataProxy,
default_assembly_name: str = "GRCh38",
identify: bool = True,
):
"""Initialize AlleleTranslator class"""
super().__init__(data_proxy, default_assembly_name, identify)

Expand All @@ -155,7 +161,7 @@ def __init__(self, data_proxy, default_assembly_name="GRCh38", identify=True):
"spdi": self._to_spdi,
}

def _create_allele(self, values: dict, **kwargs):
def _create_allele(self, values: dict, **kwargs) -> models.Allele:
"""Create an allele object with the given parameters.

Args:
Expand All @@ -178,7 +184,7 @@ def _create_allele(self, values: dict, **kwargs):
allele = models.Allele(location=location, state=state)
return self._post_process_imported_allele(allele, **kwargs)

def _from_beacon(self, beacon_expr, **kwargs):
def _from_beacon(self, beacon_expr: str, **kwargs) -> models.Allele | None:
"""Parse beacon expression into VRS Allele

kwargs:
Expand Down Expand Up @@ -239,7 +245,7 @@ def _from_beacon(self, beacon_expr, **kwargs):
}
return self._create_allele(values, **kwargs)

def _from_gnomad(self, gnomad_expr, **kwargs):
def _from_gnomad(self, gnomad_expr: str, **kwargs) -> models.Allele | None:
"""Parse gnomAD-style VCF expression into VRS Allele

kwargs:
Expand Down Expand Up @@ -312,13 +318,13 @@ def _from_gnomad(self, gnomad_expr, **kwargs):
}
return self._create_allele(values, **kwargs)

def _from_hgvs(self, hgvs_expr: str, **kwargs):
def _from_hgvs(self, hgvs_expr: str, **kwargs) -> models.Allele | None:
allele_values = self.hgvs_tools.extract_allele_values(hgvs_expr)
if allele_values:
return self._create_allele(allele_values, **kwargs)
return None

def _from_spdi(self, spdi_expr, **kwargs):
def _from_spdi(self, spdi_expr: str, **kwargs) -> models.Allele | None:
"""Parse SPDI expression in to a GA4GH Allele

kwargs:
Expand Down Expand Up @@ -378,10 +384,14 @@ def _from_spdi(self, spdi_expr, **kwargs):

return self._create_allele(values, **kwargs)

def _to_hgvs(self, vo, namespace="refseq"):
def _to_hgvs(
self, vo: models.Allele, namespace: str | None = "refseq"
) -> list[str]:
return self.hgvs_tools.from_allele(vo, namespace)

def _to_spdi(self, vo, namespace="refseq"):
def _to_spdi(
self, vo: models.Allele, namespace: str | None = "refseq"
) -> list[str]:
"""Generate a *list* of SPDI expressions for VRS Allele.

If `namespace` is not None, returns SPDI strings for the
Expand All @@ -405,7 +415,9 @@ def _to_spdi(self, vo, namespace="refseq"):
spdi_tail = f":{start}:{end - start}:{vo.state.sequence.root}"
return [a + spdi_tail for a in aliases]

def _post_process_imported_allele(self, allele, **kwargs):
def _post_process_imported_allele(
self, allele: models.Allele, **kwargs
) -> models.Allele:
"""Provide common post-processing for imported Alleles IN-PLACE.

:param allele: VRS Allele object
Expand Down Expand Up @@ -435,14 +447,21 @@ def _post_process_imported_allele(self, allele, **kwargs):
class CnvTranslator(_Translator):
"""Class for translating formats from format to VRS Copy Number"""

def __init__(self, data_proxy, default_assembly_name="GRCh38", identify=True):
def __init__(
self,
data_proxy: _DataProxy,
default_assembly_name: str = "GRCh38",
identify: bool = True,
):
"""Initialize CnvTranslator class"""
super().__init__(data_proxy, default_assembly_name, identify)
self.from_translators = {
"hgvs": self._from_hgvs,
}

def _from_hgvs(self, hgvs_dup_del_expr: str, **kwargs):
def _from_hgvs(
self, hgvs_dup_del_expr: str, **kwargs
) -> models.CopyNumberChange | models.CopyNumberCount | None:
"""Parse hgvs into a VRS CNV Object

kwargs:
Expand Down Expand Up @@ -495,7 +514,9 @@ def _from_hgvs(self, hgvs_dup_del_expr: str, **kwargs):

return self._post_process_imported_cnv(cnv)

def _post_process_imported_cnv(self, copy_number):
def _post_process_imported_cnv(
self, copy_number: models.CopyNumberChange | models.CopyNumberCount
) -> models.CopyNumberChange | models.CopyNumberCount:
"""Provide common post-processing for imported Copy Numbers IN-PLACE."""
if self.identify:
copy_number.id = ga4gh_identify(copy_number)
Expand Down
36 changes: 22 additions & 14 deletions src/ga4gh/vrs/utils/hgvs_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
import hgvs.dataproviders.uta
import hgvs.normalizer
import hgvs.parser
import hgvs.sequencevariant
import hgvs.variantmapper
from hgvs.sequencevariant import SequenceVariant as HgvsSequenceVariant

from ga4gh.vrs import models
from ga4gh.vrs.dataproxy import _DataProxy, create_dataproxy
from ga4gh.vrs.dataproxy import _DataProxy

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -53,7 +53,7 @@ def close(self) -> None:
self.uta_conn.close()

# convenience methods for hgvs parsing, normalization, and some mappings
def parse(self, hgvs_str):
def parse(self, hgvs_str: str) -> HgvsSequenceVariant | None:
"""Parse the given HGVS string and returns the corresponding variant.

Args:
Expand All @@ -67,7 +67,7 @@ def parse(self, hgvs_str):
return None
return self.parser.parse_hgvs_variant(hgvs_str)

def is_intronic(self, sv: hgvs.sequencevariant.SequenceVariant):
def is_intronic(self, sv: HgvsSequenceVariant) -> bool:
"""Check if the given SequenceVariant is intronic.

Args:
Expand All @@ -81,12 +81,13 @@ def is_intronic(self, sv: hgvs.sequencevariant.SequenceVariant):
return sv.posedit.pos.start.is_intronic or sv.posedit.pos.end.is_intronic
return False

def get_edit_type(self, sv: hgvs.sequencevariant.SequenceVariant):
def get_edit_type(self, sv: HgvsSequenceVariant) -> str | None:
"""Safely extract ``type`` property from SequenceVariant"""
if sv is None or sv.posedit is None or sv.posedit.edit is None:
return None
return sv.posedit.edit.type

def get_position_and_state(self, sv: hgvs.sequencevariant.SequenceVariant):
def get_position_and_state(self, sv: HgvsSequenceVariant) -> tuple[int, int, str]:
"""Get the details of a sequence variant.

Args:
Expand Down Expand Up @@ -130,7 +131,7 @@ def get_position_and_state(self, sv: hgvs.sequencevariant.SequenceVariant):

return start, end, state

def extract_allele_values(self, hgvs_expr: str):
def extract_allele_values(self, hgvs_expr: str) -> dict | None:
"""Parse hgvs into a VRS Allele Object

kwargs:
Expand Down Expand Up @@ -187,7 +188,7 @@ def extract_allele_values(self, hgvs_expr: str):
"literal_sequence": state,
}

def from_allele(self, vo, namespace=None):
def from_allele(self, vo: models.Allele, namespace: str | None = None) -> list[str]:
"""Generate a *list* of HGVS expressions for VRS Allele.

If `namespace` is not None, returns HGVS strings for the
Expand Down Expand Up @@ -259,7 +260,9 @@ def from_allele(self, vo, namespace=None):

return list(set(hgvs_exprs))

def _to_sequence_variant(self, vo, sequence_type, sequence, accession):
def _to_sequence_variant(
self, vo: models.Allele, sequence_type: str, sequence: str, accession: str
) -> HgvsSequenceVariant:
"""Create a SequenceVariant object from an Allele object."""
# build interval and edit depending on sequence type
if sequence_type == "p":
Expand All @@ -285,7 +288,7 @@ def _to_sequence_variant(self, vo, sequence_type, sequence, accession):
edit = hgvs.edit.NARefAlt(ref=ref, alt=alt)

posedit = hgvs.posedit.PosEdit(pos=ival, edit=edit)
var = hgvs.sequencevariant.SequenceVariant(
var = HgvsSequenceVariant(
ac=accession,
# at this point, use `n.` because the positions are absolute (not CDS),
# this will subsequently be converted back to `c.` after hgvs normalization
Expand All @@ -307,19 +310,24 @@ def _to_sequence_variant(self, vo, sequence_type, sequence, accession):

return var

def normalize(self, hgvs):
def normalize(self, hgvs: HgvsSequenceVariant) -> HgvsSequenceVariant:
"""Perform hgvs library `normalize()` method"""
return self.normalizer.normalize(hgvs)

def n_to_c(self, hgvs):
def n_to_c(self, hgvs: HgvsSequenceVariant) -> HgvsSequenceVariant:
"""Perform hgvs library `n_to_c` method"""
return self.variant_mapper.n_to_c(hgvs)

def c_to_n(self, hgvs):
def c_to_n(self, hgvs: HgvsSequenceVariant) -> HgvsSequenceVariant:
"""Perform hgvs library `c_to_n` method"""
return self.variant_mapper.c_to_n(hgvs)


if __name__ == "__main__":
import os

from ga4gh.vrs.dataproxy import create_dataproxy

seqrepo_uri = os.environ.get(
"SEQREPO_URI", "seqrepo+file:///usr/local/share/seqrepo/latest"
)
Expand All @@ -345,7 +353,7 @@ def c_to_n(self, hgvs):
"state": {"type": "LiteralSequenceExpression", "sequence": "G"},
}

vrs_allele = models.Allele.parse_obj(allele_dict)
vrs_allele = models.Allele.model_validate(allele_dict)
dp = create_dataproxy(seqrepo_uri)
hgvs_tools = HgvsTools(dp)
hgvs_expr = hgvs_tools.from_allele(vrs_allele, namespace="refseq")
Expand Down
Loading