diff --git a/pyproject.toml b/pyproject.toml index 39ff55cb..4789a316 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -189,12 +189,6 @@ exclude = [ "INP001", "SLF001", ] -"src/ga4gh/vrs/utils/hgvs_tools.py" = [ - "ANN001", - "ANN201", - "ANN202", - "D102", -] "src/ga4gh/vrs/normalize.py" = [ "ANN001", "ANN201", @@ -203,11 +197,6 @@ exclude = [ "src/ga4gh/vrs/extras/vcf_annotation.py" = [ "PTH123", # see https://github.com/ga4gh/vrs-python/issues/482 ] -"src/ga4gh/vrs/extras/translator.py" = [ - "ANN001", - "ANN101", - "ANN2", -] "src/ga4gh/vrs/extras/object_store.py" = [ "ANN", "D", diff --git a/src/ga4gh/vrs/extras/decorators.py b/src/ga4gh/vrs/extras/decorators.py index 1d8575b5..c64d2bee 100644 --- a/src/ga4gh/vrs/extras/decorators.py +++ b/src/ga4gh/vrs/extras/decorators.py @@ -3,7 +3,7 @@ from collections.abc import Callable -def lazy_property(fn: Callable): # noqa: ANN201 +def lazy_property(fn: Callable) -> property: """Provide a decorator that makes a property lazy-evaluated. [mv] diff --git a/src/ga4gh/vrs/extras/translator.py b/src/ga4gh/vrs/extras/translator.py index ad85bbd6..4acda7de 100644 --- a/src/ga4gh/vrs/extras/translator.py +++ b/src/ga4gh/vrs/extras/translator.py @@ -46,8 +46,8 @@ class _Translator(ABC): # noqa: B024 def __init__( self, data_proxy: _DataProxy, - default_assembly_name="GRCh38", - identify=True, + default_assembly_name: str = "GRCh38", + identify: bool = True, rle_seq_limit: int | None = 50, ): self.default_assembly_name = default_assembly_name @@ -56,9 +56,10 @@ def __init__( self.rle_seq_limit = rle_seq_limit self.from_translators = {} self.to_translators = {} - return - def translate_from(self, var, fmt=None, **kwargs): + def translate_from( + self, var: str, fmt: str | None = None, **kwargs + ) -> models._VariationBase: """Translate variation `var` to VRS object If `fmt` is None, guess the appropriate format and return the variant. @@ -109,7 +110,7 @@ def translate_from(self, var, fmt=None, **kwargs): msg = f"Unable to parse data as {', '.join(formats)}" raise ValueError(msg) - def translate_to(self, vo, fmt): + def translate_to(self, vo: models._VariationBase, fmt: str) -> str: """Translate vrs object `vo` to named format `fmt`""" t = self.to_translators[fmt] return t(vo) @@ -118,11 +119,11 @@ def translate_to(self, vo, fmt): # INTERNAL @lazy_property - def hgvs_tools(self): + def hgvs_tools(self) -> HgvsTools: """Instantiate and return an HgvsTools instance""" return HgvsTools(self.data_proxy) - def _from_vrs(self, var): + def _from_vrs(self, var: dict) -> models._VariationBase | None: """Convert from dict representation of VRS JSON to VRS object""" if not isinstance(var, Mapping): return None @@ -138,7 +139,12 @@ def _from_vrs(self, var): class AlleleTranslator(_Translator): """Class for translating formats to and from VRS Alleles""" - def __init__(self, data_proxy, default_assembly_name="GRCh38", identify=True): + def __init__( + self, + data_proxy: _DataProxy, + default_assembly_name: str = "GRCh38", + identify: bool = True, + ): """Initialize AlleleTranslator class""" super().__init__(data_proxy, default_assembly_name, identify) @@ -155,7 +161,7 @@ def __init__(self, data_proxy, default_assembly_name="GRCh38", identify=True): "spdi": self._to_spdi, } - def _create_allele(self, values: dict, **kwargs): + def _create_allele(self, values: dict, **kwargs) -> models.Allele: """Create an allele object with the given parameters. Args: @@ -178,7 +184,7 @@ def _create_allele(self, values: dict, **kwargs): allele = models.Allele(location=location, state=state) return self._post_process_imported_allele(allele, **kwargs) - def _from_beacon(self, beacon_expr, **kwargs): + def _from_beacon(self, beacon_expr: str, **kwargs) -> models.Allele | None: """Parse beacon expression into VRS Allele kwargs: @@ -239,7 +245,7 @@ def _from_beacon(self, beacon_expr, **kwargs): } return self._create_allele(values, **kwargs) - def _from_gnomad(self, gnomad_expr, **kwargs): + def _from_gnomad(self, gnomad_expr: str, **kwargs) -> models.Allele | None: """Parse gnomAD-style VCF expression into VRS Allele kwargs: @@ -312,13 +318,13 @@ def _from_gnomad(self, gnomad_expr, **kwargs): } return self._create_allele(values, **kwargs) - def _from_hgvs(self, hgvs_expr: str, **kwargs): + def _from_hgvs(self, hgvs_expr: str, **kwargs) -> models.Allele | None: allele_values = self.hgvs_tools.extract_allele_values(hgvs_expr) if allele_values: return self._create_allele(allele_values, **kwargs) return None - def _from_spdi(self, spdi_expr, **kwargs): + def _from_spdi(self, spdi_expr: str, **kwargs) -> models.Allele | None: """Parse SPDI expression in to a GA4GH Allele kwargs: @@ -378,10 +384,14 @@ def _from_spdi(self, spdi_expr, **kwargs): return self._create_allele(values, **kwargs) - def _to_hgvs(self, vo, namespace="refseq"): + def _to_hgvs( + self, vo: models.Allele, namespace: str | None = "refseq" + ) -> list[str]: return self.hgvs_tools.from_allele(vo, namespace) - def _to_spdi(self, vo, namespace="refseq"): + def _to_spdi( + self, vo: models.Allele, namespace: str | None = "refseq" + ) -> list[str]: """Generate a *list* of SPDI expressions for VRS Allele. If `namespace` is not None, returns SPDI strings for the @@ -405,7 +415,9 @@ def _to_spdi(self, vo, namespace="refseq"): spdi_tail = f":{start}:{end - start}:{vo.state.sequence.root}" return [a + spdi_tail for a in aliases] - def _post_process_imported_allele(self, allele, **kwargs): + def _post_process_imported_allele( + self, allele: models.Allele, **kwargs + ) -> models.Allele: """Provide common post-processing for imported Alleles IN-PLACE. :param allele: VRS Allele object @@ -435,14 +447,21 @@ def _post_process_imported_allele(self, allele, **kwargs): class CnvTranslator(_Translator): """Class for translating formats from format to VRS Copy Number""" - def __init__(self, data_proxy, default_assembly_name="GRCh38", identify=True): + def __init__( + self, + data_proxy: _DataProxy, + default_assembly_name: str = "GRCh38", + identify: bool = True, + ): """Initialize CnvTranslator class""" super().__init__(data_proxy, default_assembly_name, identify) self.from_translators = { "hgvs": self._from_hgvs, } - def _from_hgvs(self, hgvs_dup_del_expr: str, **kwargs): + def _from_hgvs( + self, hgvs_dup_del_expr: str, **kwargs + ) -> models.CopyNumberChange | models.CopyNumberCount | None: """Parse hgvs into a VRS CNV Object kwargs: @@ -495,7 +514,9 @@ def _from_hgvs(self, hgvs_dup_del_expr: str, **kwargs): return self._post_process_imported_cnv(cnv) - def _post_process_imported_cnv(self, copy_number): + def _post_process_imported_cnv( + self, copy_number: models.CopyNumberChange | models.CopyNumberCount + ) -> models.CopyNumberChange | models.CopyNumberCount: """Provide common post-processing for imported Copy Numbers IN-PLACE.""" if self.identify: copy_number.id = ga4gh_identify(copy_number) diff --git a/src/ga4gh/vrs/utils/hgvs_tools.py b/src/ga4gh/vrs/utils/hgvs_tools.py index b49a6460..fbfb10d2 100644 --- a/src/ga4gh/vrs/utils/hgvs_tools.py +++ b/src/ga4gh/vrs/utils/hgvs_tools.py @@ -7,11 +7,11 @@ import hgvs.dataproviders.uta import hgvs.normalizer import hgvs.parser -import hgvs.sequencevariant import hgvs.variantmapper +from hgvs.sequencevariant import SequenceVariant as HgvsSequenceVariant from ga4gh.vrs import models -from ga4gh.vrs.dataproxy import _DataProxy, create_dataproxy +from ga4gh.vrs.dataproxy import _DataProxy _logger = logging.getLogger(__name__) @@ -53,7 +53,7 @@ def close(self) -> None: self.uta_conn.close() # convenience methods for hgvs parsing, normalization, and some mappings - def parse(self, hgvs_str): + def parse(self, hgvs_str: str) -> HgvsSequenceVariant | None: """Parse the given HGVS string and returns the corresponding variant. Args: @@ -67,7 +67,7 @@ def parse(self, hgvs_str): return None return self.parser.parse_hgvs_variant(hgvs_str) - def is_intronic(self, sv: hgvs.sequencevariant.SequenceVariant): + def is_intronic(self, sv: HgvsSequenceVariant) -> bool: """Check if the given SequenceVariant is intronic. Args: @@ -81,12 +81,13 @@ def is_intronic(self, sv: hgvs.sequencevariant.SequenceVariant): return sv.posedit.pos.start.is_intronic or sv.posedit.pos.end.is_intronic return False - def get_edit_type(self, sv: hgvs.sequencevariant.SequenceVariant): + def get_edit_type(self, sv: HgvsSequenceVariant) -> str | None: + """Safely extract ``type`` property from SequenceVariant""" if sv is None or sv.posedit is None or sv.posedit.edit is None: return None return sv.posedit.edit.type - def get_position_and_state(self, sv: hgvs.sequencevariant.SequenceVariant): + def get_position_and_state(self, sv: HgvsSequenceVariant) -> tuple[int, int, str]: """Get the details of a sequence variant. Args: @@ -130,7 +131,7 @@ def get_position_and_state(self, sv: hgvs.sequencevariant.SequenceVariant): return start, end, state - def extract_allele_values(self, hgvs_expr: str): + def extract_allele_values(self, hgvs_expr: str) -> dict | None: """Parse hgvs into a VRS Allele Object kwargs: @@ -187,7 +188,7 @@ def extract_allele_values(self, hgvs_expr: str): "literal_sequence": state, } - def from_allele(self, vo, namespace=None): + def from_allele(self, vo: models.Allele, namespace: str | None = None) -> list[str]: """Generate a *list* of HGVS expressions for VRS Allele. If `namespace` is not None, returns HGVS strings for the @@ -259,7 +260,9 @@ def from_allele(self, vo, namespace=None): return list(set(hgvs_exprs)) - def _to_sequence_variant(self, vo, sequence_type, sequence, accession): + def _to_sequence_variant( + self, vo: models.Allele, sequence_type: str, sequence: str, accession: str + ) -> HgvsSequenceVariant: """Create a SequenceVariant object from an Allele object.""" # build interval and edit depending on sequence type if sequence_type == "p": @@ -285,7 +288,7 @@ def _to_sequence_variant(self, vo, sequence_type, sequence, accession): edit = hgvs.edit.NARefAlt(ref=ref, alt=alt) posedit = hgvs.posedit.PosEdit(pos=ival, edit=edit) - var = hgvs.sequencevariant.SequenceVariant( + var = HgvsSequenceVariant( ac=accession, # at this point, use `n.` because the positions are absolute (not CDS), # this will subsequently be converted back to `c.` after hgvs normalization @@ -307,19 +310,24 @@ def _to_sequence_variant(self, vo, sequence_type, sequence, accession): return var - def normalize(self, hgvs): + def normalize(self, hgvs: HgvsSequenceVariant) -> HgvsSequenceVariant: + """Perform hgvs library `normalize()` method""" return self.normalizer.normalize(hgvs) - def n_to_c(self, hgvs): + def n_to_c(self, hgvs: HgvsSequenceVariant) -> HgvsSequenceVariant: + """Perform hgvs library `n_to_c` method""" return self.variant_mapper.n_to_c(hgvs) - def c_to_n(self, hgvs): + def c_to_n(self, hgvs: HgvsSequenceVariant) -> HgvsSequenceVariant: + """Perform hgvs library `c_to_n` method""" return self.variant_mapper.c_to_n(hgvs) if __name__ == "__main__": import os + from ga4gh.vrs.dataproxy import create_dataproxy + seqrepo_uri = os.environ.get( "SEQREPO_URI", "seqrepo+file:///usr/local/share/seqrepo/latest" ) @@ -345,7 +353,7 @@ def c_to_n(self, hgvs): "state": {"type": "LiteralSequenceExpression", "sequence": "G"}, } - vrs_allele = models.Allele.parse_obj(allele_dict) + vrs_allele = models.Allele.model_validate(allele_dict) dp = create_dataproxy(seqrepo_uri) hgvs_tools = HgvsTools(dp) hgvs_expr = hgvs_tools.from_allele(vrs_allele, namespace="refseq")