From 439c4db832b645856b01cf62cfe935c8db054ec5 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Sun, 2 Feb 2025 11:50:51 -0500 Subject: [PATCH 01/12] progress --- pyproject.toml | 5 +- src/ga4gh/vrs/extras/annotator/__init__.py | 0 src/ga4gh/vrs/extras/annotator/cli.py | 195 +++++++++ .../{vcf_annotation.py => annotator/vcf.py} | 401 +++++------------- 4 files changed, 291 insertions(+), 310 deletions(-) create mode 100644 src/ga4gh/vrs/extras/annotator/__init__.py create mode 100644 src/ga4gh/vrs/extras/annotator/cli.py rename src/ga4gh/vrs/extras/{vcf_annotation.py => annotator/vcf.py} (52%) diff --git a/pyproject.toml b/pyproject.toml index 39ff55cb..3aa80fa8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,7 @@ Source = "https://github.com/ga4gh/vrs-python" "Bug Tracker" = "https://github.com/ga4gh/vrs-python/issues" [project.scripts] -vrs-annotate = "ga4gh.vrs.extras.vcf_annotation:_cli" +vrs-annotate = "ga4gh.vrs.extras.annotator.cli:_cli" [build-system] requires = ["setuptools>=65.3", "setuptools_scm>=8"] @@ -200,9 +200,6 @@ exclude = [ "ANN201", "ANN202", ] -"src/ga4gh/vrs/extras/vcf_annotation.py" = [ - "PTH123", # see https://github.com/ga4gh/vrs-python/issues/482 -] "src/ga4gh/vrs/extras/translator.py" = [ "ANN001", "ANN101", diff --git a/src/ga4gh/vrs/extras/annotator/__init__.py b/src/ga4gh/vrs/extras/annotator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/ga4gh/vrs/extras/annotator/cli.py b/src/ga4gh/vrs/extras/annotator/cli.py new file mode 100644 index 00000000..87d3d109 --- /dev/null +++ b/src/ga4gh/vrs/extras/annotator/cli.py @@ -0,0 +1,195 @@ +"""Define command-line interface for VRS annotator tool. + +$ vrs-annotate vcf input.vcf.gz --vcf_out output.vcf.gz --vrs_pickle_out vrs_objects.pkl + +""" + +import logging +from collections.abc import Callable +from enum import Enum +from pathlib import Path +from timeit import default_timer as timer + +import click + +_logger = logging.getLogger(__name__) + + +@click.group() +def _cli() -> None: + """Annotate input files with VRS variation objects.""" + logging.basicConfig( + filename="vrs-annotator.log", + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + + +class _LogLevel(str, Enum): + """Define legal values for `--log_level` option.""" + + DEBUG = "debug" + INFO = "info" + WARNING = "warning" + ERROR = "error" + CRITICAL = "critical" + + +def _log_level_option(func: Callable) -> Callable: + """Provide reusable log level CLI option decorator. + + Adds a `--log_level` CLI option to any decorated command. Doesn't pass on any + values, just sets the logging level for this module. + + :param func: incoming click command + :return: same command, wrapped with log level option + """ + + def _set_log_level(ctx: dict, param: str, value: _LogLevel) -> None: # noqa: ARG001 + level_map = { + _LogLevel.DEBUG: logging.DEBUG, + _LogLevel.INFO: logging.INFO, + _LogLevel.WARNING: logging.WARNING, + _LogLevel.ERROR: logging.ERROR, + _LogLevel.CRITICAL: logging.CRITICAL, + } + logging.getLogger(__name__).setLevel(level_map[value]) + + return click.option( + "--log_level", + type=click.Choice([v.value for v in _LogLevel.__members__.values()]), + default="info", + help="Set the logging level.", + callback=_set_log_level, + expose_value=False, + is_eager=True, + )(func) + + +@_cli.command(name="vcf") +@_log_level_option +@click.argument( + "vcf_in", + nargs=1, + type=click.Path(exists=True, readable=True, dir_okay=False, path_type=Path), +) +@click.option( + "--vcf_out", + required=False, + type=click.Path(writable=True, allow_dash=False, path_type=Path), + help=( + "Declare save location for output annotated VCF. If not provided, must provide --vrs_pickle_out." + ), +) +@click.option( + "--vrs_pickle_out", + required=False, + type=click.Path(writable=True, allow_dash=False, path_type=Path), + help=( + "Declare save location for output VCF pickle. If not provided, must provide --vcf_out." + ), +) +@click.option( + "--vrs_attributes", + is_flag=True, + default=False, + help="Include VRS_Start, VRS_End, and VRS_State fields in the VCF output INFO field.", +) +@click.option( + "--seqrepo_dp_type", + required=False, + default=SeqRepoProxyType.LOCAL, + type=click.Choice( + [v.value for v in SeqRepoProxyType.__members__.values()], case_sensitive=True + ), + help="Specify type of SeqRepo dataproxy to use.", + show_default=True, + show_choices=True, +) +@click.option( + "--seqrepo_root_dir", + required=False, + default=Path("/usr/local/share/seqrepo/latest"), + type=click.Path(path_type=Path), + help="Define root directory for local SeqRepo instance, if --seqrepo_dp_type=local.", + show_default=True, +) +@click.option( + "--seqrepo_base_url", + required=False, + default="http://localhost:5000/seqrepo", + help="Specify base URL for SeqRepo REST API, if --seqrepo_dp_type=rest.", + show_default=True, +) +@click.option( + "--assembly", + required=False, + default="GRCh38", + show_default=True, + help="Specify assembly that was used to create input VCF.", + type=str, +) +@click.option( + "--skip_ref", + is_flag=True, + default=False, + help="Skip VRS computation for REF alleles.", +) +@click.option( + "--require_validation", + is_flag=True, + default=False, + help="Require validation checks to pass to construct a VRS object.", +) +@click.option( + "--silent", + "-s", + is_flag=True, + default=False, + help="Suppress messages printed to stdout", +) +def _annotate_vcf_cli( + vcf_in: Path, + vcf_out: Path | None, + vrs_pickle_out: Path | None, + vrs_attributes: bool, + seqrepo_dp_type: SeqRepoProxyType, + seqrepo_root_dir: Path, + seqrepo_base_url: str, + assembly: str, + skip_ref: bool, + require_validation: bool, + silent: bool, +) -> None: + """Extract VRS objects from VCF located at VCF_IN. + + $ vrs-annotate vcf input.vcf.gz --vcf_out output.vcf.gz --vrs_pickle_out vrs_objects.pkl + + Note that at least one of --vcf_out or --vrs_pickle_out must be selected and defined. + """ + annotator = VCFAnnotator( + seqrepo_dp_type, seqrepo_base_url, str(seqrepo_root_dir.absolute()) + ) + vcf_out_str = str(vcf_out.absolute()) if vcf_out is not None else vcf_out + vrs_pkl_out_str = ( + str(vrs_pickle_out.absolute()) if vrs_pickle_out is not None else vrs_pickle_out + ) + start = timer() + msg = f"Annotating {vcf_in} with the VCF Annotator..." + _logger.info(msg) + if not silent: + click.echo(msg) + annotator.annotate( + str(vcf_in.absolute()), + vcf_out=vcf_out_str, + vrs_pickle_out=vrs_pkl_out_str, + vrs_attributes=vrs_attributes, + assembly=assembly, + compute_for_ref=(not skip_ref), + require_validation=require_validation, + ) + end = timer() + msg = f"VCF Annotator finished in {(end - start):.5f} seconds" + _logger.info(msg) + if not silent: + click.echo(msg) diff --git a/src/ga4gh/vrs/extras/vcf_annotation.py b/src/ga4gh/vrs/extras/annotator/vcf.py similarity index 52% rename from src/ga4gh/vrs/extras/vcf_annotation.py rename to src/ga4gh/vrs/extras/annotator/vcf.py index d0748bdf..5a29b63f 100644 --- a/src/ga4gh/vrs/extras/vcf_annotation.py +++ b/src/ga4gh/vrs/extras/annotator/vcf.py @@ -5,218 +5,22 @@ """ import logging -import pathlib import pickle -from collections.abc import Callable -from enum import Enum -from timeit import default_timer as timer +from pathlib import Path +from typing import ClassVar -import click import pysam -from biocommons.seqrepo import SeqRepo from pydantic import ValidationError from ga4gh.core import VrsObjectIdentifierIs, use_ga4gh_compute_identifier_when -from ga4gh.vrs.dataproxy import ( - DataProxyValidationError, - SeqRepoDataProxy, - SeqRepoRESTDataProxy, -) +from ga4gh.vrs.dataproxy import DataProxyValidationError, _DataProxy from ga4gh.vrs.extras.translator import AlleleTranslator _logger = logging.getLogger(__name__) -_logger.setLevel(logging.DEBUG) -class VCFAnnotatorException(Exception): # noqa: N818 - """Custom exceptions for VCF Annotator tool""" - - -class SeqRepoProxyType(str, Enum): - """Define constraints for SeqRepo Data Proxy types""" - - LOCAL = "local" - REST = "rest" - - -@click.group() -def _cli() -> None: - """Annotate input files with VRS variation objects.""" - logging.basicConfig( - filename="vrs-annotate.log", - level=logging.INFO, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - ) - - -class _LogLevel(str, Enum): - """Define legal values for `--log_level` option.""" - - DEBUG = "debug" - INFO = "info" - WARNING = "warning" - ERROR = "error" - CRITICAL = "critical" - - -def _log_level_option(func: Callable) -> Callable: - """Provide reusable log level CLI option decorator. - - Adds a `--log_level` CLI option to any decorated command. Doesn't pass on any - values, just sets the logging level for this module. - - :param func: incoming click command - :return: same command, wrapped with log level option - """ - - def _set_log_level(ctx: dict, param: str, value: _LogLevel) -> None: # noqa: ARG001 - level_map = { - _LogLevel.DEBUG: logging.DEBUG, - _LogLevel.INFO: logging.INFO, - _LogLevel.WARNING: logging.WARNING, - _LogLevel.ERROR: logging.ERROR, - _LogLevel.CRITICAL: logging.CRITICAL, - } - logging.getLogger(__name__).setLevel(level_map[value]) - - return click.option( - "--log_level", - type=click.Choice([v.value for v in _LogLevel.__members__.values()]), - default="info", - help="Set the logging level.", - callback=_set_log_level, - expose_value=False, - is_eager=True, - )(func) - - -@_cli.command(name="vcf") -@_log_level_option -@click.argument( - "vcf_in", - nargs=1, - type=click.Path(exists=True, readable=True, dir_okay=False, path_type=pathlib.Path), -) -@click.option( - "--vcf_out", - required=False, - type=click.Path(writable=True, allow_dash=False, path_type=pathlib.Path), - help=( - "Declare save location for output annotated VCF. If not provided, must provide --vrs_pickle_out." - ), -) -@click.option( - "--vrs_pickle_out", - required=False, - type=click.Path(writable=True, allow_dash=False, path_type=pathlib.Path), - help=( - "Declare save location for output VCF pickle. If not provided, must provide --vcf_out." - ), -) -@click.option( - "--vrs_attributes", - is_flag=True, - default=False, - help="Include VRS_Start, VRS_End, and VRS_State fields in the VCF output INFO field.", -) -@click.option( - "--seqrepo_dp_type", - required=False, - default=SeqRepoProxyType.LOCAL, - type=click.Choice( - [v.value for v in SeqRepoProxyType.__members__.values()], case_sensitive=True - ), - help="Specify type of SeqRepo dataproxy to use.", - show_default=True, - show_choices=True, -) -@click.option( - "--seqrepo_root_dir", - required=False, - default=pathlib.Path("/usr/local/share/seqrepo/latest"), - type=click.Path(path_type=pathlib.Path), - help="Define root directory for local SeqRepo instance, if --seqrepo_dp_type=local.", - show_default=True, -) -@click.option( - "--seqrepo_base_url", - required=False, - default="http://localhost:5000/seqrepo", - help="Specify base URL for SeqRepo REST API, if --seqrepo_dp_type=rest.", - show_default=True, -) -@click.option( - "--assembly", - required=False, - default="GRCh38", - show_default=True, - help="Specify assembly that was used to create input VCF.", - type=str, -) -@click.option( - "--skip_ref", - is_flag=True, - default=False, - help="Skip VRS computation for REF alleles.", -) -@click.option( - "--require_validation", - is_flag=True, - default=False, - help="Require validation checks to pass to construct a VRS object.", -) -@click.option( - "--silent", - "-s", - is_flag=True, - default=False, - help="Suppress messages printed to stdout", -) -def _annotate_vcf_cli( - vcf_in: pathlib.Path, - vcf_out: pathlib.Path | None, - vrs_pickle_out: pathlib.Path | None, - vrs_attributes: bool, - seqrepo_dp_type: SeqRepoProxyType, - seqrepo_root_dir: pathlib.Path, - seqrepo_base_url: str, - assembly: str, - skip_ref: bool, - require_validation: bool, - silent: bool, -) -> None: - """Extract VRS objects from VCF located at VCF_IN. - - $ vrs-annotate vcf input.vcf.gz --vcf_out output.vcf.gz --vrs_pickle_out vrs_objects.pkl - - Note that at least one of --vcf_out or --vrs_pickle_out must be selected and defined. - """ - annotator = VCFAnnotator( - seqrepo_dp_type, seqrepo_base_url, str(seqrepo_root_dir.absolute()) - ) - vcf_out_str = str(vcf_out.absolute()) if vcf_out is not None else vcf_out - vrs_pkl_out_str = ( - str(vrs_pickle_out.absolute()) if vrs_pickle_out is not None else vrs_pickle_out - ) - start = timer() - msg = f"Annotating {vcf_in} with the VCF Annotator..." - _logger.info(msg) - if not silent: - click.echo(msg) - annotator.annotate( - str(vcf_in.absolute()), - vcf_out=vcf_out_str, - vrs_pickle_out=vrs_pkl_out_str, - vrs_attributes=vrs_attributes, - assembly=assembly, - compute_for_ref=(not skip_ref), - require_validation=require_validation, - ) - end = timer() - msg = f"VCF Annotator finished in {(end - start):.5f} seconds" - _logger.info(msg) - if not silent: - click.echo(msg) +class VCFAnnotatorError(Exception): + """Raise for errors specific to the VCF annotation process""" class VCFAnnotator: @@ -233,125 +37,113 @@ class VCFAnnotator: VRS_STATES_FIELD = "VRS_States" VRS_ERROR_FIELD = "VRS_Error" # VCF character escape map - VCF_ESCAPE_MAP = [ # noqa: RUF012 - ("%", "%25"), - (";", "%3B"), - (",", "%2C"), - ("\r", "%0D"), - ("\n", "%0A"), - ("\t", "%09"), - ] - - def __init__( - self, - seqrepo_dp_type: SeqRepoProxyType = SeqRepoProxyType.LOCAL, - seqrepo_base_url: str = "http://localhost:5000/seqrepo", - seqrepo_root_dir: str = "/usr/local/share/seqrepo/latest", - ) -> None: + VCF_ESCAPE_MAP: ClassVar = str.maketrans( + { + "%": "%25", + ";": "%3B", + ",": "%2C", + "\r": "%0D", + "\n": "%0A", + "\t": "%09", + } + ) + + def __init__(self, data_proxy: _DataProxy) -> None: """Initialize the VCFAnnotator class. - :param seqrepo_dp_type: The type of SeqRepo Data Proxy to use - (i.e., local vs REST) - :param seqrepo_base_url: The base url for SeqRepo REST API - :param seqrepo_root_dir: The root directory for the local SeqRepo instance + :param data_proxy: """ - if seqrepo_dp_type == SeqRepoProxyType.LOCAL: - self.dp = SeqRepoDataProxy(SeqRepo(seqrepo_root_dir)) - else: - self.dp = SeqRepoRESTDataProxy(seqrepo_base_url) - self.tlr = AlleleTranslator(self.dp) + self.data_proxy = data_proxy + self.tlr = AlleleTranslator(self.data_proxy) - @use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.MISSING) - def annotate( + def _update_vcf_header( self, - vcf_in: str, - vcf_out: str | None = None, - vrs_pickle_out: str | None = None, - vrs_attributes: bool = False, - assembly: str = "GRCh38", - compute_for_ref: bool = True, - require_validation: bool = True, + vcf: pysam.VariantFile, + info_field_num: str, + info_field_desc: str, + incl_vrs_attrs: bool, ) -> None: - """Given a VCF, produce an output VCF annotated with VRS allele IDs, and/or - a pickle file containing the full VRS objects. - - :param vcf_in: Location of input VCF - :param vcf_out: The path for the output VCF file - :param vrs_pickle_out: The path for the output VCF pickle file - :param vrs_attributes: If `True`, include VRS_Start, VRS_End, VRS_State - properties in the VCF INFO field. If `False` will not include these - properties. Only used if `vcf_out` is defined. - :param assembly: The assembly used in `vcf_in` data - :param compute_for_ref: If true, compute VRS IDs for the reference allele - :param require_validation: If `True`, validation checks (i.e., REF value - matches expected REF at given location) must pass in order to return a VRS - object for a record. If `False` then VRS object will be returned even if - validation checks fail, although all instances of failed validation are - logged as warnings regardless. - """ - if not any((vcf_out, vrs_pickle_out)): - msg = "Must provide one of: `vcf_out` or `vrs_pickle_out`" - raise VCFAnnotatorException(msg) - - info_field_num = "R" if compute_for_ref else "A" - info_field_desc = "REF and ALT" if compute_for_ref else "ALT" - - vrs_data = {} - vcf_in = pysam.VariantFile(filename=vcf_in) - vcf_in.header.info.add( + vcf.header.info.add( self.VRS_ALLELE_IDS_FIELD, info_field_num, "String", - ( - "The computed identifiers for the GA4GH VRS Alleles corresponding to the " - f"GT indexes of the {info_field_desc} alleles" - ), + f"The computed identifiers for the GA4GH VRS Alleles corresponding to the GT indexes of the {info_field_desc} alleles", ) - vcf_in.header.info.add( + vcf.header.info.add( self.VRS_ERROR_FIELD, ".", "String", - ("If an error occurred computing a VRS Identifier, the error message"), + "If an error occurred computing a VRS Identifier, the error message", ) - if vrs_attributes: - vcf_in.header.info.add( + if incl_vrs_attrs: + vcf.header.info.add( self.VRS_STARTS_FIELD, info_field_num, "String", - ( - "Interresidue coordinates used as the location starts for the GA4GH " - f"VRS Alleles corresponding to the GT indexes of the {info_field_desc} alleles" - ), + f"Interresidue coordinates used as the location starts for the GA4GH VRS Alleles corresponding to the GT indexes of the {info_field_desc} alleles", ) - vcf_in.header.info.add( + vcf.header.info.add( self.VRS_ENDS_FIELD, info_field_num, "String", - ( - "Interresidue coordinates used as the location ends for the GA4GH VRS " - f"Alleles corresponding to the GT indexes of the {info_field_desc} alleles" - ), + f"Interresidue coordinates used as the location ends for the GA4GH VRS Alleles corresponding to the GT indexes of the {info_field_desc} alleles", ) - vcf_in.header.info.add( + vcf.header.info.add( self.VRS_STATES_FIELD, info_field_num, "String", - ( - "The literal sequence states used for the GA4GH VRS Alleles " - f"corresponding to the GT indexes of the {info_field_desc} alleles" - ), + f"The literal sequence states used for the GA4GH VRS Alleles corresponding to the GT indexes of the {info_field_desc} alleles", ) - if vcf_out: - vcf_out = pysam.VariantFile(vcf_out, "w", header=vcf_in.header) + @use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.MISSING) + def annotate( + self, + input_vcf_path: Path, + output_vcf_path: Path | None = None, + output_pkl_path: Path | None = None, + incl_vrs_attrs: bool = False, + assembly: str = "GRCh38", + compute_for_ref: bool = True, + require_validation: bool = True, + ) -> None: + """Given a VCF, produce an output VCF annotated with VRS allele IDs, and/or + a pickle file containing the full VRS objects. + + :param input_vcf_path: location of input VCF + :param output_vcf_path: location at which to save output VCF (optional) + :param output_pkl_path: location at which to save output PKL file (output) + :param incl_vrs_attrs: whether ``VRS_Start``, ``VRS_End``, and ``VRS_State`` + attributes should be included in output VCF info field. These properties + may be useful to retain outside of the VRS object for reasons like + searchability. Does nothing if ``output_vcf_path`` left unset. + :param assembly: The assembly used in `vcf_in` data + :param compute_for_ref: If true, compute VRS IDs for the reference allele + :param require_validation: If ``True``, validation checks (i.e., REF value + matches expected REF at given location) must pass in order to return a VRS + object for a record. If ``False`` then VRS object will be returned even if + validation checks fail, although all instances of failed validation are + logged as warnings regardless. + """ + if not any((output_vcf_path, output_pkl_path)): + msg = "Must provide one of: `output_vcf_path` or `output_pkl_path`" + raise VCFAnnotatorError(msg) - output_vcf = bool(vcf_out) - output_pickle = bool(vrs_pickle_out) + vcf = pysam.VariantFile(filename=str(input_vcf_path.absolute())) + info_field_num = "R" if compute_for_ref else "A" + info_field_desc = "REF and ALT" if compute_for_ref else "ALT" + self._update_vcf_header(vcf, info_field_num, info_field_desc, incl_vrs_attrs) - for record in vcf_in: + vcf_out = ( + pysam.VariantFile(str(output_vcf_path.absolute()), "w", header=vcf.header) + if output_vcf_path + else None + ) + + vrs_data = {} + for record in vcf: additional_info_fields = [self.VRS_ALLELE_IDS_FIELD] - if vrs_attributes: + if incl_vrs_attrs: additional_info_fields += [ self.VRS_STARTS_FIELD, self.VRS_ENDS_FIELD, @@ -363,7 +155,7 @@ def annotate( vrs_data, assembly, additional_info_fields, - vrs_attributes=vrs_attributes, + incl_vrs_attrs=incl_vrs_attrs, output_pickle=output_pickle, output_vcf=output_vcf, compute_for_ref=compute_for_ref, @@ -372,8 +164,7 @@ def annotate( except Exception as ex: _logger.exception("VRS error on %s-%s", record.chrom, record.pos) err_msg = f"{ex}" or f"{type(ex)}" - for search_repl in VCFAnnotator.VCF_ESCAPE_MAP: - err_msg = err_msg.replace(search_repl[0], search_repl[1]) + err_msg = err_msg.translate(self.VCF_ESCAPE_MAP) additional_info_fields = [self.VRS_ERROR_FIELD] vrs_field_data = {self.VRS_ERROR_FIELD: [err_msg]} @@ -383,19 +174,17 @@ def annotate( record.pos, vrs_field_data, ) - - if output_vcf: + if output_vcf_path and vcf_out: for k in additional_info_fields: record.info[k] = [value or "." for value in vrs_field_data[k]] vcf_out.write(record) - vcf_in.close() + vcf.close() - if output_vcf: + if vcf_out: vcf_out.close() - - if vrs_pickle_out: - with open(vrs_pickle_out, "wb") as wf: + if output_pkl_path: + with output_pkl_path.open("wb") as wf: pickle.dump(vrs_data, wf) def _get_vrs_object( @@ -495,11 +284,11 @@ def _get_vrs_data( vrs_data: dict, assembly: str, additional_info_fields: list[str], - vrs_attributes: bool = False, - output_pickle: bool = True, - output_vcf: bool = True, - compute_for_ref: bool = True, - require_validation: bool = True, + incl_vrs_attrs: bool, + output_pickle: bool, + output_vcf: bool, + compute_for_ref: bool, + require_validation: bool, ) -> dict: """Get VRS data for record's reference and alt alleles. @@ -537,7 +326,7 @@ def _get_vrs_data( assembly, output_pickle=output_pickle, output_vcf=output_vcf, - vrs_attributes=vrs_attributes, + vrs_attributes=incl_vrs_attrs, require_validation=require_validation, ) @@ -560,7 +349,7 @@ def _get_vrs_data( vrs_data_key=data, output_pickle=output_pickle, output_vcf=output_vcf, - vrs_attributes=vrs_attributes, + vrs_attributes=incl_vrs_attrs, require_validation=require_validation, ) From d524cac5eea376766ff850a44dbb4028a11cda88 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Mon, 3 Feb 2025 11:04:40 -0500 Subject: [PATCH 02/12] more stash --- src/ga4gh/vrs/extras/annotator/vcf.py | 197 +++++++++++++------------- 1 file changed, 101 insertions(+), 96 deletions(-) diff --git a/src/ga4gh/vrs/extras/annotator/vcf.py b/src/ga4gh/vrs/extras/annotator/vcf.py index 5a29b63f..e117a772 100644 --- a/src/ga4gh/vrs/extras/annotator/vcf.py +++ b/src/ga4gh/vrs/extras/annotator/vcf.py @@ -59,10 +59,11 @@ def __init__(self, data_proxy: _DataProxy) -> None: def _update_vcf_header( self, vcf: pysam.VariantFile, - info_field_num: str, - info_field_desc: str, + incl_ref_allele: bool, incl_vrs_attrs: bool, ) -> None: + info_field_num = "R" if incl_ref_allele else "A" + info_field_desc = "REF and ALT" if incl_ref_allele else "ALT" vcf.header.info.add( self.VRS_ALLELE_IDS_FIELD, info_field_num, @@ -96,6 +97,84 @@ def _update_vcf_header( f"The literal sequence states used for the GA4GH VRS Alleles corresponding to the GT indexes of the {info_field_desc} alleles", ) + def _process_vcf_row( + self, + record: pysam.VariantRecord, + vrs_data: dict, + assembly: str, + vrs_info_fields: list[str], + incl_vrs_attrs: bool, + incl_ref_allele: bool, + output_pickle: bool, + require_validation: bool, + ) -> dict: + """Compute VRS objects for a VCF row. + + Get VRS data for record's reference (if requested) and alt alleles. Return + INFO field values to annotate VCF row with. + + # TODO update these + :param record: A row in the VCF file + :param vrs_data: Dictionary containing the VRS object information for the VCF. + Will be mutated if `output_pickle = True` + :param assembly: The assembly used in `record` + :param additional_info_fields: Additional VRS fields to add in INFO field + :param vrs_attributes: If `True` will include VRS_Start, VRS_End, VRS_State + fields in the INFO field. If `False` will not include these fields. Only + used if `output_vcf` set to `True`. + :param output_pickle: If `True`, VRS pickle file will be output. + :param output_vcf: If `True`, annotated VCF file will be output. + :param compute_for_ref: If true, compute VRS IDs for the reference allele + :param require_validation: If `True` then validation checks must pass in + order to return a VRS object. A `DataProxyValidationError` will be raised if + validation checks fail. If `False` then VRS object will be returned even if + validation checks fail. Defaults to `True`. + :return: If `output_vcf = True`, a dictionary containing VRS Fields and list + of associated values. If `output_vcf = False`, an empty dictionary will be + returned. + """ + vrs_info_field_data = {field: [] for field in vrs_info_fields} + + # Get VRS data for reference allele + gnomad_loc = f"{record.chrom}-{record.pos}" + if incl_ref_allele: + reference_allele = f"{gnomad_loc}-{record.ref}-{record.ref}" + self._get_vrs_object( + reference_allele, + vrs_data, + vrs_info_field_data, + assembly, + output_pickle=output_pickle, + output_vcf=output_vcf, + vrs_attributes=incl_vrs_attrs, + require_validation=require_validation, + ) + + # Get VRS data for alts + alts = record.alts or [] + alleles = [f"{gnomad_loc}-{record.ref}-{a}" for a in [*alts]] + data = f"{record.chrom}\t{record.pos}\t{record.ref}\t{record.alts}" + for allele in alleles: + if "*" in allele: + _logger.debug("Star allele found: %s", allele) + if output_vcf: + for field in vrs_info_fields: + vrs_info_field_data[field].append("") + else: + self._get_vrs_object( + allele, + vrs_data, + vrs_info_field_data, + assembly, + vrs_data_key=data, + output_pickle=output_pickle, + output_vcf=output_vcf, + vrs_attributes=incl_vrs_attrs, + require_validation=require_validation, + ) + + return vrs_info_field_data + @use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.MISSING) def annotate( self, @@ -103,8 +182,8 @@ def annotate( output_vcf_path: Path | None = None, output_pkl_path: Path | None = None, incl_vrs_attrs: bool = False, + incl_ref_allele: bool = True, assembly: str = "GRCh38", - compute_for_ref: bool = True, require_validation: bool = True, ) -> None: """Given a VCF, produce an output VCF annotated with VRS allele IDs, and/or @@ -117,8 +196,9 @@ def annotate( attributes should be included in output VCF info field. These properties may be useful to retain outside of the VRS object for reasons like searchability. Does nothing if ``output_vcf_path`` left unset. + :param incl_ref_allele: If true, perform VRS ID computation for REF allele and + include the corresponding VRS object in any data dumps :param assembly: The assembly used in `vcf_in` data - :param compute_for_ref: If true, compute VRS IDs for the reference allele :param require_validation: If ``True``, validation checks (i.e., REF value matches expected REF at given location) must pass in order to return a VRS object for a record. If ``False`` then VRS object will be returned even if @@ -130,9 +210,8 @@ def annotate( raise VCFAnnotatorError(msg) vcf = pysam.VariantFile(filename=str(input_vcf_path.absolute())) - info_field_num = "R" if compute_for_ref else "A" - info_field_desc = "REF and ALT" if compute_for_ref else "ALT" - self._update_vcf_header(vcf, info_field_num, info_field_desc, incl_vrs_attrs) + if output_vcf_path: + self._update_vcf_header(vcf, incl_ref_allele, incl_vrs_attrs) vcf_out = ( pysam.VariantFile(str(output_vcf_path.absolute()), "w", header=vcf.header) @@ -142,30 +221,33 @@ def annotate( vrs_data = {} for record in vcf: - additional_info_fields = [self.VRS_ALLELE_IDS_FIELD] - if incl_vrs_attrs: - additional_info_fields += [ - self.VRS_STARTS_FIELD, - self.VRS_ENDS_FIELD, - self.VRS_STATES_FIELD, - ] + if vcf_out: + vrs_info_fields = [self.VRS_ALLELE_IDS_FIELD] + if incl_vrs_attrs: + vrs_info_fields += [ + self.VRS_STARTS_FIELD, + self.VRS_ENDS_FIELD, + self.VRS_STATES_FIELD, + ] + else: + vrs_info_fields = [] try: - vrs_field_data = self._get_vrs_data( + vrs_field_data = self._process_vcf_row( record, vrs_data, assembly, - additional_info_fields, + vrs_info_fields, incl_vrs_attrs=incl_vrs_attrs, + incl_ref_allele=incl_ref_allele, output_pickle=output_pickle, output_vcf=output_vcf, - compute_for_ref=compute_for_ref, require_validation=require_validation, ) except Exception as ex: _logger.exception("VRS error on %s-%s", record.chrom, record.pos) err_msg = f"{ex}" or f"{type(ex)}" err_msg = err_msg.translate(self.VCF_ESCAPE_MAP) - additional_info_fields = [self.VRS_ERROR_FIELD] + vrs_info_fields = [self.VRS_ERROR_FIELD] vrs_field_data = {self.VRS_ERROR_FIELD: [err_msg]} _logger.debug( @@ -175,7 +257,7 @@ def annotate( vrs_field_data, ) if output_vcf_path and vcf_out: - for k in additional_info_fields: + for k in vrs_info_fields: record.info[k] = [value or "." for value in vrs_field_data[k]] vcf_out.write(record) @@ -277,80 +359,3 @@ def _get_vrs_object( vrs_field_data[self.VRS_STARTS_FIELD].append(start) vrs_field_data[self.VRS_ENDS_FIELD].append(end) vrs_field_data[self.VRS_STATES_FIELD].append(alt) - - def _get_vrs_data( - self, - record: pysam.VariantRecord, - vrs_data: dict, - assembly: str, - additional_info_fields: list[str], - incl_vrs_attrs: bool, - output_pickle: bool, - output_vcf: bool, - compute_for_ref: bool, - require_validation: bool, - ) -> dict: - """Get VRS data for record's reference and alt alleles. - - :param record: A row in the VCF file - :param vrs_data: Dictionary containing the VRS object information for the VCF. - Will be mutated if `output_pickle = True` - :param assembly: The assembly used in `record` - :param additional_info_fields: Additional VRS fields to add in INFO field - :param vrs_attributes: If `True` will include VRS_Start, VRS_End, VRS_State - fields in the INFO field. If `False` will not include these fields. Only - used if `output_vcf` set to `True`. - :param output_pickle: If `True`, VRS pickle file will be output. - :param output_vcf: If `True`, annotated VCF file will be output. - :param compute_for_ref: If true, compute VRS IDs for the reference allele - :param require_validation: If `True` then validation checks must pass in - order to return a VRS object. A `DataProxyValidationError` will be raised if - validation checks fail. If `False` then VRS object will be returned even if - validation checks fail. Defaults to `True`. - :return: If `output_vcf = True`, a dictionary containing VRS Fields and list - of associated values. If `output_vcf = False`, an empty dictionary will be - returned. - """ - vrs_field_data = ( - {field: [] for field in additional_info_fields} if output_vcf else {} - ) - - # Get VRS data for reference allele - gnomad_loc = f"{record.chrom}-{record.pos}" - if compute_for_ref: - reference_allele = f"{gnomad_loc}-{record.ref}-{record.ref}" - self._get_vrs_object( - reference_allele, - vrs_data, - vrs_field_data, - assembly, - output_pickle=output_pickle, - output_vcf=output_vcf, - vrs_attributes=incl_vrs_attrs, - require_validation=require_validation, - ) - - # Get VRS data for alts - alts = record.alts or [] - alleles = [f"{gnomad_loc}-{record.ref}-{a}" for a in [*alts]] - data = f"{record.chrom}\t{record.pos}\t{record.ref}\t{record.alts}" - for allele in alleles: - if "*" in allele: - _logger.debug("Star allele found: %s", allele) - if output_vcf: - for field in additional_info_fields: - vrs_field_data[field].append("") - else: - self._get_vrs_object( - allele, - vrs_data, - vrs_field_data, - assembly, - vrs_data_key=data, - output_pickle=output_pickle, - output_vcf=output_vcf, - vrs_attributes=incl_vrs_attrs, - require_validation=require_validation, - ) - - return vrs_field_data From a9b6ade661940276e678cbd364c1f398273e2f18 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Mon, 3 Feb 2025 20:32:14 -0500 Subject: [PATCH 03/12] tmp --- src/ga4gh/vrs/extras/annotator/vcf.py | 216 +++++++----------- .../data/test_vcf_output_grch38_attrs.vcf.gz | Bin 0 -> 4753 bytes ...st_vcf_output_grch38_attrs_altsonly.vcf.gz | Bin 0 -> 4504 bytes .../extras/data/test_vcf_pkl_grch38_attrs.pkl | Bin 0 -> 7026 bytes .../test_vcf_pkl_grch38_attrs_altsonly.pkl | Bin 0 -> 3600 bytes tests/extras/test_vcf_annotation.py | 68 +++--- 6 files changed, 116 insertions(+), 168 deletions(-) create mode 100644 tests/extras/data/test_vcf_output_grch38_attrs.vcf.gz create mode 100644 tests/extras/data/test_vcf_output_grch38_attrs_altsonly.vcf.gz create mode 100644 tests/extras/data/test_vcf_pkl_grch38_attrs.pkl create mode 100644 tests/extras/data/test_vcf_pkl_grch38_attrs_altsonly.pkl diff --git a/src/ga4gh/vrs/extras/annotator/vcf.py b/src/ga4gh/vrs/extras/annotator/vcf.py index e117a772..f2cbee09 100644 --- a/src/ga4gh/vrs/extras/annotator/vcf.py +++ b/src/ga4gh/vrs/extras/annotator/vcf.py @@ -1,8 +1,4 @@ -"""Annotate VCFs with VRS - -$ vrs-annotate vcf input.vcf.gz --vcf_out output.vcf.gz --vrs_pickle_out vrs_objects.pkl - -""" +"""Annotate VCFs with VRS.""" import logging import pickle @@ -10,10 +6,9 @@ from typing import ClassVar import pysam -from pydantic import ValidationError from ga4gh.core import VrsObjectIdentifierIs, use_ga4gh_compute_identifier_when -from ga4gh.vrs.dataproxy import DataProxyValidationError, _DataProxy +from ga4gh.vrs.dataproxy import _DataProxy from ga4gh.vrs.extras.translator import AlleleTranslator _logger = logging.getLogger(__name__) @@ -97,6 +92,66 @@ def _update_vcf_header( f"The literal sequence states used for the GA4GH VRS Alleles corresponding to the GT indexes of the {info_field_desc} alleles", ) + def _process_allele( + self, + vcf_coords: str, + vrs_data: dict, + annotations: dict, + assembly: str, + vrs_data_key: str | None = None, + output_pickle: bool = True, + vrs_attributes: bool = False, + require_validation: bool = True, + ) -> None: + """Get VRS object given `vcf_coords`. `vrs_data` and `vrs_field_data` will + be mutated. + + # TODO update + """ + try: + vrs_obj = self.tlr._from_gnomad( # noqa: SLF001 + vcf_coords, + assembly_name=assembly, + require_validation=require_validation, + ) + except Exception: + vrs_obj = None + _logger.exception( + "Exception encountered during translation of variation: %s", vcf_coords + ) + raise + else: + if not vrs_obj: + _logger.debug( + "None was returned when translating %s from gnomad", vcf_coords + ) + + if output_pickle and vrs_obj: + key = vrs_data_key if vrs_data_key else vcf_coords + vrs_data[key] = str(vrs_obj.model_dump(exclude_none=True)) + + if annotations: + allele_id = vrs_obj.id if vrs_obj else "" + annotations[self.VRS_ALLELE_IDS_FIELD].append(allele_id) + + if vrs_attributes: + if vrs_obj: + start = str(vrs_obj.location.start) + end = str(vrs_obj.location.end) + alt = ( + str(vrs_obj.state.sequence.root) + if vrs_obj.state.sequence + else "" + ) + else: + start = "" + end = "" + alt = "" + + annotations[self.VRS_STARTS_FIELD].append(start) + annotations[self.VRS_ENDS_FIELD].append(end) + annotations[self.VRS_STATES_FIELD].append(alt) + def _process_vcf_row( self, record: pysam.VariantRecord, @@ -114,38 +169,19 @@ def _process_vcf_row( INFO field values to annotate VCF row with. # TODO update these - :param record: A row in the VCF file - :param vrs_data: Dictionary containing the VRS object information for the VCF. - Will be mutated if `output_pickle = True` - :param assembly: The assembly used in `record` - :param additional_info_fields: Additional VRS fields to add in INFO field - :param vrs_attributes: If `True` will include VRS_Start, VRS_End, VRS_State - fields in the INFO field. If `False` will not include these fields. Only - used if `output_vcf` set to `True`. - :param output_pickle: If `True`, VRS pickle file will be output. - :param output_vcf: If `True`, annotated VCF file will be output. - :param compute_for_ref: If true, compute VRS IDs for the reference allele - :param require_validation: If `True` then validation checks must pass in - order to return a VRS object. A `DataProxyValidationError` will be raised if - validation checks fail. If `False` then VRS object will be returned even if - validation checks fail. Defaults to `True`. - :return: If `output_vcf = True`, a dictionary containing VRS Fields and list - of associated values. If `output_vcf = False`, an empty dictionary will be - returned. """ - vrs_info_field_data = {field: [] for field in vrs_info_fields} + info_field_annotations = {field: [] for field in vrs_info_fields} # Get VRS data for reference allele gnomad_loc = f"{record.chrom}-{record.pos}" if incl_ref_allele: reference_allele = f"{gnomad_loc}-{record.ref}-{record.ref}" - self._get_vrs_object( + self._process_allele( reference_allele, vrs_data, - vrs_info_field_data, + info_field_annotations, assembly, output_pickle=output_pickle, - output_vcf=output_vcf, vrs_attributes=incl_vrs_attrs, require_validation=require_validation, ) @@ -153,27 +189,25 @@ def _process_vcf_row( # Get VRS data for alts alts = record.alts or [] alleles = [f"{gnomad_loc}-{record.ref}-{a}" for a in [*alts]] - data = f"{record.chrom}\t{record.pos}\t{record.ref}\t{record.alts}" + data_key = f"{record.chrom}\t{record.pos}\t{record.ref}\t{record.alts}" for allele in alleles: if "*" in allele: _logger.debug("Star allele found: %s", allele) - if output_vcf: - for field in vrs_info_fields: - vrs_info_field_data[field].append("") + for field in vrs_info_fields: + info_field_annotations[field].append("") else: - self._get_vrs_object( + self._process_allele( allele, vrs_data, - vrs_info_field_data, + info_field_annotations, assembly, - vrs_data_key=data, + vrs_data_key=data_key, output_pickle=output_pickle, - output_vcf=output_vcf, vrs_attributes=incl_vrs_attrs, require_validation=require_validation, ) - return vrs_info_field_data + return info_field_annotations @use_ga4gh_compute_identifier_when(VrsObjectIdentifierIs.MISSING) def annotate( @@ -218,6 +252,7 @@ def annotate( if output_vcf_path else None ) + create_pkl = bool(output_pkl_path) vrs_data = {} for record in vcf: @@ -230,17 +265,17 @@ def annotate( self.VRS_STATES_FIELD, ] else: + # no info fields are necessary if we aren't producing an annotated VCF vrs_info_fields = [] try: - vrs_field_data = self._process_vcf_row( + vrs_info_field_annotations = self._process_vcf_row( record, vrs_data, assembly, vrs_info_fields, incl_vrs_attrs=incl_vrs_attrs, incl_ref_allele=incl_ref_allele, - output_pickle=output_pickle, - output_vcf=output_vcf, + output_pickle=create_pkl, require_validation=require_validation, ) except Exception as ex: @@ -248,17 +283,19 @@ def annotate( err_msg = f"{ex}" or f"{type(ex)}" err_msg = err_msg.translate(self.VCF_ESCAPE_MAP) vrs_info_fields = [self.VRS_ERROR_FIELD] - vrs_field_data = {self.VRS_ERROR_FIELD: [err_msg]} + vrs_info_field_annotations = {self.VRS_ERROR_FIELD: [err_msg]} _logger.debug( "VCF record %s-%s generated vrs_field_data %s", record.chrom, record.pos, - vrs_field_data, + vrs_info_field_annotations, ) if output_vcf_path and vcf_out: for k in vrs_info_fields: - record.info[k] = [value or "." for value in vrs_field_data[k]] + record.info[k] = [ + value or "." for value in vrs_info_field_annotations[k] + ] vcf_out.write(record) vcf.close() @@ -268,94 +305,3 @@ def annotate( if output_pkl_path: with output_pkl_path.open("wb") as wf: pickle.dump(vrs_data, wf) - - def _get_vrs_object( - self, - vcf_coords: str, - vrs_data: dict, - vrs_field_data: dict, - assembly: str, - vrs_data_key: str | None = None, - output_pickle: bool = True, - output_vcf: bool = False, - vrs_attributes: bool = False, - require_validation: bool = True, - ) -> None: - """Get VRS object given `vcf_coords`. `vrs_data` and `vrs_field_data` will - be mutated. - - :param vcf_coords: Allele to get VRS object for. Format is chr-pos-ref-alt - :param vrs_data: Dictionary containing the VRS object information for the VCF - :param vrs_field_data: If `output_vcf`, keys are VRS Fields and values are list - of VRS data. Empty otherwise. - :param assembly: The assembly used in `vcf_coords` - :param vrs_data_key: The key to update in `vrs_data`. If not provided, will use - `vcf_coords` as the key. - :param output_pickle: If `True`, VRS pickle file will be output. - :param output_vcf: If `True`, annotated VCF file will be output. - :param vrs_attributes: If `True` will include VRS_Start, VRS_End, VRS_State - fields in the INFO field. If `False` will not include these fields. Only - used if `output_vcf` set to `True`. - :param require_validation: If `True` then validation checks must pass in order - to return a VRS object. If `False` then VRS object will be returned even if - validation checks fail. Defaults to `True`. - """ - try: - vrs_obj = self.tlr._from_gnomad( # noqa: SLF001 - vcf_coords, - assembly_name=assembly, - require_validation=require_validation, - ) - except (ValidationError, DataProxyValidationError): - vrs_obj = None - _logger.exception( - "ValidationError when translating %s from gnomad", vcf_coords - ) - raise - except KeyError: - vrs_obj = None - _logger.exception("KeyError when translating %s from gnomad", vcf_coords) - raise - except AssertionError: - vrs_obj = None - _logger.exception( - "AssertionError when translating %s from gnomad", vcf_coords - ) - raise - except Exception: - vrs_obj = None - _logger.exception( - "Unhandled Exception when translating %s from gnomad", vcf_coords - ) - raise - else: - if not vrs_obj: - _logger.debug( - "None was returned when translating %s from gnomad", vcf_coords - ) - - if output_pickle and vrs_obj: - key = vrs_data_key if vrs_data_key else vcf_coords - vrs_data[key] = str(vrs_obj.model_dump(exclude_none=True)) - - if output_vcf: - allele_id = vrs_obj.id if vrs_obj else "" - vrs_field_data[self.VRS_ALLELE_IDS_FIELD].append(allele_id) - - if vrs_attributes: - if vrs_obj: - start = str(vrs_obj.location.start) - end = str(vrs_obj.location.end) - alt = ( - str(vrs_obj.state.sequence.root) - if vrs_obj.state.sequence - else "" - ) - else: - start = "" - end = "" - alt = "" - - vrs_field_data[self.VRS_STARTS_FIELD].append(start) - vrs_field_data[self.VRS_ENDS_FIELD].append(end) - vrs_field_data[self.VRS_STATES_FIELD].append(alt) diff --git a/tests/extras/data/test_vcf_output_grch38_attrs.vcf.gz b/tests/extras/data/test_vcf_output_grch38_attrs.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..a8805ec627809e3e20bbfd558c0671d4046f2210 GIT binary patch literal 4753 zcmV;C5^n7uiwFb&00000{{{d;LjnN3482@yZ`{Te{jB^7LjKaCvM}!l8U;`#TM;V9 z*26&q1Q4iQu9n3vxgoi7)S~~rcles&l5RKFweulCcFA+k+{d|hhSa`#wMbXVA}`i) zb@KDs+b40%zI*lR?d8?oo12sGFV9atoZj9ZohRk2NHB|$6363F4OY9OseBWTvqW9BTVKo8MRz8`*KX)VNz?D zl~N*L3Xe$`6#)8jOxa@^D!E}8JXMdWi6PQz1WesyO3MXc@|Ra*SZe z&cuWi3d01(MpJb*CWb)+mNAZTsP4Lk9920rQd*2nqwW?m)?8+YK?z7_gO5d*QYIzF zB+`u{d}>Z08lydV8)LPCZ5PJe=}nAL=jzYV=N^Clms>66T_#W zmZ2_VEG3k1!x5y!+nAUQE!POb?oEt~96rN1DEg(fASUK2Xjurc_?R^&IDzWxC5mZ; z!uHT2fQDMYgytR^MiGQM_Runb78nxIL$lD=fQ>M_dTipc-7z2{G!39B48)kwDs;BM z%swgyiH+)zAdc1|sFp{jp{qH@mfvG*kIl8Rh$In0}=;7Sd26*EOt=xvQTKnbOXEf09c?jgo_5n4v-sdZfV|;Ux=e z9aXd?55d|r6{jw_1Hp$BLhJ^2aqDvRf(D*COjAssJd#xG23E1 z=<(3u34=tfMiVHX8y^s%b&%Jb63nSmngCk}f!YjgcvPe?&MQ6bplIA%sF=_}cT|#K zI>ZF^sHnu$J{!~{Cm1g#dgvgtAr~Q>e#s z;Z6)3g33DVGXmp1;~rX&uLjM-tpUNDusIJ}80<_jPqJ0mcnv{RKCZ%}GlRM1f!o3Bq1HmJE5KC%*+r_Fq@nb8<;Qh3%OG$)u231?s!o=|-q zK6u1<<=B%Bqi9hZfeDYAF`qi}a7!Q<=7BBOVZl(0Kc(}~VLMk66`BoF54GnG4i+%a zg1HW#)I)Q}gbp7b8`K98;USK;7V~_Fg=ZED>Ft*GwcWw9L1vJ)jm?{tgVS+sDbkG7 zzBoPDlfsCN=oS_}od)I#^ORDHP+O%I7=J$HnTJ9p2~sC(;~CaGboOagt{UP%In}g zxE!2h7=zh0wH%Z_tOPR;W%!^|qx>D6#}2AfF^lAY8lj~rVMGmYGr(IE~-&}8^G6QIC2_=@;4`*xq9+H!Jw7G?QUi;P*8zzw(;g~ zKYP9~Uwu*f1wUJJalJ~)vRcNOCc~evx!ta#d=Xt-M~l3O=5ZARMj3rdt7TLz(=vKY zvb=iUB#}di97Lqa;SPShS~Wjwlg#66K2OW|u}G5f59Q3@!Ww@WKP3&m`zY#ej(|hH zt;%%nzD?>c{P*N1KOwTJ?A zK-67Q#_Ob~xl78gWSez1hOEvyMs;ZXRSne__`~0{Osb*Hw?iABmheAOT>s2bnmsli z0nfmFbztIXnLaLEF1ghY2-cV7Yscg$f}3XvWL=eHwP)P@@cpME&mMQ#{kBa?N5(lf z&F0B!4~AvFPIe*~dpb2rw2F(zq-gS)l7~&R0Pq zvui(pN{Usy*#Ki_s{9 zx5Lw2=V*RQat~?zh1}{isU@NgP~`e>oMtX+aj%orXKlw3Q?}g~&h(^1osB*CU!^V$cj*t?b4T}h>?t?2AX$Mv7`MY^GXbvh zPasy%R^!Kdnf2)P?z)k&UHyPWNN3ws)d2hfO8uRk?FWd9`)$jCQuZ`=?8e1IS`~2x zv+N%6y7~AfN*CeaHV-G-rhAupSBDLB?GKoYvLu;L#sHw4zZ}JFmj33_tsNlta6>zE zHzwWfTQ_<&Ok5eep96Ic=r>6ari(k5$yvny(tq7AshdaQpffB!;%vTH9U9>G|o^Rkxa(|FfEQ zSg(CpPAz4O{$l8<-9W8St;!F#yg`ia_7H_L@53%o6RXcBEN5 zkJRecje?7Q3*A@h&o{UCr?q$PFVD*ze@-f0ogC}U*5#8M0IGD6y5A3WGmy)ii&Js& zV+6QSgX_c=#)@*2XLa6Jd2`uay_x;JNkjL^&6~INh;e##*9L5Y>zks;i#_z?(rvk- z#9bZbv)Q(&w}>_GX8LgiFMIk&HG}4I==vq}rLpbXDlV#heB^@TmYmMF&RNg{(kynn z!)@twp^@Kiu8PVU_FMTco8zw_0nl&6A7W4?`{pTVB8(U-s0e5uZc;5Pzlg8J_w2`; z_wNoqyuUrTJU@VI4&eKP>yM{b2hEYffm_c(Q`-miuVa7rUtBbUJ-b@~03VA81ONa4 z009360763o0Ja6~*xPcVNEiU%YxgO<*{a>Gj?>KK`CQJ3U7O%HT}Wor}o7-)g#PPG=# zT46q+X?eJ=XhwM!4>w4-s zj%%l065*K}EbH?bE!1K>o2FrG#6d(l8a|F@cnZ8=?VDj}TCV3NTQb@8c&Hh&=V1>U zon~C0napf1jpf>AqD9H5$Ebtu$abS{YS?2xU}tmFoy4jyfbpx56a>kN7YdoO z!eb@xi#(VqXtHIRNB7Bro~N3dC;%P}aw3c^bBp7%mP^m_O|~b`@*#ug_2xQHHL62p zP79f_cHjqL;BAS;*q9}p6&kJ|;>f{vGD{F7*RdCbEZ7gxNUMj3IK*aB^qlYRk`35( zf0wZ0x&2ny>&(+G-Cv8C5}zG+n=bAb;n!KaLluNg$b9`f?$4m5!C|lS@U{D(S?e2( zF;*e3ahqepnJ`8KJ7@*s{2Jxe99Lp21G)TjNi{=J5<)xyfg}L|o|R~pJJ=SPJgMpx zU6B=4QFK+8WnK9c^aJt?-G`u`PlqM`1$6E*^iT3W$=pSpoOS=*)W0W1=dJr^FIArV zKYlc*-XoqY2K~)de?FSyNjRq{^U1t&Zk}{=h@*|wi3;LdZPg5uiPLTv;zP?xd0|CU z&6mdH##yVgJHAYuK%>FV+cEL%dRJW{u7fg1!9O8{tBeS5xP09Ec)# zEu6*zgy6Ms5)bDC0ObK7@Y&9VBDozQ7SOyPv5?va03$*MWjS6}^osl|0RD#n_|K>p zk#*6s7qpX}v8J2P=yp5l_4)fj;?e`!j%1co?ziBm=YltBFGvh~HQKH@iVKqP#$~pf z1)M;Bb!<6@MT4EM!f}4h?W#*To%jQ_OKDu7kc7ym1Y!k=qDhQUWFHP8vxm%*oeYFj zMp>==1$VCZs^!PM ztar^a{TeyhE$}i)m+qm literal 0 HcmV?d00001 diff --git a/tests/extras/data/test_vcf_output_grch38_attrs_altsonly.vcf.gz b/tests/extras/data/test_vcf_output_grch38_attrs_altsonly.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..61708a9e4f4adffdb66eebeaa5c3fdaa054140c9 GIT binary patch literal 4504 zcmV;J5ohiniwFb&00000{{{d;LjnM~482_2ZsSH0eRjTr@Vx9|Jx1TViH!qzJu{9G zPiA~cumJ)Xv_x54NTfqjo_H7g?_14V7eytjjBApI1d+s3r|WX6x=HiZt3|d-7e%>F z>XRSO-aV;f@ztwW?=CKH-(H`5eevex1q|hRdrfc(PL6o z>HP2;xW)s$N$T`OFm9L=3O+l#Jb9We+8;-+v*>m7y72#c{jcOHX(6M-#U`J5S#H)zes_L-wv_HJFYc06eV63P>bc6QyDF`Z7fDqo-%Kz?z!W@|TEs8MlszVo zorty81ehvdGQkyqz8q5zn2u}d1O-nsU|Qywjx7PE9WZq)6=2GjW4ZxTX)c|S=H;02 zfGM4fmF4#3n4ASrX=6*0^qdD!&73hLpaL-!R!c?@dLC+tD`}JzjsP?i6SGW7%PBxZ zH8CE?oI8r;59Jgy2}z|0s-6$!L$ZsG9omwqpmefN)&Va z-T5VB3^Hej2`ys+N2%5XgTfYDCIpKu#R#RlRqW7?SQv^aCGJBp)j!3UArPm*!gLK1 zb{UG$!c_jCupLF&^(Mqr|76TGg*x?>y(?oGlN2KqA5u2nk+Pg5WqTV!O!Y%>&M7v4 zV59TZ4k?cnO+31F0NxH8T#?K-!#bw@LOM<{k_8jfWyqN1C?c5H(J`!WB)xmEF>M*J z36vDU(7lNrQmC9z6dO&`+t@J-Iw3CvQor(k2MR^ z8u1;&r{l38UB*;OIFn8iNJ+4XPXB@p&tVp`JSviI3`f7fXuP0CB>FMVB3H#t%(U$L}8D>W`>Zq!Y&a;5ynw* zEKrK#+#o>5$RUj47HoiaBNZW(RfR=FNI_WXN{pHTWeZa{u?5M2Wne8v`b6q$)WTv% z>T2A9JIY236$1Wj+mWXvMNbc|jY=@hH zfISeUPY}2wx!2W0Ydq%&k~@C`G>fsYQrw&mtViv@5%x#Ta3EufgN48v7o=o_sjc~F zM?-QuAYhO5&v8NNZrp6Np;Qu$hbUl8aGNJUBPB7UEkQG+e_ConjnboGSdc=GdZeQq z;kCfEPAb}3V)5e?PiF*Zi4 zkLB=A3>-qr8uuAR@tpAht=JcXmUwHxC?{+wQ452eDdkDF!Hw53LgkYNADubMEw42A z&;@2dN@x^DCn*mvq;+4cWb9a;D1D7vIv1q34dT(cts~stNj&xWrw}PwW98$N=e9D& z8sZ#f^5J0tB!p6)VpQm{B_)mhVI67i;agUU^0ZlwHZzvuNs7Q4LrX^K5Ih6J@I>ls zeB+Vgk>fxbM=>UC1P&iHQ{HuycuSxJ<$%DBfSxU4{>Z9Q=ShAd}d*Yo^I(L+a28-6pm=y*gjb~Ivuw$Cz^4( z2d77SQW&ub-6HVaX<%+BPbsa!+N!Oh`17g291E3ZM4haQC$4$OLyE^&gIDmDnlsWy zX-9{*#*se8WfiI+#T~_i!vPzG(-=j0JI^XS%E!)1LOHZp4r59B?1)uZSF|H}9DD%R zqmvBhD7&WBqtb_!Q0Ae=H##lJ-_ZqZRGmh|luzbajSq>TB~hk|Mztx86_h%z4et1w za+I|i)u!cwXuj%Xd~_~qN!|?*Y7>u~&XD}gNnmcC!Ydf8F|^&yjRp#?2+lU%{^Q5M z7v`(aNGMXoK0vJ{FDXW)Jz09iUAO8#ip*Z zx&Jn8zD({{X;h{U08&MO?!S0$0l<)dM^2L01I@}qJSAo3KWxnX4%hol{iTQkG$QJM zQYGtjpt(&dtYnk-HpZ;ZdPYrX!mnDYFYrgWXqnbyoA1UpJ}u#YqNI7vQI*4!PWsyH@v-@R}R-TM^ZkEr} z)gBD1Vx4Y9FbQ-TlxUTd4{6!vGiUdY?E=7OfY6r#T!R48Q#t|>Dfb(ub%f{>VFoU6@Z`CX><-TTW2#*d5&_e`^&ZrGU~(VW=E|tMuu%z=e1l*B*{|8L@Y(haJ9gXl zi>I_)C6AB5*c0~*ywXOwFoFv^?QQw@^1H7kQmRhK!ls7Im_M;6r@o z=_Ax(wOOx$crq&sP)ARw_sCI{{yJN2=AP;ME7n|8jm{i7r@kmF#yf=dP8m?JDBk0wj z)4GXN9C_)cI&3n;OC&J1*l=2L3a7I9FLbdlu!1i(Gs+Lj}B+4f&}(>opNZS2ASGV^J;O@G{;d%DL{Px+yR$O`nqq#G{V32;?> z0jxx4HruS~7T^z18m{bZ-$PvdyDdjbIndm)o0RuiT_!cm zvU|kq`iHksw!nki98a`u_pXY*4qNEPA21o^X*%B-1Ay}W@)S3D_KQ!qZh+Xs4c*Y) zn)H`%{OHv(@n!5^2kIR#Y?2^M=eItS^GN({_%2_dVaN+Qi5~Nd-}g@l2uke?=}f5ha&Ka+&Ske z@_D(XZJk@%ib#EdRB2wp2ylLT6m6Q8(G2{!uQhOK0&TCew!M_oH>a1E{c5iN&uY41 zz42kSYbjIo7envb4KxbXsr-1$i(+)UhbWu{ANGOTv8ui7eZ1KxCqBZS4|b4#N1Au@ zNTY83D7YB5&_kvEcztts+IZ*g;?2+%Pk%@0n&enNZhSuZ0ieznnSXz;DZ8o#NwrS`;Ila8?0=0wV+p;XmJ#^#3Z?&S-|2isW zvrXA-4I94g)Z>X=4786L%k9sh+gH$!rnYbDq^$SxjSq@nYBt|^CxP~7dE$44o674! zE4%$X6;%!Fm-bser)Qo5=ojVMRa@H}fyYKvgxw%`Jn5bE`EmJ#VvQbGcn|Iqoa^ zMP+*TcKZz9O%gY7@G6cxbb}N<7 zcHA&%&m2ai66{Q~C^3>SCKC-G#}1wXFI@U&6q%Omx#@~LtaUrq48`-XhmB!3X*(wC z%;Z~zvLt1AgPjCv*ALf1aPc5|5Uo6KzPbsLvy3ekYN;8Mzk42Rrs(Je({*z@o{~W5 zjlM3AG`TyHm9BDmNxsYde;V)$k|`N+tPBAxz7P$6n-qR%k!6S*0!Sf-UJGLP1Pwhe z^WDG%B_$HMQWIO%v>WZivbM1=iE&k8JNGG|)srh?TSD4dWXBj=<_agZCM$dSDZjnF zoMfq9VWDhaRe!9`7%{gp2>mb$y%jN)7*5Jtk>UCgj%_?iofJXRPP{PUz-Ea?Mm;>n z5jN9u=4^eJe8GD9L5e11zx|zjb)coe@zwC|+r^z`EzdR9SVV%xKkZUnyD2T^ZJyu7 zs8m0W(PU{_Mo1u0MY4#1z{w29Z-Gi|k>T`)t}1F@RrS8ED7yM4ty`(NoRhTH=cbC` zQY(L4>l=;krO^_5E#7x~eNVnDMmKw}bk_YIzxtnX$mvR3bffg%9&`-pu4iY0xM1k+ zbLaltUiO`9q3Sf}v+IuESUo z<$j6iD-^{&h10nbes9%V=H~6g^VUE;%J*cl|Ki*KLuk%!xoh@HdXC)XoKNiZdV1FD zF4CnD%{65)wO?swBx%nxQCAUNPZ$-1MIk>1&?Ltq!~sSSWe(CC!mI>Yl;>JS)f>tu z;s1f~e}=XE-#Dt#3+IB#cwuTx7B{OUPxE0KoxAMnX%6q9u-MhO=2BWL+Lhc&9ZRMn zjby|#0CA#BGZLU_axjIE-C*R%sQ^Mc=h$!j1?o7WJ&yaO4XVPcFJ@3x(yIWxt%}(@ zIlV*yuL64zMD1B~(1#FFMIDpRrYzH^NXatCL7D~bf`SN2G^FwZ2OO?;_Ayal z%V{W_GcM{dd`D4xH^Z+Jec77{+0}v>o{jExw9APCn~?96INQETokM{GZ)x;}&X1E^ zp5e%6`BRd1RY{VRC%M{^`P0;kXiVLL~2F-Q( zBNqW~nrSl%6E`JWNt%Cb!Y%$ztIe0t|Jk&%v}uP~Fl`>Kn1AL7@!YyFQNZX^(ubB* z_o{n*_9iqtOa0C*8azHe-#s{^E^YMft6?yYA2UGhW~WWSnN4qzon8=>GmF9(9DVK8 qtNH=Bs@~_$6951oiwFb&00000{{{d;LjnLB00RI3000000001}^PQIf literal 0 HcmV?d00001 diff --git a/tests/extras/data/test_vcf_pkl_grch38_attrs.pkl b/tests/extras/data/test_vcf_pkl_grch38_attrs.pkl new file mode 100644 index 0000000000000000000000000000000000000000..2cce18ec699f4e583a58504c0dc1b8e6a2666d16 GIT binary patch literal 7026 zcmeI0%aWpI6vro*O|OzlrIN*LQk8|1)X>FEkl7*H2r6CxMK=~Iw-y1ph??rEJisi? zQ>Btr_IZoEKmfJH6hkxBJ=L4G?1k9p|Ka(a|2f~suV4PAy?&Lv?)Goqe>KxxhyW@u zhM@ojP2a%?!m`Z$QZfJ_<$JdI)oAylh4cm(T|8jl4b?r$ehHXg5QjCc_r9)!0 zq!$Ls0~BE*me@$I&nv4yv1Ur^q<3*AMU=dwLSIo8HGb1zbhd6rY^U z6wf>Sh7#YqW|(Rj5h@g?u;R-F5U z$nk6c@&;Olueh>#x{vzCagVFUYR7wzBuZ52?r#77+XV4dB6vkzy(tN$w?BP=^OPVN zQPDhz?~FRQ)f`n!5ZapNVk=r65TNNiXK^kP|HApHISUErZw^+5D=1P10g8;?d+E~s zanfAHQDmPFrh#Sn2emjdZuwuIS^~M-0hJvK za$j*bQ^rlR$*PSa@p-ke3LW3BwDIv`c(WW$fJ(~|14DP2kOl@Tk4>+iUmHb~o~t$@ z2 z26$zSz_ndfV%3n=I(RzqS>TJr=YelQXt{aQVigua(2>;{EX#z`HK+5H+R!y=oL)a4 zd=aH*1dpbgLWJo9O`$OOOTN@$9^4Q9xDG(@;P=0?u#>ylq&e$?l~n~DAf3tx-&Ju8 zZH&i*3CW33!Q^k_i^S()$Ame>K;c|uy-~|z7`}zrN6T5ZQ2dLJqeN83TkboHi!4RzJYX+nM;;2s<(ZJ zc9Hlz+RlbBZLI-}nzm9^J6jckd4j9cQ@=HAvdBl+yNJ^B*gFAn9_Pu&$9DCIXtJ?C zn)|@|D!LktbTZl5|C^0_fy;<#`;*yP+=dv8*}lUX=ytOJB@kafjQipEJlq{zSb_cC zdJPPqW}%idnvQklae{9a^Q&5L6-x0r9D@h=GcXsqz41=Y?`K#)#{7H z=T(DaFZpobHk`5B41t9ablN+`wELrQSDi`1D7}7uH55@grSLxKQ7%9Iq5g>BZ2uFN zN6vOSvH$DCY(HsU(UJXsj?+e#qUUv)&>5UX45*4=Rg$WU3^xY2>weJSLEvbM2 literal 0 HcmV?d00001 diff --git a/tests/extras/data/test_vcf_pkl_grch38_attrs_altsonly.pkl b/tests/extras/data/test_vcf_pkl_grch38_attrs_altsonly.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f6134063caf16e612479661d9dd67d4d460fadd2 GIT binary patch literal 3600 zcmc(h%WkVy6o%8=z>|)ucF;+r4mhVej<{pa3}eRzUt{ofVqvqv*cb@c*j1!Hz!|1b z)luhti@reF#w55Rh^jpbvkk0&ZPxd*_WJef-@f_w>U2Fmz5DdtdgEejrH0cqS>Y@1 z5E=RK{%Q967gtxmAcl&3yhe0|)Hfd|+|7#RgfY-POd8AwjMbUfOcXnq=I$`IylOxq zAFdJq-~brJnItVHPl!%BmHy^L=Y) za8i|LO~M+tv-u@(9{_6&x~bm9PRb~GXN3{afD5HgTS~t#)qi;!CvbtL1D{*3f#)UP zfJk>Yf>5UjxyJW_D5~T8qs2~^qBZjG6g8UiD-(H)Lp@({VUZvRB3=Uv*8YU${Mf(T zK-chrtC;CNnh%b9&I;AedygezppTDFpT18x<290?&6H|Ye$Gitb`>3r7D8|wl~_cxcU&C|V@G)Db{Ja>7)qPf5U ziZ2DLz%u+q4MswT{FFL`CF52g!&bm^e2eGhmdtT7pV;NkyvA^l!iM9|w03>TPMcc5 z)ItJp9P2Yti(2}C$>PehOPTm0yL2&<>fClkAQA*Qw+53v#X3WCrQU1pjXqms^B3Et zjM7`#1#-#QC0@Q_m(s1_k}uDlI>irJAE4;T2?_f@^4JFxYC!hbqhmxn+SB}jT5UE$ zKUz9Fi3?RNTl+lrW#Wt2)3VA7Sfq|cZ{D>8LFy2yhN(xY;gC2lOtSfl*_TmzL-qvA zUeYH~G{0Sx=br^mvM7m>6^@6$K6g4B{>lSx+B-%eOxs_s!|@@aaoYAB(I5{GJ9LIJ zVYc9Tz{|uJ0Ut2(JvzDz!|GITQ1cPG>ED_A+3j-E9-2Zeo?l+3kYeCvl->q-!r~eC zY5B^}UhZr_W3BX4;7|aIlz$fB@GST-=>Zgp<0y)gl~!ztpJ5D##KU239}pOkFt9p7 z*Ezm97W!*BYHpaI7ZO5sIFqp~mONXOi7&E+WACMC;BN+6Kz_ds6=vZIR zpjb8X3L{eQ`9`IJxon z^$FHqERNH;K5XF2mev!jox2FKMbCp?CcX&zcpFIdk!=jBk=@i>M%a=E{8kV*cJs($ Yy?=4=mr;5<=rNB==n&BV0X?|+8v^xbj{pDw literal 0 HcmV?d00001 diff --git a/tests/extras/test_vcf_annotation.py b/tests/extras/test_vcf_annotation.py index fd034b0b..0119a2c2 100644 --- a/tests/extras/test_vcf_annotation.py +++ b/tests/extras/test_vcf_annotation.py @@ -6,25 +6,31 @@ import pytest -from ga4gh.vrs.dataproxy import DataProxyValidationError -from ga4gh.vrs.extras.vcf_annotation import VCFAnnotator, VCFAnnotatorException +from ga4gh.vrs.dataproxy import DataProxyValidationError, _DataProxy +from ga4gh.vrs.extras.annotator.vcf import VCFAnnotator, VCFAnnotatorError -TEST_DATA_DIR = "tests/extras/data" +TEST_DATA_DIR = Path("tests/extras/data") @pytest.fixture -def vcf_annotator(): - return VCFAnnotator("rest") +def vcf_annotator(rest_dataproxy: _DataProxy): + return VCFAnnotator(rest_dataproxy) + + +@pytest.fixture +def input_vcf(): + return TEST_DATA_DIR / "test_vcf_input.vcf" @pytest.mark.vcr -def test_annotate_vcf_grch38_noattrs(vcf_annotator, vcr_cassette): +def test_annotate_vcf_grch38_noattrs( + vcf_annotator: VCFAnnotator, vcr_cassette, input_vcf: Path, tmp_path: Path +): vcr_cassette.allow_playback_repeats = False - input_vcf = f"{TEST_DATA_DIR}/test_vcf_input.vcf" - output_vcf = f"{TEST_DATA_DIR}/test_vcf_output_grch38_noattrs.vcf.gz" - output_vrs_pkl = f"{TEST_DATA_DIR}/test_vcf_pkl_grch38_noattrs.pkl" + output_vcf = tmp_path / "test_vcf_output_grch38_noattrs.vcf.gz" + output_vrs_pkl = tmp_path / "test_vcf_pkl_grch38_noattrs.pkl" expected_vcf_no_vrs_attrs = ( - f"{TEST_DATA_DIR}/test_vcf_expected_output_no_vrs_attrs.vcf.gz" + TEST_DATA_DIR / "test_vcf_expected_output_no_vrs_attrs.vcf.gz" ) # Test GRCh38 assembly, which was used for input_vcf and no vrs attributes @@ -37,22 +43,21 @@ def test_annotate_vcf_grch38_noattrs(vcf_annotator, vcr_cassette): out_vcf_lines, expected_output_lines, strict=False ): assert actual_line == expected_line - assert Path(output_vrs_pkl).exists() + assert output_vrs_pkl.exists() assert vcr_cassette.all_played - Path(output_vcf).unlink() - Path(output_vrs_pkl).unlink() @pytest.mark.vcr -def test_annotate_vcf_grch38_attrs(vcf_annotator, vcr_cassette): +def test_annotate_vcf_grch38_attrs( + vcf_annotator: VCFAnnotator, vcr_cassette, input_vcf: Path, tmp_path: Path +): vcr_cassette.allow_playback_repeats = False - input_vcf = f"{TEST_DATA_DIR}/test_vcf_input.vcf" - output_vcf = f"{TEST_DATA_DIR}/test_vcf_output_grch38_attrs.vcf.gz" - output_vrs_pkl = f"{TEST_DATA_DIR}/test_vcf_pkl_grch38_attrs.pkl" - expected_vcf = f"{TEST_DATA_DIR}/test_vcf_expected_output.vcf.gz" + output_vcf = tmp_path / "test_vcf_output_grch38_attrs.vcf.gz" + output_vrs_pkl = tmp_path / "test_vcf_pkl_grch38_attrs.pkl" + expected_vcf = TEST_DATA_DIR / "test_vcf_expected_output.vcf.gz" # Test GRCh38 assembly, which was used for input_vcf and vrs attributes - vcf_annotator.annotate(input_vcf, output_vcf, output_vrs_pkl, vrs_attributes=True) + vcf_annotator.annotate(input_vcf, output_vcf, output_vrs_pkl, incl_vrs_attrs=True) with gzip.open(output_vcf, "rt") as out_vcf: out_vcf_lines = out_vcf.readlines() with gzip.open(expected_vcf, "rt") as expected_output: @@ -61,27 +66,26 @@ def test_annotate_vcf_grch38_attrs(vcf_annotator, vcr_cassette): out_vcf_lines, expected_output_lines, strict=False ): assert actual_line == expected_line - assert Path(output_vrs_pkl).exists() + assert output_vrs_pkl.exists() assert vcr_cassette.all_played - Path(output_vcf).unlink() - Path(output_vrs_pkl).unlink() @pytest.mark.vcr -def test_annotate_vcf_grch38_attrs_altsonly(vcf_annotator, vcr_cassette): +def test_annotate_vcf_grch38_attrs_altsonly( + vcf_annotator: VCFAnnotator, vcr_cassette, input_vcf: Path, tmp_path: Path +): vcr_cassette.allow_playback_repeats = False - input_vcf = f"{TEST_DATA_DIR}/test_vcf_input.vcf" - output_vcf = f"{TEST_DATA_DIR}/test_vcf_output_grch38_attrs_altsonly.vcf.gz" - output_vrs_pkl = f"{TEST_DATA_DIR}/test_vcf_pkl_grch38_attrs_altsonly.pkl" - expected_altsonly_vcf = f"{TEST_DATA_DIR}/test_vcf_expected_altsonly_output.vcf.gz" + output_vcf = tmp_path / "test_vcf_output_grch38_attrs_altsonly.vcf.gz" + output_vrs_pkl = tmp_path / "test_vcf_pkl_grch38_attrs_altsonly.pkl" + expected_altsonly_vcf = TEST_DATA_DIR / "test_vcf_expected_altsonly_output.vcf.gz" # Test GRCh38 assembly with VRS computed for ALTs only, which was used for input_vcf and vrs attributes vcf_annotator.annotate( input_vcf, output_vcf, output_vrs_pkl, - vrs_attributes=True, - compute_for_ref=False, + incl_vrs_attrs=True, + incl_ref_allele=False, ) with gzip.open(output_vcf, "rt") as out_vcf: out_vcf_lines = out_vcf.readlines() @@ -91,10 +95,8 @@ def test_annotate_vcf_grch38_attrs_altsonly(vcf_annotator, vcr_cassette): out_vcf_lines, expected_output_lines, strict=False ): assert actual_line == expected_line - assert Path(output_vrs_pkl).exists() + assert output_vrs_pkl.exists() assert vcr_cassette.all_played - Path(output_vcf).unlink() - Path(output_vrs_pkl).unlink() @pytest.mark.vcr @@ -160,7 +162,7 @@ def test_annotate_vcf_vcf_only(vcf_annotator, vcr_cassette): def test_annotate_vcf_input_validation(vcf_annotator): input_vcf = f"{TEST_DATA_DIR}/test_vcf_input.vcf" - with pytest.raises(VCFAnnotatorException) as e: + with pytest.raises(VCFAnnotatorError) as e: vcf_annotator.annotate(input_vcf) assert str(e.value) == "Must provide one of: `vcf_out` or `vrs_pickle_out`" From 6f909dd62f2cb4591dbb8e4591dae8aa2b19c077 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Mon, 3 Feb 2025 20:35:16 -0500 Subject: [PATCH 04/12] sdlfj --- .../extras/cassettes/test_grch38_noattrs.yaml | 1466 +++++++++++++++++ tests/extras/test_vcf_annotation.py | 14 +- 2 files changed, 1473 insertions(+), 7 deletions(-) create mode 100644 tests/extras/cassettes/test_grch38_noattrs.yaml diff --git a/tests/extras/cassettes/test_grch38_noattrs.yaml b/tests/extras/cassettes/test_grch38_noattrs.yaml new file mode 100644 index 00000000..435071dc --- /dev/null +++ b/tests/extras/cassettes/test_grch38_noattrs.yaml @@ -0,0 +1,1466 @@ +interactions: +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/metadata/GRCh38:chr19 + response: + body: + string: "{\n \"added\": \"2016-08-24T08:19:02Z\",\n \"aliases\": [\n \"Ensembl:19\",\n + \ \"ensembl:19\",\n \"GRCh38:19\",\n \"GRCh38:chr19\",\n \"GRCh38.p1:19\",\n + \ \"GRCh38.p1:chr19\",\n \"GRCh38.p10:19\",\n \"GRCh38.p10:chr19\",\n + \ \"GRCh38.p11:19\",\n \"GRCh38.p11:chr19\",\n \"GRCh38.p12:19\",\n + \ \"GRCh38.p12:chr19\",\n \"GRCh38.p2:19\",\n \"GRCh38.p2:chr19\",\n + \ \"GRCh38.p3:19\",\n \"GRCh38.p3:chr19\",\n \"GRCh38.p4:19\",\n \"GRCh38.p4:chr19\",\n + \ \"GRCh38.p5:19\",\n \"GRCh38.p5:chr19\",\n \"GRCh38.p6:19\",\n \"GRCh38.p6:chr19\",\n + \ \"GRCh38.p7:19\",\n \"GRCh38.p7:chr19\",\n \"GRCh38.p8:19\",\n \"GRCh38.p8:chr19\",\n + \ \"GRCh38.p9:19\",\n \"GRCh38.p9:chr19\",\n \"MD5:b0eba2c7bb5c953d1e06a508b5e487de\",\n + \ \"NCBI:NC_000019.10\",\n \"refseq:NC_000019.10\",\n \"SEGUID:AHxM5/L8jIX08UhBBkKXkiO5rhY\",\n + \ \"SHA1:007c4ce7f2fc8c85f4f148410642979223b9ae16\",\n \"VMC:GS_IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n + \ \"sha512t24u:IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n \"ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\"\n + \ ],\n \"alphabet\": \"ACGNT\",\n \"length\": 58617616\n}\n" + headers: + Connection: + - close + Content-Length: + - '1035' + Content-Type: + - application/json + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=82663&end=82664 + response: + body: + string: C + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl + response: + body: + string: "{\n \"added\": \"2016-08-24T08:19:02Z\",\n \"aliases\": [\n \"Ensembl:19\",\n + \ \"ensembl:19\",\n \"GRCh38:19\",\n \"GRCh38:chr19\",\n \"GRCh38.p1:19\",\n + \ \"GRCh38.p1:chr19\",\n \"GRCh38.p10:19\",\n \"GRCh38.p10:chr19\",\n + \ \"GRCh38.p11:19\",\n \"GRCh38.p11:chr19\",\n \"GRCh38.p12:19\",\n + \ \"GRCh38.p12:chr19\",\n \"GRCh38.p2:19\",\n \"GRCh38.p2:chr19\",\n + \ \"GRCh38.p3:19\",\n \"GRCh38.p3:chr19\",\n \"GRCh38.p4:19\",\n \"GRCh38.p4:chr19\",\n + \ \"GRCh38.p5:19\",\n \"GRCh38.p5:chr19\",\n \"GRCh38.p6:19\",\n \"GRCh38.p6:chr19\",\n + \ \"GRCh38.p7:19\",\n \"GRCh38.p7:chr19\",\n \"GRCh38.p8:19\",\n \"GRCh38.p8:chr19\",\n + \ \"GRCh38.p9:19\",\n \"GRCh38.p9:chr19\",\n \"MD5:b0eba2c7bb5c953d1e06a508b5e487de\",\n + \ \"NCBI:NC_000019.10\",\n \"refseq:NC_000019.10\",\n \"SEGUID:AHxM5/L8jIX08UhBBkKXkiO5rhY\",\n + \ \"SHA1:007c4ce7f2fc8c85f4f148410642979223b9ae16\",\n \"VMC:GS_IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n + \ \"sha512t24u:IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n \"ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\"\n + \ ],\n \"alphabet\": \"ACGNT\",\n \"length\": 58617616\n}\n" + headers: + Connection: + - close + Content-Length: + - '1035' + Content-Type: + - application/json + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=82663&end=82664 + response: + body: + string: C + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=284349&end=284351 + response: + body: + string: CA + headers: + Connection: + - close + Content-Length: + - '2' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284349&end=284351 + response: + body: + string: CA + headers: + Connection: + - close + Content-Length: + - '2' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284350&end=284351 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284349&end=284350 + response: + body: + string: C + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284351&end=284352 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284352&end=284353 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284353&end=284354 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284354&end=284355 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284355&end=284356 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284356&end=284357 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284357&end=284358 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284358&end=284359 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284359&end=284360 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284360&end=284361 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284361&end=284362 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284362&end=284363 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284363&end=284364 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284364&end=284365 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284365&end=284366 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284366&end=284367 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284350&end=284350 + response: + body: + string: '' + headers: + Connection: + - close + Content-Length: + - '0' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284351&end=284366 + response: + body: + string: AAAAAAAAAAAAAAA + headers: + Connection: + - close + Content-Length: + - '15' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284350&end=284366 + response: + body: + string: AAAAAAAAAAAAAAAA + headers: + Connection: + - close + Content-Length: + - '16' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=289463&end=289464 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289463&end=289464 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289464&end=289464 + response: + body: + string: '' + headers: + Connection: + - close + Content-Length: + - '0' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289464&end=289465 + response: + body: + string: C + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289465&end=289466 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289466&end=289467 + response: + body: + string: G + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289464&end=289466 + response: + body: + string: CA + headers: + Connection: + - close + Content-Length: + - '2' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=28946399&end=28946400 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=28946399&end=28946400 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=490413&end=490416 + response: + body: + string: ACT + headers: + Connection: + - close + Content-Length: + - '3' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=490413&end=490416 + response: + body: + string: ACT + headers: + Connection: + - close + Content-Length: + - '3' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=490414&end=490416 + response: + body: + string: CT + headers: + Connection: + - close + Content-Length: + - '2' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=490413&end=490414 + response: + body: + string: A + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=490416&end=490417 + response: + body: + string: G + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=490414&end=490414 + response: + body: + string: '' + headers: + Connection: + - close + Content-Length: + - '0' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=490416&end=490416 + response: + body: + string: '' + headers: + Connection: + - close + Content-Length: + - '0' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=54220023&end=54220024 + response: + body: + string: G + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=54220023&end=54220024 + response: + body: + string: G + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=54220998&end=54220999 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=54221653&end=54221654 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.32.3 + method: GET + uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=54221653&end=54221654 + response: + body: + string: T + headers: + Connection: + - close + Content-Length: + - '1' + Content-Type: + - text/plain; charset=utf-8 + Date: + - Tue, 04 Feb 2025 01:34:30 GMT + Server: + - Werkzeug/2.2.3 Python/3.10.12 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/extras/test_vcf_annotation.py b/tests/extras/test_vcf_annotation.py index 0119a2c2..4b6107e8 100644 --- a/tests/extras/test_vcf_annotation.py +++ b/tests/extras/test_vcf_annotation.py @@ -23,7 +23,7 @@ def input_vcf(): @pytest.mark.vcr -def test_annotate_vcf_grch38_noattrs( +def test_grch38_noattrs( vcf_annotator: VCFAnnotator, vcr_cassette, input_vcf: Path, tmp_path: Path ): vcr_cassette.allow_playback_repeats = False @@ -48,7 +48,7 @@ def test_annotate_vcf_grch38_noattrs( @pytest.mark.vcr -def test_annotate_vcf_grch38_attrs( +def test_grch38_attrs( vcf_annotator: VCFAnnotator, vcr_cassette, input_vcf: Path, tmp_path: Path ): vcr_cassette.allow_playback_repeats = False @@ -71,7 +71,7 @@ def test_annotate_vcf_grch38_attrs( @pytest.mark.vcr -def test_annotate_vcf_grch38_attrs_altsonly( +def test_grch38_attrs_altsonly( vcf_annotator: VCFAnnotator, vcr_cassette, input_vcf: Path, tmp_path: Path ): vcr_cassette.allow_playback_repeats = False @@ -100,7 +100,7 @@ def test_annotate_vcf_grch38_attrs_altsonly( @pytest.mark.vcr -def test_annotate_vcf_grch37_attrs(vcf_annotator, vcr_cassette): +def test_grch37_attrs(vcf_annotator, vcr_cassette): vcr_cassette.allow_playback_repeats = False input_vcf = f"{TEST_DATA_DIR}/test_vcf_input.vcf" output_vcf = f"{TEST_DATA_DIR}/test_vcf_output_grch37_attrs.vcf.gz" @@ -123,7 +123,7 @@ def test_annotate_vcf_grch37_attrs(vcf_annotator, vcr_cassette): @pytest.mark.vcr -def test_annotate_vcf_pickle_only(vcf_annotator, vcr_cassette): +def test_pickle_only(vcf_annotator, vcr_cassette): vcr_cassette.allow_playback_repeats = False input_vcf = f"{TEST_DATA_DIR}/test_vcf_input.vcf" output_vcf = f"{TEST_DATA_DIR}/test_vcf_output_pickle_only.vcf.gz" @@ -140,7 +140,7 @@ def test_annotate_vcf_pickle_only(vcf_annotator, vcr_cassette): @pytest.mark.vcr -def test_annotate_vcf_vcf_only(vcf_annotator, vcr_cassette): +def test_vcf_only(vcf_annotator, vcr_cassette): vcr_cassette.allow_playback_repeats = False input_vcf = f"{TEST_DATA_DIR}/test_vcf_input.vcf" output_vcf = f"{TEST_DATA_DIR}/test_vcf_output_vcf_only.vcf.gz" @@ -159,7 +159,7 @@ def test_annotate_vcf_vcf_only(vcf_annotator, vcr_cassette): Path(output_vcf).unlink() -def test_annotate_vcf_input_validation(vcf_annotator): +def test_input_validation(vcf_annotator): input_vcf = f"{TEST_DATA_DIR}/test_vcf_input.vcf" with pytest.raises(VCFAnnotatorError) as e: From e5da86447dd60a09275fabc742077e6e3fbe28ca Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Mon, 3 Feb 2025 20:36:51 -0500 Subject: [PATCH 05/12] sdlfjksdf --- .../extras/cassettes/test_grch38_noattrs.yaml | 1466 ----------------- 1 file changed, 1466 deletions(-) delete mode 100644 tests/extras/cassettes/test_grch38_noattrs.yaml diff --git a/tests/extras/cassettes/test_grch38_noattrs.yaml b/tests/extras/cassettes/test_grch38_noattrs.yaml deleted file mode 100644 index 435071dc..00000000 --- a/tests/extras/cassettes/test_grch38_noattrs.yaml +++ /dev/null @@ -1,1466 +0,0 @@ -interactions: -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/metadata/GRCh38:chr19 - response: - body: - string: "{\n \"added\": \"2016-08-24T08:19:02Z\",\n \"aliases\": [\n \"Ensembl:19\",\n - \ \"ensembl:19\",\n \"GRCh38:19\",\n \"GRCh38:chr19\",\n \"GRCh38.p1:19\",\n - \ \"GRCh38.p1:chr19\",\n \"GRCh38.p10:19\",\n \"GRCh38.p10:chr19\",\n - \ \"GRCh38.p11:19\",\n \"GRCh38.p11:chr19\",\n \"GRCh38.p12:19\",\n - \ \"GRCh38.p12:chr19\",\n \"GRCh38.p2:19\",\n \"GRCh38.p2:chr19\",\n - \ \"GRCh38.p3:19\",\n \"GRCh38.p3:chr19\",\n \"GRCh38.p4:19\",\n \"GRCh38.p4:chr19\",\n - \ \"GRCh38.p5:19\",\n \"GRCh38.p5:chr19\",\n \"GRCh38.p6:19\",\n \"GRCh38.p6:chr19\",\n - \ \"GRCh38.p7:19\",\n \"GRCh38.p7:chr19\",\n \"GRCh38.p8:19\",\n \"GRCh38.p8:chr19\",\n - \ \"GRCh38.p9:19\",\n \"GRCh38.p9:chr19\",\n \"MD5:b0eba2c7bb5c953d1e06a508b5e487de\",\n - \ \"NCBI:NC_000019.10\",\n \"refseq:NC_000019.10\",\n \"SEGUID:AHxM5/L8jIX08UhBBkKXkiO5rhY\",\n - \ \"SHA1:007c4ce7f2fc8c85f4f148410642979223b9ae16\",\n \"VMC:GS_IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n - \ \"sha512t24u:IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n \"ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\"\n - \ ],\n \"alphabet\": \"ACGNT\",\n \"length\": 58617616\n}\n" - headers: - Connection: - - close - Content-Length: - - '1035' - Content-Type: - - application/json - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=82663&end=82664 - response: - body: - string: C - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/metadata/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl - response: - body: - string: "{\n \"added\": \"2016-08-24T08:19:02Z\",\n \"aliases\": [\n \"Ensembl:19\",\n - \ \"ensembl:19\",\n \"GRCh38:19\",\n \"GRCh38:chr19\",\n \"GRCh38.p1:19\",\n - \ \"GRCh38.p1:chr19\",\n \"GRCh38.p10:19\",\n \"GRCh38.p10:chr19\",\n - \ \"GRCh38.p11:19\",\n \"GRCh38.p11:chr19\",\n \"GRCh38.p12:19\",\n - \ \"GRCh38.p12:chr19\",\n \"GRCh38.p2:19\",\n \"GRCh38.p2:chr19\",\n - \ \"GRCh38.p3:19\",\n \"GRCh38.p3:chr19\",\n \"GRCh38.p4:19\",\n \"GRCh38.p4:chr19\",\n - \ \"GRCh38.p5:19\",\n \"GRCh38.p5:chr19\",\n \"GRCh38.p6:19\",\n \"GRCh38.p6:chr19\",\n - \ \"GRCh38.p7:19\",\n \"GRCh38.p7:chr19\",\n \"GRCh38.p8:19\",\n \"GRCh38.p8:chr19\",\n - \ \"GRCh38.p9:19\",\n \"GRCh38.p9:chr19\",\n \"MD5:b0eba2c7bb5c953d1e06a508b5e487de\",\n - \ \"NCBI:NC_000019.10\",\n \"refseq:NC_000019.10\",\n \"SEGUID:AHxM5/L8jIX08UhBBkKXkiO5rhY\",\n - \ \"SHA1:007c4ce7f2fc8c85f4f148410642979223b9ae16\",\n \"VMC:GS_IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n - \ \"sha512t24u:IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n \"ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\"\n - \ ],\n \"alphabet\": \"ACGNT\",\n \"length\": 58617616\n}\n" - headers: - Connection: - - close - Content-Length: - - '1035' - Content-Type: - - application/json - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=82663&end=82664 - response: - body: - string: C - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=284349&end=284351 - response: - body: - string: CA - headers: - Connection: - - close - Content-Length: - - '2' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284349&end=284351 - response: - body: - string: CA - headers: - Connection: - - close - Content-Length: - - '2' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284350&end=284351 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284349&end=284350 - response: - body: - string: C - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284351&end=284352 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284352&end=284353 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284353&end=284354 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284354&end=284355 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284355&end=284356 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284356&end=284357 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284357&end=284358 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284358&end=284359 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284359&end=284360 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284360&end=284361 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284361&end=284362 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284362&end=284363 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284363&end=284364 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284364&end=284365 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284365&end=284366 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284366&end=284367 - response: - body: - string: T - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284350&end=284350 - response: - body: - string: '' - headers: - Connection: - - close - Content-Length: - - '0' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284351&end=284366 - response: - body: - string: AAAAAAAAAAAAAAA - headers: - Connection: - - close - Content-Length: - - '15' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=284350&end=284366 - response: - body: - string: AAAAAAAAAAAAAAAA - headers: - Connection: - - close - Content-Length: - - '16' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=289463&end=289464 - response: - body: - string: T - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289463&end=289464 - response: - body: - string: T - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289464&end=289464 - response: - body: - string: '' - headers: - Connection: - - close - Content-Length: - - '0' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289464&end=289465 - response: - body: - string: C - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289465&end=289466 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289466&end=289467 - response: - body: - string: G - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=289464&end=289466 - response: - body: - string: CA - headers: - Connection: - - close - Content-Length: - - '2' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=28946399&end=28946400 - response: - body: - string: T - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=28946399&end=28946400 - response: - body: - string: T - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=490413&end=490416 - response: - body: - string: ACT - headers: - Connection: - - close - Content-Length: - - '3' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=490413&end=490416 - response: - body: - string: ACT - headers: - Connection: - - close - Content-Length: - - '3' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=490414&end=490416 - response: - body: - string: CT - headers: - Connection: - - close - Content-Length: - - '2' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=490413&end=490414 - response: - body: - string: A - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=490416&end=490417 - response: - body: - string: G - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=490414&end=490414 - response: - body: - string: '' - headers: - Connection: - - close - Content-Length: - - '0' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=490416&end=490416 - response: - body: - string: '' - headers: - Connection: - - close - Content-Length: - - '0' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=54220023&end=54220024 - response: - body: - string: G - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=54220023&end=54220024 - response: - body: - string: G - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=54220998&end=54220999 - response: - body: - string: T - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:chr19?start=54221653&end=54221654 - response: - body: - string: T - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -- request: - body: null - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - User-Agent: - - python-requests/2.32.3 - method: GET - uri: http://localhost:5000/seqrepo/1/sequence/ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl?start=54221653&end=54221654 - response: - body: - string: T - headers: - Connection: - - close - Content-Length: - - '1' - Content-Type: - - text/plain; charset=utf-8 - Date: - - Tue, 04 Feb 2025 01:34:30 GMT - Server: - - Werkzeug/2.2.3 Python/3.10.12 - status: - code: 200 - message: OK -version: 1 From 6458c51ed22715c89c1c30be17daeaed28b0e80f Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Mon, 3 Feb 2025 20:48:45 -0500 Subject: [PATCH 06/12] maybe starting to pass more? --- tests/extras/test_vcf_annotation.py | 75 ++++++++++++++--------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/tests/extras/test_vcf_annotation.py b/tests/extras/test_vcf_annotation.py index 4b6107e8..abf94b49 100644 --- a/tests/extras/test_vcf_annotation.py +++ b/tests/extras/test_vcf_annotation.py @@ -23,7 +23,7 @@ def input_vcf(): @pytest.mark.vcr -def test_grch38_noattrs( +def test_annotate_vcf_grch38_noattrs( vcf_annotator: VCFAnnotator, vcr_cassette, input_vcf: Path, tmp_path: Path ): vcr_cassette.allow_playback_repeats = False @@ -48,7 +48,7 @@ def test_grch38_noattrs( @pytest.mark.vcr -def test_grch38_attrs( +def test_annotate_vcf_grch38_attrs( vcf_annotator: VCFAnnotator, vcr_cassette, input_vcf: Path, tmp_path: Path ): vcr_cassette.allow_playback_repeats = False @@ -71,7 +71,7 @@ def test_grch38_attrs( @pytest.mark.vcr -def test_grch38_attrs_altsonly( +def test_annotate_vcf_grch38_attrs_altsonly( vcf_annotator: VCFAnnotator, vcr_cassette, input_vcf: Path, tmp_path: Path ): vcr_cassette.allow_playback_repeats = False @@ -100,16 +100,17 @@ def test_grch38_attrs_altsonly( @pytest.mark.vcr -def test_grch37_attrs(vcf_annotator, vcr_cassette): +def test_annotate_vcf_grch37_attrs( + vcf_annotator: VCFAnnotator, vcr_cassette, input_vcf: Path, tmp_path: Path +): vcr_cassette.allow_playback_repeats = False - input_vcf = f"{TEST_DATA_DIR}/test_vcf_input.vcf" - output_vcf = f"{TEST_DATA_DIR}/test_vcf_output_grch37_attrs.vcf.gz" - output_vrs_pkl = f"{TEST_DATA_DIR}/test_vcf_pkl_grch37_attrs.pkl" - expected_vcf = f"{TEST_DATA_DIR}/test_vcf_expected_output.vcf.gz" + output_vcf = tmp_path / "test_vcf_output_grch37_attrs.vcf.gz" + output_vrs_pkl = tmp_path / "test_vcf_pkl_grch37_attrs.pkl" + expected_vcf = TEST_DATA_DIR / "test_vcf_expected_output.vcf.gz" # Test GRCh37 assembly, which was not used for input_vcf vcf_annotator.annotate( - input_vcf, output_vcf, output_vrs_pkl, vrs_attributes=True, assembly="GRCh37" + input_vcf, output_vcf, output_vrs_pkl, incl_vrs_attrs=True, assembly="GRCh37" ) with gzip.open(output_vcf, "rt") as out_vcf: out_vcf_lines = out_vcf.readlines() @@ -118,37 +119,36 @@ def test_grch37_attrs(vcf_annotator, vcr_cassette): assert out_vcf_lines != expected_output_lines assert Path(output_vrs_pkl).exists() assert vcr_cassette.all_played - Path(output_vcf).unlink() - Path(output_vrs_pkl).unlink() @pytest.mark.vcr -def test_pickle_only(vcf_annotator, vcr_cassette): +def test_annotate_vcf_pickle_only( + vcf_annotator: VCFAnnotator, vcr_cassette, input_vcf: Path, tmp_path: Path +): vcr_cassette.allow_playback_repeats = False - input_vcf = f"{TEST_DATA_DIR}/test_vcf_input.vcf" - output_vcf = f"{TEST_DATA_DIR}/test_vcf_output_pickle_only.vcf.gz" - output_vrs_pkl = f"{TEST_DATA_DIR}/test_vcf_pkl_pickle_only.pkl" + output_vcf = tmp_path / "test_vcf_output_pickle_only.vcf.gz" + output_vrs_pkl = tmp_path / "test_vcf_pkl_pickle_only.pkl" # Test only pickle output vcf_annotator.annotate( - input_vcf, vrs_pickle_out=output_vrs_pkl, vrs_attributes=True + input_vcf, output_pkl_path=output_vrs_pkl, incl_vrs_attrs=True ) assert Path(output_vrs_pkl).exists() assert not Path(output_vcf).exists() assert vcr_cassette.all_played - Path(output_vrs_pkl).unlink() @pytest.mark.vcr -def test_vcf_only(vcf_annotator, vcr_cassette): +def test_annotate_vcf_vcf_only( + vcf_annotator: VCFAnnotator, vcr_cassette, input_vcf: Path, tmp_path: Path +): vcr_cassette.allow_playback_repeats = False - input_vcf = f"{TEST_DATA_DIR}/test_vcf_input.vcf" - output_vcf = f"{TEST_DATA_DIR}/test_vcf_output_vcf_only.vcf.gz" - output_vrs_pkl = f"{TEST_DATA_DIR}/test_vcf_pkl_vcf_only.pkl" - expected_vcf = f"{TEST_DATA_DIR}/test_vcf_expected_output.vcf.gz" + output_vcf = tmp_path / "test_vcf_output_vcf_only.vcf.gz" + output_vrs_pkl = tmp_path / "test_vcf_pkl_vcf_only.pkl" + expected_vcf = TEST_DATA_DIR / "test_vcf_expected_output.vcf.gz" # Test only VCF output - vcf_annotator.annotate(input_vcf, vcf_out=output_vcf, vrs_attributes=True) + vcf_annotator.annotate(input_vcf, output_vcf_path=output_vcf, incl_vrs_attrs=True) with gzip.open(output_vcf, "rt") as out_vcf: out_vcf_lines = out_vcf.readlines() with gzip.open(expected_vcf, "rt") as expected_output: @@ -156,50 +156,49 @@ def test_vcf_only(vcf_annotator, vcr_cassette): assert out_vcf_lines == expected_output_lines assert vcr_cassette.all_played assert not Path(output_vrs_pkl).exists() - Path(output_vcf).unlink() -def test_input_validation(vcf_annotator): - input_vcf = f"{TEST_DATA_DIR}/test_vcf_input.vcf" - - with pytest.raises(VCFAnnotatorError) as e: +def test_annotate_vcf_input_validation(vcf_annotator: VCFAnnotator, input_vcf: Path): + with pytest.raises( + VCFAnnotatorError, + match="Must provide one of: `output_vcf_path` or `output_pkl_path`", + ): vcf_annotator.annotate(input_vcf) - assert str(e.value) == "Must provide one of: `vcf_out` or `vrs_pickle_out`" @pytest.mark.vcr -def test_get_vrs_object_invalid_input(vcf_annotator, caplog): +def test_get_vrs_object_invalid_input(vcf_annotator: VCFAnnotator, caplog): """Test that _get_vrs_object method works as expected with invalid input""" # No CHROM - vcf_annotator._get_vrs_object(".-140753336-A-T", {}, [], "GRCh38") + vcf_annotator._process_allele(".-140753336-A-T", {}, {}, "GRCh38") assert "KeyError when getting refget accession: GRCh38:." in caplog.text # No POS - vcf_annotator._get_vrs_object("7-.-A-T", {}, [], "GRCh38") + vcf_annotator._process_allele("7-.-A-T", {}, {}, "GRCh38") assert "None was returned when translating 7-.-A-T from gnomad" in caplog.text # No REF - vcf_annotator._get_vrs_object("7-140753336-.-T", {}, [], "GRCh38") + vcf_annotator._process_allele("7-140753336-.-T", {}, {}, "GRCh38") assert ( "None was returned when translating 7-140753336-.-T from gnomad" in caplog.text ) # No ALT - vcf_annotator._get_vrs_object("7-140753336-A-.", {}, [], "GRCh38") + vcf_annotator._process_allele("7-140753336-A-.", {}, {}, "GRCh38") assert ( "None was returned when translating 7-140753336-A-. from gnomad" in caplog.text ) # Invalid ref, but not requiring validation checks so no error is raised - vcf_annotator._get_vrs_object( - "7-140753336-G-T", {}, [], "GRCh38", require_validation=False + vcf_annotator._process_allele( + "7-140753336-G-T", {}, {}, "GRCh38", require_validation=False ) assert "" in caplog.text # Invalid ref, but requiring validation checks so an error is raised invalid_ref_seq_msg = "Expected reference sequence C on GRCh38:7 at positions (140753335, 140753336) but found A" with pytest.raises(DataProxyValidationError, match=re.escape(invalid_ref_seq_msg)): - vcf_annotator._get_vrs_object( - "7-140753336-C-T", {}, [], "GRCh38", require_validation=True + vcf_annotator._process_allele( + "7-140753336-C-T", {}, {}, "GRCh38", require_validation=True ) assert invalid_ref_seq_msg in caplog.text From 18df8c3ff6136b70dcd37fb92b2be61f2b11d116 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Tue, 4 Feb 2025 11:47:54 -0500 Subject: [PATCH 07/12] sdlfkj --- src/ga4gh/vrs/extras/annotator/cli.py | 81 +++++++++++---------------- 1 file changed, 34 insertions(+), 47 deletions(-) diff --git a/src/ga4gh/vrs/extras/annotator/cli.py b/src/ga4gh/vrs/extras/annotator/cli.py index 87d3d109..612335c2 100644 --- a/src/ga4gh/vrs/extras/annotator/cli.py +++ b/src/ga4gh/vrs/extras/annotator/cli.py @@ -12,6 +12,9 @@ import click +from ga4gh.vrs.dataproxy import create_dataproxy +from ga4gh.vrs.extras.annotator.vcf import VCFAnnotator + _logger = logging.getLogger(__name__) @@ -69,12 +72,12 @@ def _set_log_level(ctx: dict, param: str, value: _LogLevel) -> None: # noqa: AR @_cli.command(name="vcf") @_log_level_option @click.argument( - "vcf_in", + "vcf-in", nargs=1, type=click.Path(exists=True, readable=True, dir_okay=False, path_type=Path), ) @click.option( - "--vcf_out", + "--vcf-out", required=False, type=click.Path(writable=True, allow_dash=False, path_type=Path), help=( @@ -82,7 +85,7 @@ def _set_log_level(ctx: dict, param: str, value: _LogLevel) -> None: # noqa: AR ), ) @click.option( - "--vrs_pickle_out", + "--pkl-out", required=False, type=click.Path(writable=True, allow_dash=False, path_type=Path), help=( @@ -90,35 +93,16 @@ def _set_log_level(ctx: dict, param: str, value: _LogLevel) -> None: # noqa: AR ), ) @click.option( - "--vrs_attributes", + "--incl-vrs-attrs", is_flag=True, default=False, help="Include VRS_Start, VRS_End, and VRS_State fields in the VCF output INFO field.", ) @click.option( - "--seqrepo_dp_type", - required=False, - default=SeqRepoProxyType.LOCAL, - type=click.Choice( - [v.value for v in SeqRepoProxyType.__members__.values()], case_sensitive=True - ), - help="Specify type of SeqRepo dataproxy to use.", - show_default=True, - show_choices=True, -) -@click.option( - "--seqrepo_root_dir", - required=False, - default=Path("/usr/local/share/seqrepo/latest"), - type=click.Path(path_type=Path), - help="Define root directory for local SeqRepo instance, if --seqrepo_dp_type=local.", - show_default=True, -) -@click.option( - "--seqrepo_base_url", + "--dataproxy-uri", required=False, - default="http://localhost:5000/seqrepo", - help="Specify base URL for SeqRepo REST API, if --seqrepo_dp_type=rest.", + default="seqrepo+http://localhost:5000/seqrepo", + help="URI declaring source of sequence data. See subcommand description for more information.", show_default=True, ) @click.option( @@ -130,13 +114,13 @@ def _set_log_level(ctx: dict, param: str, value: _LogLevel) -> None: # noqa: AR type=str, ) @click.option( - "--skip_ref", + "--incl-ref-allele", is_flag=True, default=False, help="Skip VRS computation for REF alleles.", ) @click.option( - "--require_validation", + "--require-validation", is_flag=True, default=False, help="Require validation checks to pass to construct a VRS object.", @@ -151,13 +135,11 @@ def _set_log_level(ctx: dict, param: str, value: _LogLevel) -> None: # noqa: AR def _annotate_vcf_cli( vcf_in: Path, vcf_out: Path | None, - vrs_pickle_out: Path | None, - vrs_attributes: bool, - seqrepo_dp_type: SeqRepoProxyType, - seqrepo_root_dir: Path, - seqrepo_base_url: str, + pkl_out: Path | None, + dataproxy_uri: str, assembly: str, - skip_ref: bool, + incl_vrs_attrs: bool, + incl_ref_allele: bool, require_validation: bool, silent: bool, ) -> None: @@ -166,26 +148,31 @@ def _annotate_vcf_cli( $ vrs-annotate vcf input.vcf.gz --vcf_out output.vcf.gz --vrs_pickle_out vrs_objects.pkl Note that at least one of --vcf_out or --vrs_pickle_out must be selected and defined. - """ - annotator = VCFAnnotator( - seqrepo_dp_type, seqrepo_base_url, str(seqrepo_root_dir.absolute()) - ) - vcf_out_str = str(vcf_out.absolute()) if vcf_out is not None else vcf_out - vrs_pkl_out_str = ( - str(vrs_pickle_out.absolute()) if vrs_pickle_out is not None else vrs_pickle_out - ) + + Sequence data from a provider such as SeqRepo is required. Use the `--dataproxy_api` + option or the environment variable `GA4GH_VRS_DATAPROXY_URI` to define its location. + Currently accepted URI schemes: + + \b + * seqrepo+file:///path/to/seqrepo/root + * seqrepo+:../relative/path/to/seqrepo/root + * seqrepo+http://localhost:5000/seqrepo + * seqrepo+https://somewhere:5000/seqrepo + """ # noqa: D301 + data_proxy = create_dataproxy(dataproxy_uri) + annotator = VCFAnnotator(data_proxy) start = timer() msg = f"Annotating {vcf_in} with the VCF Annotator..." _logger.info(msg) if not silent: click.echo(msg) annotator.annotate( - str(vcf_in.absolute()), - vcf_out=vcf_out_str, - vrs_pickle_out=vrs_pkl_out_str, - vrs_attributes=vrs_attributes, + vcf_in, + output_vcf_path=vcf_out, + output_pkl_path=pkl_out, + incl_vrs_attrs=incl_vrs_attrs, + incl_ref_allele=incl_ref_allele, assembly=assembly, - compute_for_ref=(not skip_ref), require_validation=require_validation, ) end = timer() From 8eac54a4c9e7f853f5a2271e454e7305c856e4d8 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Tue, 4 Feb 2025 16:14:08 -0500 Subject: [PATCH 08/12] dont commit this --- .../data/test_vcf_output_grch38_attrs.vcf.gz | Bin 4753 -> 0 bytes .../test_vcf_output_grch38_attrs_altsonly.vcf.gz | Bin 4504 -> 0 bytes tests/extras/data/test_vcf_pkl_grch38_attrs.pkl | Bin 7026 -> 0 bytes .../data/test_vcf_pkl_grch38_attrs_altsonly.pkl | Bin 3600 -> 0 bytes 4 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/extras/data/test_vcf_output_grch38_attrs.vcf.gz delete mode 100644 tests/extras/data/test_vcf_output_grch38_attrs_altsonly.vcf.gz delete mode 100644 tests/extras/data/test_vcf_pkl_grch38_attrs.pkl delete mode 100644 tests/extras/data/test_vcf_pkl_grch38_attrs_altsonly.pkl diff --git a/tests/extras/data/test_vcf_output_grch38_attrs.vcf.gz b/tests/extras/data/test_vcf_output_grch38_attrs.vcf.gz deleted file mode 100644 index a8805ec627809e3e20bbfd558c0671d4046f2210..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4753 zcmV;C5^n7uiwFb&00000{{{d;LjnN3482@yZ`{Te{jB^7LjKaCvM}!l8U;`#TM;V9 z*26&q1Q4iQu9n3vxgoi7)S~~rcles&l5RKFweulCcFA+k+{d|hhSa`#wMbXVA}`i) zb@KDs+b40%zI*lR?d8?oo12sGFV9atoZj9ZohRk2NHB|$6363F4OY9OseBWTvqW9BTVKo8MRz8`*KX)VNz?D zl~N*L3Xe$`6#)8jOxa@^D!E}8JXMdWi6PQz1WesyO3MXc@|Ra*SZe z&cuWi3d01(MpJb*CWb)+mNAZTsP4Lk9920rQd*2nqwW?m)?8+YK?z7_gO5d*QYIzF zB+`u{d}>Z08lydV8)LPCZ5PJe=}nAL=jzYV=N^Clms>66T_#W zmZ2_VEG3k1!x5y!+nAUQE!POb?oEt~96rN1DEg(fASUK2Xjurc_?R^&IDzWxC5mZ; z!uHT2fQDMYgytR^MiGQM_Runb78nxIL$lD=fQ>M_dTipc-7z2{G!39B48)kwDs;BM z%swgyiH+)zAdc1|sFp{jp{qH@mfvG*kIl8Rh$In0}=;7Sd26*EOt=xvQTKnbOXEf09c?jgo_5n4v-sdZfV|;Ux=e z9aXd?55d|r6{jw_1Hp$BLhJ^2aqDvRf(D*COjAssJd#xG23E1 z=<(3u34=tfMiVHX8y^s%b&%Jb63nSmngCk}f!YjgcvPe?&MQ6bplIA%sF=_}cT|#K zI>ZF^sHnu$J{!~{Cm1g#dgvgtAr~Q>e#s z;Z6)3g33DVGXmp1;~rX&uLjM-tpUNDusIJ}80<_jPqJ0mcnv{RKCZ%}GlRM1f!o3Bq1HmJE5KC%*+r_Fq@nb8<;Qh3%OG$)u231?s!o=|-q zK6u1<<=B%Bqi9hZfeDYAF`qi}a7!Q<=7BBOVZl(0Kc(}~VLMk66`BoF54GnG4i+%a zg1HW#)I)Q}gbp7b8`K98;USK;7V~_Fg=ZED>Ft*GwcWw9L1vJ)jm?{tgVS+sDbkG7 zzBoPDlfsCN=oS_}od)I#^ORDHP+O%I7=J$HnTJ9p2~sC(;~CaGboOagt{UP%In}g zxE!2h7=zh0wH%Z_tOPR;W%!^|qx>D6#}2AfF^lAY8lj~rVMGmYGr(IE~-&}8^G6QIC2_=@;4`*xq9+H!Jw7G?QUi;P*8zzw(;g~ zKYP9~Uwu*f1wUJJalJ~)vRcNOCc~evx!ta#d=Xt-M~l3O=5ZARMj3rdt7TLz(=vKY zvb=iUB#}di97Lqa;SPShS~Wjwlg#66K2OW|u}G5f59Q3@!Ww@WKP3&m`zY#ej(|hH zt;%%nzD?>c{P*N1KOwTJ?A zK-67Q#_Ob~xl78gWSez1hOEvyMs;ZXRSne__`~0{Osb*Hw?iABmheAOT>s2bnmsli z0nfmFbztIXnLaLEF1ghY2-cV7Yscg$f}3XvWL=eHwP)P@@cpME&mMQ#{kBa?N5(lf z&F0B!4~AvFPIe*~dpb2rw2F(zq-gS)l7~&R0Pq zvui(pN{Usy*#Ki_s{9 zx5Lw2=V*RQat~?zh1}{isU@NgP~`e>oMtX+aj%orXKlw3Q?}g~&h(^1osB*CU!^V$cj*t?b4T}h>?t?2AX$Mv7`MY^GXbvh zPasy%R^!Kdnf2)P?z)k&UHyPWNN3ws)d2hfO8uRk?FWd9`)$jCQuZ`=?8e1IS`~2x zv+N%6y7~AfN*CeaHV-G-rhAupSBDLB?GKoYvLu;L#sHw4zZ}JFmj33_tsNlta6>zE zHzwWfTQ_<&Ok5eep96Ic=r>6ari(k5$yvny(tq7AshdaQpffB!;%vTH9U9>G|o^Rkxa(|FfEQ zSg(CpPAz4O{$l8<-9W8St;!F#yg`ia_7H_L@53%o6RXcBEN5 zkJRecje?7Q3*A@h&o{UCr?q$PFVD*ze@-f0ogC}U*5#8M0IGD6y5A3WGmy)ii&Js& zV+6QSgX_c=#)@*2XLa6Jd2`uay_x;JNkjL^&6~INh;e##*9L5Y>zks;i#_z?(rvk- z#9bZbv)Q(&w}>_GX8LgiFMIk&HG}4I==vq}rLpbXDlV#heB^@TmYmMF&RNg{(kynn z!)@twp^@Kiu8PVU_FMTco8zw_0nl&6A7W4?`{pTVB8(U-s0e5uZc;5Pzlg8J_w2`; z_wNoqyuUrTJU@VI4&eKP>yM{b2hEYffm_c(Q`-miuVa7rUtBbUJ-b@~03VA81ONa4 z009360763o0Ja6~*xPcVNEiU%YxgO<*{a>Gj?>KK`CQJ3U7O%HT}Wor}o7-)g#PPG=# zT46q+X?eJ=XhwM!4>w4-s zj%%l065*K}EbH?bE!1K>o2FrG#6d(l8a|F@cnZ8=?VDj}TCV3NTQb@8c&Hh&=V1>U zon~C0napf1jpf>AqD9H5$Ebtu$abS{YS?2xU}tmFoy4jyfbpx56a>kN7YdoO z!eb@xi#(VqXtHIRNB7Bro~N3dC;%P}aw3c^bBp7%mP^m_O|~b`@*#ug_2xQHHL62p zP79f_cHjqL;BAS;*q9}p6&kJ|;>f{vGD{F7*RdCbEZ7gxNUMj3IK*aB^qlYRk`35( zf0wZ0x&2ny>&(+G-Cv8C5}zG+n=bAb;n!KaLluNg$b9`f?$4m5!C|lS@U{D(S?e2( zF;*e3ahqepnJ`8KJ7@*s{2Jxe99Lp21G)TjNi{=J5<)xyfg}L|o|R~pJJ=SPJgMpx zU6B=4QFK+8WnK9c^aJt?-G`u`PlqM`1$6E*^iT3W$=pSpoOS=*)W0W1=dJr^FIArV zKYlc*-XoqY2K~)de?FSyNjRq{^U1t&Zk}{=h@*|wi3;LdZPg5uiPLTv;zP?xd0|CU z&6mdH##yVgJHAYuK%>FV+cEL%dRJW{u7fg1!9O8{tBeS5xP09Ec)# zEu6*zgy6Ms5)bDC0ObK7@Y&9VBDozQ7SOyPv5?va03$*MWjS6}^osl|0RD#n_|K>p zk#*6s7qpX}v8J2P=yp5l_4)fj;?e`!j%1co?ziBm=YltBFGvh~HQKH@iVKqP#$~pf z1)M;Bb!<6@MT4EM!f}4h?W#*To%jQ_OKDu7kc7ym1Y!k=qDhQUWFHP8vxm%*oeYFj zMp>==1$VCZs^!PM ztar^a{TeyhE$}i)m+qm diff --git a/tests/extras/data/test_vcf_output_grch38_attrs_altsonly.vcf.gz b/tests/extras/data/test_vcf_output_grch38_attrs_altsonly.vcf.gz deleted file mode 100644 index 61708a9e4f4adffdb66eebeaa5c3fdaa054140c9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4504 zcmV;J5ohiniwFb&00000{{{d;LjnM~482_2ZsSH0eRjTr@Vx9|Jx1TViH!qzJu{9G zPiA~cumJ)Xv_x54NTfqjo_H7g?_14V7eytjjBApI1d+s3r|WX6x=HiZt3|d-7e%>F z>XRSO-aV;f@ztwW?=CKH-(H`5eevex1q|hRdrfc(PL6o z>HP2;xW)s$N$T`OFm9L=3O+l#Jb9We+8;-+v*>m7y72#c{jcOHX(6M-#U`J5S#H)zes_L-wv_HJFYc06eV63P>bc6QyDF`Z7fDqo-%Kz?z!W@|TEs8MlszVo zorty81ehvdGQkyqz8q5zn2u}d1O-nsU|Qywjx7PE9WZq)6=2GjW4ZxTX)c|S=H;02 zfGM4fmF4#3n4ASrX=6*0^qdD!&73hLpaL-!R!c?@dLC+tD`}JzjsP?i6SGW7%PBxZ zH8CE?oI8r;59Jgy2}z|0s-6$!L$ZsG9omwqpmefN)&Va z-T5VB3^Hej2`ys+N2%5XgTfYDCIpKu#R#RlRqW7?SQv^aCGJBp)j!3UArPm*!gLK1 zb{UG$!c_jCupLF&^(Mqr|76TGg*x?>y(?oGlN2KqA5u2nk+Pg5WqTV!O!Y%>&M7v4 zV59TZ4k?cnO+31F0NxH8T#?K-!#bw@LOM<{k_8jfWyqN1C?c5H(J`!WB)xmEF>M*J z36vDU(7lNrQmC9z6dO&`+t@J-Iw3CvQor(k2MR^ z8u1;&r{l38UB*;OIFn8iNJ+4XPXB@p&tVp`JSviI3`f7fXuP0CB>FMVB3H#t%(U$L}8D>W`>Zq!Y&a;5ynw* zEKrK#+#o>5$RUj47HoiaBNZW(RfR=FNI_WXN{pHTWeZa{u?5M2Wne8v`b6q$)WTv% z>T2A9JIY236$1Wj+mWXvMNbc|jY=@hH zfISeUPY}2wx!2W0Ydq%&k~@C`G>fsYQrw&mtViv@5%x#Ta3EufgN48v7o=o_sjc~F zM?-QuAYhO5&v8NNZrp6Np;Qu$hbUl8aGNJUBPB7UEkQG+e_ConjnboGSdc=GdZeQq z;kCfEPAb}3V)5e?PiF*Zi4 zkLB=A3>-qr8uuAR@tpAht=JcXmUwHxC?{+wQ452eDdkDF!Hw53LgkYNADubMEw42A z&;@2dN@x^DCn*mvq;+4cWb9a;D1D7vIv1q34dT(cts~stNj&xWrw}PwW98$N=e9D& z8sZ#f^5J0tB!p6)VpQm{B_)mhVI67i;agUU^0ZlwHZzvuNs7Q4LrX^K5Ih6J@I>ls zeB+Vgk>fxbM=>UC1P&iHQ{HuycuSxJ<$%DBfSxU4{>Z9Q=ShAd}d*Yo^I(L+a28-6pm=y*gjb~Ivuw$Cz^4( z2d77SQW&ub-6HVaX<%+BPbsa!+N!Oh`17g291E3ZM4haQC$4$OLyE^&gIDmDnlsWy zX-9{*#*se8WfiI+#T~_i!vPzG(-=j0JI^XS%E!)1LOHZp4r59B?1)uZSF|H}9DD%R zqmvBhD7&WBqtb_!Q0Ae=H##lJ-_ZqZRGmh|luzbajSq>TB~hk|Mztx86_h%z4et1w za+I|i)u!cwXuj%Xd~_~qN!|?*Y7>u~&XD}gNnmcC!Ydf8F|^&yjRp#?2+lU%{^Q5M z7v`(aNGMXoK0vJ{FDXW)Jz09iUAO8#ip*Z zx&Jn8zD({{X;h{U08&MO?!S0$0l<)dM^2L01I@}qJSAo3KWxnX4%hol{iTQkG$QJM zQYGtjpt(&dtYnk-HpZ;ZdPYrX!mnDYFYrgWXqnbyoA1UpJ}u#YqNI7vQI*4!PWsyH@v-@R}R-TM^ZkEr} z)gBD1Vx4Y9FbQ-TlxUTd4{6!vGiUdY?E=7OfY6r#T!R48Q#t|>Dfb(ub%f{>VFoU6@Z`CX><-TTW2#*d5&_e`^&ZrGU~(VW=E|tMuu%z=e1l*B*{|8L@Y(haJ9gXl zi>I_)C6AB5*c0~*ywXOwFoFv^?QQw@^1H7kQmRhK!ls7Im_M;6r@o z=_Ax(wOOx$crq&sP)ARw_sCI{{yJN2=AP;ME7n|8jm{i7r@kmF#yf=dP8m?JDBk0wj z)4GXN9C_)cI&3n;OC&J1*l=2L3a7I9FLbdlu!1i(Gs+Lj}B+4f&}(>opNZS2ASGV^J;O@G{;d%DL{Px+yR$O`nqq#G{V32;?> z0jxx4HruS~7T^z18m{bZ-$PvdyDdjbIndm)o0RuiT_!cm zvU|kq`iHksw!nki98a`u_pXY*4qNEPA21o^X*%B-1Ay}W@)S3D_KQ!qZh+Xs4c*Y) zn)H`%{OHv(@n!5^2kIR#Y?2^M=eItS^GN({_%2_dVaN+Qi5~Nd-}g@l2uke?=}f5ha&Ka+&Ske z@_D(XZJk@%ib#EdRB2wp2ylLT6m6Q8(G2{!uQhOK0&TCew!M_oH>a1E{c5iN&uY41 zz42kSYbjIo7envb4KxbXsr-1$i(+)UhbWu{ANGOTv8ui7eZ1KxCqBZS4|b4#N1Au@ zNTY83D7YB5&_kvEcztts+IZ*g;?2+%Pk%@0n&enNZhSuZ0ieznnSXz;DZ8o#NwrS`;Ila8?0=0wV+p;XmJ#^#3Z?&S-|2isW zvrXA-4I94g)Z>X=4786L%k9sh+gH$!rnYbDq^$SxjSq@nYBt|^CxP~7dE$44o674! zE4%$X6;%!Fm-bser)Qo5=ojVMRa@H}fyYKvgxw%`Jn5bE`EmJ#VvQbGcn|Iqoa^ zMP+*TcKZz9O%gY7@G6cxbb}N<7 zcHA&%&m2ai66{Q~C^3>SCKC-G#}1wXFI@U&6q%Omx#@~LtaUrq48`-XhmB!3X*(wC z%;Z~zvLt1AgPjCv*ALf1aPc5|5Uo6KzPbsLvy3ekYN;8Mzk42Rrs(Je({*z@o{~W5 zjlM3AG`TyHm9BDmNxsYde;V)$k|`N+tPBAxz7P$6n-qR%k!6S*0!Sf-UJGLP1Pwhe z^WDG%B_$HMQWIO%v>WZivbM1=iE&k8JNGG|)srh?TSD4dWXBj=<_agZCM$dSDZjnF zoMfq9VWDhaRe!9`7%{gp2>mb$y%jN)7*5Jtk>UCgj%_?iofJXRPP{PUz-Ea?Mm;>n z5jN9u=4^eJe8GD9L5e11zx|zjb)coe@zwC|+r^z`EzdR9SVV%xKkZUnyD2T^ZJyu7 zs8m0W(PU{_Mo1u0MY4#1z{w29Z-Gi|k>T`)t}1F@RrS8ED7yM4ty`(NoRhTH=cbC` zQY(L4>l=;krO^_5E#7x~eNVnDMmKw}bk_YIzxtnX$mvR3bffg%9&`-pu4iY0xM1k+ zbLaltUiO`9q3Sf}v+IuESUo z<$j6iD-^{&h10nbes9%V=H~6g^VUE;%J*cl|Ki*KLuk%!xoh@HdXC)XoKNiZdV1FD zF4CnD%{65)wO?swBx%nxQCAUNPZ$-1MIk>1&?Ltq!~sSSWe(CC!mI>Yl;>JS)f>tu z;s1f~e}=XE-#Dt#3+IB#cwuTx7B{OUPxE0KoxAMnX%6q9u-MhO=2BWL+Lhc&9ZRMn zjby|#0CA#BGZLU_axjIE-C*R%sQ^Mc=h$!j1?o7WJ&yaO4XVPcFJ@3x(yIWxt%}(@ zIlV*yuL64zMD1B~(1#FFMIDpRrYzH^NXatCL7D~bf`SN2G^FwZ2OO?;_Ayal z%V{W_GcM{dd`D4xH^Z+Jec77{+0}v>o{jExw9APCn~?96INQETokM{GZ)x;}&X1E^ zp5e%6`BRd1RY{VRC%M{^`P0;kXiVLL~2F-Q( zBNqW~nrSl%6E`JWNt%Cb!Y%$ztIe0t|Jk&%v}uP~Fl`>Kn1AL7@!YyFQNZX^(ubB* z_o{n*_9iqtOa0C*8azHe-#s{^E^YMft6?yYA2UGhW~WWSnN4qzon8=>GmF9(9DVK8 qtNH=Bs@~_$6951oiwFb&00000{{{d;LjnLB00RI3000000001}^PQIf diff --git a/tests/extras/data/test_vcf_pkl_grch38_attrs.pkl b/tests/extras/data/test_vcf_pkl_grch38_attrs.pkl deleted file mode 100644 index 2cce18ec699f4e583a58504c0dc1b8e6a2666d16..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7026 zcmeI0%aWpI6vro*O|OzlrIN*LQk8|1)X>FEkl7*H2r6CxMK=~Iw-y1ph??rEJisi? zQ>Btr_IZoEKmfJH6hkxBJ=L4G?1k9p|Ka(a|2f~suV4PAy?&Lv?)Goqe>KxxhyW@u zhM@ojP2a%?!m`Z$QZfJ_<$JdI)oAylh4cm(T|8jl4b?r$ehHXg5QjCc_r9)!0 zq!$Ls0~BE*me@$I&nv4yv1Ur^q<3*AMU=dwLSIo8HGb1zbhd6rY^U z6wf>Sh7#YqW|(Rj5h@g?u;R-F5U z$nk6c@&;Olueh>#x{vzCagVFUYR7wzBuZ52?r#77+XV4dB6vkzy(tN$w?BP=^OPVN zQPDhz?~FRQ)f`n!5ZapNVk=r65TNNiXK^kP|HApHISUErZw^+5D=1P10g8;?d+E~s zanfAHQDmPFrh#Sn2emjdZuwuIS^~M-0hJvK za$j*bQ^rlR$*PSa@p-ke3LW3BwDIv`c(WW$fJ(~|14DP2kOl@Tk4>+iUmHb~o~t$@ z2 z26$zSz_ndfV%3n=I(RzqS>TJr=YelQXt{aQVigua(2>;{EX#z`HK+5H+R!y=oL)a4 zd=aH*1dpbgLWJo9O`$OOOTN@$9^4Q9xDG(@;P=0?u#>ylq&e$?l~n~DAf3tx-&Ju8 zZH&i*3CW33!Q^k_i^S()$Ame>K;c|uy-~|z7`}zrN6T5ZQ2dLJqeN83TkboHi!4RzJYX+nM;;2s<(ZJ zc9Hlz+RlbBZLI-}nzm9^J6jckd4j9cQ@=HAvdBl+yNJ^B*gFAn9_Pu&$9DCIXtJ?C zn)|@|D!LktbTZl5|C^0_fy;<#`;*yP+=dv8*}lUX=ytOJB@kafjQipEJlq{zSb_cC zdJPPqW}%idnvQklae{9a^Q&5L6-x0r9D@h=GcXsqz41=Y?`K#)#{7H z=T(DaFZpobHk`5B41t9ablN+`wELrQSDi`1D7}7uH55@grSLxKQ7%9Iq5g>BZ2uFN zN6vOSvH$DCY(HsU(UJXsj?+e#qUUv)&>5UX45*4=Rg$WU3^xY2>weJSLEvbM2 diff --git a/tests/extras/data/test_vcf_pkl_grch38_attrs_altsonly.pkl b/tests/extras/data/test_vcf_pkl_grch38_attrs_altsonly.pkl deleted file mode 100644 index f6134063caf16e612479661d9dd67d4d460fadd2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3600 zcmc(h%WkVy6o%8=z>|)ucF;+r4mhVej<{pa3}eRzUt{ofVqvqv*cb@c*j1!Hz!|1b z)luhti@reF#w55Rh^jpbvkk0&ZPxd*_WJef-@f_w>U2Fmz5DdtdgEejrH0cqS>Y@1 z5E=RK{%Q967gtxmAcl&3yhe0|)Hfd|+|7#RgfY-POd8AwjMbUfOcXnq=I$`IylOxq zAFdJq-~brJnItVHPl!%BmHy^L=Y) za8i|LO~M+tv-u@(9{_6&x~bm9PRb~GXN3{afD5HgTS~t#)qi;!CvbtL1D{*3f#)UP zfJk>Yf>5UjxyJW_D5~T8qs2~^qBZjG6g8UiD-(H)Lp@({VUZvRB3=Uv*8YU${Mf(T zK-chrtC;CNnh%b9&I;AedygezppTDFpT18x<290?&6H|Ye$Gitb`>3r7D8|wl~_cxcU&C|V@G)Db{Ja>7)qPf5U ziZ2DLz%u+q4MswT{FFL`CF52g!&bm^e2eGhmdtT7pV;NkyvA^l!iM9|w03>TPMcc5 z)ItJp9P2Yti(2}C$>PehOPTm0yL2&<>fClkAQA*Qw+53v#X3WCrQU1pjXqms^B3Et zjM7`#1#-#QC0@Q_m(s1_k}uDlI>irJAE4;T2?_f@^4JFxYC!hbqhmxn+SB}jT5UE$ zKUz9Fi3?RNTl+lrW#Wt2)3VA7Sfq|cZ{D>8LFy2yhN(xY;gC2lOtSfl*_TmzL-qvA zUeYH~G{0Sx=br^mvM7m>6^@6$K6g4B{>lSx+B-%eOxs_s!|@@aaoYAB(I5{GJ9LIJ zVYc9Tz{|uJ0Ut2(JvzDz!|GITQ1cPG>ED_A+3j-E9-2Zeo?l+3kYeCvl->q-!r~eC zY5B^}UhZr_W3BX4;7|aIlz$fB@GST-=>Zgp<0y)gl~!ztpJ5D##KU239}pOkFt9p7 z*Ezm97W!*BYHpaI7ZO5sIFqp~mONXOi7&E+WACMC;BN+6Kz_ds6=vZIR zpjb8X3L{eQ`9`IJxon z^$FHqERNH;K5XF2mev!jox2FKMbCp?CcX&zcpFIdk!=jBk=@i>M%a=E{8kV*cJs($ Yy?=4=mr;5<=rNB==n&BV0X?|+8v^xbj{pDw From 4498cc5106c77896268eea456e0b29191fd4c8be Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Tue, 4 Feb 2025 16:35:08 -0500 Subject: [PATCH 09/12] stash --- src/ga4gh/vrs/extras/annotator/vcf.py | 2 +- tests/extras/{test_vcf_annotation.py => test_annotate_vcf.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/extras/{test_vcf_annotation.py => test_annotate_vcf.py} (100%) diff --git a/src/ga4gh/vrs/extras/annotator/vcf.py b/src/ga4gh/vrs/extras/annotator/vcf.py index f2cbee09..b957001e 100644 --- a/src/ga4gh/vrs/extras/annotator/vcf.py +++ b/src/ga4gh/vrs/extras/annotator/vcf.py @@ -121,7 +121,7 @@ def _process_allele( ) raise else: - if not vrs_obj: + if vrs_obj is None: _logger.debug( "None was returned when translating %s from gnomad", vcf_coords ) diff --git a/tests/extras/test_vcf_annotation.py b/tests/extras/test_annotate_vcf.py similarity index 100% rename from tests/extras/test_vcf_annotation.py rename to tests/extras/test_annotate_vcf.py From 618f1394c49e99d83582334be6148ff07e187081 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Tue, 4 Feb 2025 16:35:42 -0500 Subject: [PATCH 10/12] add docstring --- src/ga4gh/vrs/extras/annotator/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ga4gh/vrs/extras/annotator/__init__.py b/src/ga4gh/vrs/extras/annotator/__init__.py index e69de29b..7b787f94 100644 --- a/src/ga4gh/vrs/extras/annotator/__init__.py +++ b/src/ga4gh/vrs/extras/annotator/__init__.py @@ -0,0 +1 @@ +"""Provide tools for annotating data with corresponding VRS objects and attributes.""" From 2672e7b0c668a51703bfbc53ba64f19e610200b5 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Wed, 5 Feb 2025 11:43:41 -0500 Subject: [PATCH 11/12] pass those tests --- src/ga4gh/vrs/extras/annotator/vcf.py | 9 ++++----- tests/extras/test_annotate_vcf.py | 18 +++++++++++++++++- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/ga4gh/vrs/extras/annotator/vcf.py b/src/ga4gh/vrs/extras/annotator/vcf.py index b957001e..87dfbe9a 100644 --- a/src/ga4gh/vrs/extras/annotator/vcf.py +++ b/src/ga4gh/vrs/extras/annotator/vcf.py @@ -120,11 +120,10 @@ def _process_allele( "Exception encountered during translation of variation: %s", vcf_coords ) raise - else: - if vrs_obj is None: - _logger.debug( - "None was returned when translating %s from gnomad", vcf_coords - ) + if vrs_obj is None: + _logger.debug( + "None was returned when translating %s from gnomad", vcf_coords + ) if output_pickle and vrs_obj: key = vrs_data_key if vrs_data_key else vcf_coords diff --git a/tests/extras/test_annotate_vcf.py b/tests/extras/test_annotate_vcf.py index abf94b49..64cdf868 100644 --- a/tests/extras/test_annotate_vcf.py +++ b/tests/extras/test_annotate_vcf.py @@ -1,17 +1,31 @@ """Ensure proper functionality of VCFAnnotator""" import gzip +import logging +import os import re from pathlib import Path import pytest -from ga4gh.vrs.dataproxy import DataProxyValidationError, _DataProxy +from ga4gh.vrs.dataproxy import ( + DataProxyValidationError, + SeqRepoRESTDataProxy, + _DataProxy, +) from ga4gh.vrs.extras.annotator.vcf import VCFAnnotator, VCFAnnotatorError TEST_DATA_DIR = Path("tests/extras/data") +@pytest.fixture +def rest_dataproxy(): + """REST dataproxy scoped to individual test cases, rather than the entire session""" + return SeqRepoRESTDataProxy( + base_url=os.environ.get("SEQREPO_REST_URL", "http://localhost:5000/seqrepo") + ) + + @pytest.fixture def vcf_annotator(rest_dataproxy: _DataProxy): return VCFAnnotator(rest_dataproxy) @@ -169,6 +183,8 @@ def test_annotate_vcf_input_validation(vcf_annotator: VCFAnnotator, input_vcf: P @pytest.mark.vcr def test_get_vrs_object_invalid_input(vcf_annotator: VCFAnnotator, caplog): """Test that _get_vrs_object method works as expected with invalid input""" + caplog.set_level(logging.DEBUG) + # No CHROM vcf_annotator._process_allele(".-140753336-A-T", {}, {}, "GRCh38") assert "KeyError when getting refget accession: GRCh38:." in caplog.text From 14ecac4cce444d9e7b80054d83d1df5c95139a50 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Wed, 5 Feb 2025 14:24:25 -0500 Subject: [PATCH 12/12] more tweaks --- src/ga4gh/vrs/extras/annotator/vcf.py | 33 +++++++++++++++------------ tests/extras/test_annotate_vcf.py | 8 +++---- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/ga4gh/vrs/extras/annotator/vcf.py b/src/ga4gh/vrs/extras/annotator/vcf.py index 87dfbe9a..9b703967 100644 --- a/src/ga4gh/vrs/extras/annotator/vcf.py +++ b/src/ga4gh/vrs/extras/annotator/vcf.py @@ -46,7 +46,7 @@ class VCFAnnotator: def __init__(self, data_proxy: _DataProxy) -> None: """Initialize the VCFAnnotator class. - :param data_proxy: + :param data_proxy: GA4GH sequence dataproxy instance. """ self.data_proxy = data_proxy self.tlr = AlleleTranslator(self.data_proxy) @@ -57,6 +57,13 @@ def _update_vcf_header( incl_ref_allele: bool, incl_vrs_attrs: bool, ) -> None: + """Add new fields to VCF header + + :param vcf: pysam VCF object to annotate + :param incl_ref_allele: whether VRS alleles will be calculated for REFs + :param incl_vrs_attrs: whether INFO properties should be defined for VRS attributes + (normalized coordinates/state) + """ info_field_num = "R" if incl_ref_allele else "A" info_field_desc = "REF and ALT" if incl_ref_allele else "ALT" vcf.header.info.add( @@ -99,8 +106,8 @@ def _process_allele( annotations: dict, assembly: str, vrs_data_key: str | None = None, - output_pickle: bool = True, - vrs_attributes: bool = False, + create_pickle: bool = True, + incl_vrs_attrs: bool = False, require_validation: bool = True, ) -> None: """Get VRS object given `vcf_coords`. `vrs_data` and `vrs_field_data` will @@ -125,7 +132,7 @@ def _process_allele( "None was returned when translating %s from gnomad", vcf_coords ) - if output_pickle and vrs_obj: + if create_pickle and vrs_obj: key = vrs_data_key if vrs_data_key else vcf_coords vrs_data[key] = str(vrs_obj.model_dump(exclude_none=True)) @@ -133,7 +140,7 @@ def _process_allele( allele_id = vrs_obj.id if vrs_obj else "" annotations[self.VRS_ALLELE_IDS_FIELD].append(allele_id) - if vrs_attributes: + if incl_vrs_attrs: if vrs_obj: start = str(vrs_obj.location.start) end = str(vrs_obj.location.end) @@ -143,9 +150,7 @@ def _process_allele( else "" ) else: - start = "" - end = "" - alt = "" + start = end = alt = "" annotations[self.VRS_STARTS_FIELD].append(start) annotations[self.VRS_ENDS_FIELD].append(end) @@ -159,7 +164,7 @@ def _process_vcf_row( vrs_info_fields: list[str], incl_vrs_attrs: bool, incl_ref_allele: bool, - output_pickle: bool, + create_pickle: bool, require_validation: bool, ) -> dict: """Compute VRS objects for a VCF row. @@ -180,8 +185,8 @@ def _process_vcf_row( vrs_data, info_field_annotations, assembly, - output_pickle=output_pickle, - vrs_attributes=incl_vrs_attrs, + create_pickle=create_pickle, + incl_vrs_attrs=incl_vrs_attrs, require_validation=require_validation, ) @@ -201,8 +206,8 @@ def _process_vcf_row( info_field_annotations, assembly, vrs_data_key=data_key, - output_pickle=output_pickle, - vrs_attributes=incl_vrs_attrs, + create_pickle=create_pickle, + incl_vrs_attrs=incl_vrs_attrs, require_validation=require_validation, ) @@ -274,7 +279,7 @@ def annotate( vrs_info_fields, incl_vrs_attrs=incl_vrs_attrs, incl_ref_allele=incl_ref_allele, - output_pickle=create_pkl, + create_pickle=create_pkl, require_validation=require_validation, ) except Exception as ex: diff --git a/tests/extras/test_annotate_vcf.py b/tests/extras/test_annotate_vcf.py index 64cdf868..f12a0b65 100644 --- a/tests/extras/test_annotate_vcf.py +++ b/tests/extras/test_annotate_vcf.py @@ -19,16 +19,16 @@ @pytest.fixture -def rest_dataproxy(): - """REST dataproxy scoped to individual test cases, rather than the entire session""" +def rest_dataproxy_fn_scope(): + """REST dataproxy scoped to individual test functions, rather than the entire session""" return SeqRepoRESTDataProxy( base_url=os.environ.get("SEQREPO_REST_URL", "http://localhost:5000/seqrepo") ) @pytest.fixture -def vcf_annotator(rest_dataproxy: _DataProxy): - return VCFAnnotator(rest_dataproxy) +def vcf_annotator(rest_dataproxy_fn_scope: _DataProxy): + return VCFAnnotator(rest_dataproxy_fn_scope) @pytest.fixture