Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/extras/vcf_annotator.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# VCF Annotator

The [VCF Annotator tool](../../src/ga4gh/vrs/extras/vcf_annotation.py) provides utility for annotating VCF's with VRS Allele IDs.
The [VCF Annotator tool](../../src/ga4gh/vrs/extras/annotator/vcf.py) provides a Python class for annotating VCFs with VRS Allele IDs. A [command-line interface](../../src/ga4gh/vrs/extras/annotator/cli.py) is available for accessing these functions from a shell or shell script.

## How to use

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ Source = "https://github.com/ga4gh/vrs-python"
"Bug Tracker" = "https://github.com/ga4gh/vrs-python/issues"

[project.scripts]
vrs-annotate = "ga4gh.vrs.extras.vcf_annotation:_cli"
vrs-annotate = "ga4gh.vrs.extras.annotator.cli:_cli"

[build-system]
requires = ["setuptools>=65.3", "setuptools_scm>=8"]
Expand Down Expand Up @@ -193,7 +193,7 @@ exclude = [
"ANN201",
"ANN202",
]
"src/ga4gh/vrs/extras/vcf_annotation.py" = [
"src/ga4gh/vrs/extras/annotator/vcf.py" = [
"PTH123", # see https://github.com/ga4gh/vrs-python/issues/482
]
"src/ga4gh/vrs/extras/object_store.py" = [
Expand Down
1 change: 1 addition & 0 deletions src/ga4gh/vrs/extras/annotator/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Provide tools for annotating data with corresponding VRS objects and attributes."""
197 changes: 197 additions & 0 deletions src/ga4gh/vrs/extras/annotator/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
"""Define command-line interface for VRS annotator tool.

$ vrs-annotate vcf input.vcf.gz --vcf_out output.vcf.gz --vrs_pickle_out vrs_objects.pkl

"""

import logging
from collections.abc import Callable
from enum import Enum
from pathlib import Path
from timeit import default_timer as timer

import click

from ga4gh.vrs.extras.annotator.vcf import SeqRepoProxyType, VCFAnnotator

# Module-level logger, named after this module. The `--log_level` CLI option
# (see `_log_level_option`) adjusts the level of this same named logger.
_logger = logging.getLogger(__name__)


@click.group()
def _cli() -> None:
    """Annotate input files with VRS variation objects."""
    # NOTE: click uses the docstring above as the group's help text, so it is
    # user-facing output rather than internal documentation.
    #
    # Root logging configuration applied when the group is invoked: records go
    # to a file named `vrs-annotate.log` (a relative path), at INFO and above,
    # with timestamp, logger name and level ahead of each message.
    log_settings = {
        "filename": "vrs-annotate.log",
        "level": logging.INFO,
        "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    }
    logging.basicConfig(**log_settings)


class _LogLevel(str, Enum):
"""Define legal values for `--log_level` option."""

DEBUG = "debug"
INFO = "info"
WARNING = "warning"
ERROR = "error"
CRITICAL = "critical"


def _log_level_option(func: Callable) -> Callable:
    """Attach a reusable `--log_level` option to a click command.

    The option's value is consumed by a callback that sets the logging level of
    this module's logger; it is not forwarded to the decorated command as an
    argument (`expose_value=False`).

    :param func: incoming click command
    :return: same command, wrapped with log level option
    """
    # Permitted option value -> corresponding `logging` level constant.
    levels = {
        _LogLevel.DEBUG: logging.DEBUG,
        _LogLevel.INFO: logging.INFO,
        _LogLevel.WARNING: logging.WARNING,
        _LogLevel.ERROR: logging.ERROR,
        _LogLevel.CRITICAL: logging.CRITICAL,
    }

    def _apply_log_level(
        ctx: click.Context,  # noqa: ARG001
        param: click.Parameter,  # noqa: ARG001
        value: str,
    ) -> None:
        # click hands over the chosen string (e.g. "info"). `_LogLevel` members
        # are `str` subclasses that compare and hash like their values, so the
        # raw string can index the enum-keyed map directly.
        module_logger = logging.getLogger(__name__)
        module_logger.setLevel(levels[value])

    permitted = [level.value for level in _LogLevel.__members__.values()]
    add_option = click.option(
        "--log_level",
        type=click.Choice(permitted),
        default="info",
        help="Set the logging level.",
        callback=_apply_log_level,
        expose_value=False,
        is_eager=True,
    )
    return add_option(func)


@_cli.command(name="vcf")
@_log_level_option
@click.argument(
    "vcf_in",
    nargs=1,
    type=click.Path(exists=True, readable=True, dir_okay=False, path_type=Path),
)
@click.option(
    "--vcf_out",
    required=False,
    type=click.Path(writable=True, allow_dash=False, path_type=Path),
    help=(
        "Declare save location for output annotated VCF. If not provided, must provide --vrs_pickle_out."
    ),
)
@click.option(
    "--vrs_pickle_out",
    required=False,
    type=click.Path(writable=True, allow_dash=False, path_type=Path),
    help=(
        "Declare save location for output VCF pickle. If not provided, must provide --vcf_out."
    ),
)
@click.option(
    "--vrs_attributes",
    is_flag=True,
    default=False,
    help="Include VRS_Start, VRS_End, and VRS_State fields in the VCF output INFO field.",
)
@click.option(
    "--seqrepo_dp_type",
    required=False,
    default=SeqRepoProxyType.LOCAL,
    type=click.Choice(
        [member.value for member in SeqRepoProxyType.__members__.values()],
        case_sensitive=True,
    ),
    help="Specify type of SeqRepo dataproxy to use.",
    show_default=True,
    show_choices=True,
)
@click.option(
    "--seqrepo_root_dir",
    required=False,
    default=Path("/usr/local/share/seqrepo/latest"),
    type=click.Path(path_type=Path),
    help="Define root directory for local SeqRepo instance, if --seqrepo_dp_type=local.",
    show_default=True,
)
@click.option(
    "--seqrepo_base_url",
    required=False,
    default="http://localhost:5000/seqrepo",
    help="Specify base URL for SeqRepo REST API, if --seqrepo_dp_type=rest.",
    show_default=True,
)
@click.option(
    "--assembly",
    required=False,
    default="GRCh38",
    show_default=True,
    help="Specify assembly that was used to create input VCF.",
    type=str,
)
@click.option(
    "--skip_ref",
    is_flag=True,
    default=False,
    help="Skip VRS computation for REF alleles.",
)
@click.option(
    "--require_validation",
    is_flag=True,
    default=False,
    help="Require validation checks to pass to construct a VRS object.",
)
@click.option(
    "--silent",
    "-s",
    is_flag=True,
    default=False,
    help="Suppress messages printed to stdout",
)
def _annotate_vcf_cli(
    vcf_in: Path,
    vcf_out: Path | None,
    vrs_pickle_out: Path | None,
    vrs_attributes: bool,
    seqrepo_dp_type: SeqRepoProxyType,
    seqrepo_root_dir: Path,
    seqrepo_base_url: str,
    assembly: str,
    skip_ref: bool,
    require_validation: bool,
    silent: bool,
) -> None:
    """Extract VRS objects from VCF located at VCF_IN.

        $ vrs-annotate vcf input.vcf.gz --vcf_out output.vcf.gz --vrs_pickle_out vrs_objects.pkl

    Note that at least one of --vcf_out or --vrs_pickle_out must be selected and defined.
    """
    # NOTE: click renders the docstring above as this command's help text, so
    # its wording is user-facing output.

    def _absolute_or_none(location: Path | None) -> str | None:
        # Optional output paths are passed on as absolute strings, or as None
        # when the corresponding option was not supplied.
        if location is None:
            return None
        return str(location.absolute())

    def _report(text: str) -> None:
        # Always write to the module log; echo to stdout unless --silent is set.
        _logger.info(text)
        if not silent:
            click.echo(text)

    annotator = VCFAnnotator(
        seqrepo_dp_type, seqrepo_base_url, str(seqrepo_root_dir.absolute())
    )
    annotated_vcf_path = _absolute_or_none(vcf_out)
    pickle_path = _absolute_or_none(vrs_pickle_out)

    # Timing starts after the annotator is constructed, so the reported
    # duration covers the annotate() call (plus the start-up message).
    started = timer()
    _report(f"Annotating {vcf_in} with the VCF Annotator...")
    annotator.annotate(
        str(vcf_in.absolute()),
        vcf_out=annotated_vcf_path,
        vrs_pickle_out=pickle_path,
        vrs_attributes=vrs_attributes,
        assembly=assembly,
        # The CLI flag is phrased negatively (--skip_ref); annotate() takes the
        # positive form.
        compute_for_ref=not skip_ref,
        require_validation=require_validation,
    )
    finished = timer()
    _report(f"VCF Annotator finished in {(finished - started):.5f} seconds")
Loading
Loading