Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions tests/test_bcftools_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,10 @@ def test_vcf_output_with_output_option(tmp_path, args, vcf_file):
r"query -f '[%CHROM %POS %SAMPLE %GT %DP %GQ\n]' -r '20:1230236-' -i 'FMT/DP>3' -s 'NA00002,NA00003'", # noqa: E501
"sample.vcf.gz",
),
(
r"query -f '[%CHROM %POS %SAMPLE %GT %DP %GQ\n]' -r '20:1230236-' -i 'FMT/DP>3' -S tests/data/txt/samples.txt", # noqa: E501
"sample.vcf.gz",
),
],
)
def test_output(tmp_path, args, vcf_name):
Expand Down
11 changes: 10 additions & 1 deletion tests/test_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy.testing as nt
import pytest

from vcztools.samples import parse_samples
from vcztools.samples import parse_samples, parse_samples_file


@pytest.mark.parametrize(
Expand Down Expand Up @@ -41,3 +41,12 @@ def test_parse_samples(

nt.assert_array_equal(sample_ids, expected_sample_ids)
nt.assert_array_equal(samples_selection, expected_samples_selection)


def test_parse_samples_file():
    """Check both the include form and the ^-prefixed exclude form."""
    # Plain path: the IDs in the file are joined with commas.
    included = parse_samples_file("tests/data/txt/samples.txt")
    nt.assert_array_equal(included, "NA00001,NA00003")

    # Path prefixed with ^: the same list, with the ^ marker carried over.
    excluded = parse_samples_file("^tests/data/txt/samples.txt")
    nt.assert_array_equal(excluded, "^NA00001,NA00003")
35 changes: 19 additions & 16 deletions vcztools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

import click

from vcztools.samples import parse_samples_file

from . import plink, provenance, vcf_writer
from . import query as query_module
from . import stats as stats_module
Expand Down Expand Up @@ -76,6 +78,13 @@ def wrapper(*args, **kwargs):
default=None,
help="Samples to include.",
)
# Shared -S / --samples-file option, applied as a decorator to both the
# `query` and `view` commands. The value is the path of a file of sample
# names; the commands pass it to parse_samples_file, which treats a leading
# "^" on the path as a request to exclude the listed samples.
samples_file = click.option(
"-S",
"--samples-file",
type=str,
default=None,
help="File of sample names to include.",
)
targets = click.option(
"-t",
"--targets",
Expand Down Expand Up @@ -154,6 +163,7 @@ def index(path, nrecords, stats, zarr_backend_storage):
@regions
@force_samples
@samples
@samples_file
@targets
@include
@exclude
Expand All @@ -175,6 +185,7 @@ def query(
targets,
force_samples,
samples,
samples_file,
include,
exclude,
disable_automatic_newline,
Expand All @@ -200,6 +211,12 @@ def query(

if format is None:
raise click.UsageError("Missing option -f / --format")

if samples_file:
if samples is not None:
raise ValueError("vcztools does not support combining -s and -S")
samples = parse_samples_file(samples_file)

with handle_broken_pipe(output):
query_module.write_query(
path,
Expand Down Expand Up @@ -245,13 +262,7 @@ def query(
help="Do not recalculate INFO fields for the sample subset.",
)
@samples
@click.option(
"-S",
"--samples-file",
type=str,
default=None,
help="File of sample names to include.",
)
@samples_file
@click.option(
"-G",
"--drop-genotypes",
Expand Down Expand Up @@ -300,15 +311,7 @@ def view(
if samples_file:
if samples is not None:
raise ValueError("vcztools does not support combining -s and -S")

samples = ""
exclude_samples_file = samples_file.startswith("^")
samples_file = samples_file.lstrip("^")

with open(samples_file) as file:
if exclude_samples_file:
samples = "^" + samples
samples += ",".join(line.strip() for line in file.readlines())
samples = parse_samples_file(samples_file)

with handle_broken_pipe(output):
vcf_writer.write_vcf(
Expand Down
18 changes: 18 additions & 0 deletions vcztools/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,21 @@ def parse_samples(
samples_selection = np.setdiff1d(samples_selection, masked_sample_ids)
sample_ids = all_samples[samples_selection]
return sample_ids, samples_selection


def parse_samples_file(samples_file: str) -> str:
    """Parse a file of sample IDs.

    The file is read one sample ID per line. Surrounding whitespace is
    stripped from every line and blank lines are ignored, so a trailing
    empty line does not produce an empty sample ID.

    ``samples_file`` is the path of the file to read. A leading ``^`` is
    not part of the path: it marks the listed samples as a complement
    (exclusion) list.

    Returns a comma-delimited string of sample IDs,
    optionally preceded by a ^ character to indicate complement.

    Raises ``OSError`` (e.g. ``FileNotFoundError``) if the file cannot be
    opened.
    """
    exclude = samples_file.startswith("^")
    # Drop only the single marker character; lstrip("^") would also remove
    # carets that belong to the file name itself.
    path = samples_file[1:] if exclude else samples_file

    with open(path) as file:
        stripped_lines = (line.strip() for line in file)
        sample_ids = [sample_id for sample_id in stripped_lines if sample_id]

    prefix = "^" if exclude else ""
    return prefix + ",".join(sample_ids)
Loading