From 17428d0d6c2c3ba8b491eec9afe1fbd213ec377e Mon Sep 17 00:00:00 2001 From: Tom White Date: Wed, 18 Feb 2026 17:34:35 +0000 Subject: [PATCH] Test region index when variant alleles split across chunks --- tests/data/vcf/sample-split-alleles.vcf.gz | Bin 0 -> 1157 bytes .../data/vcf/sample-split-alleles.vcf.gz.csi | Bin 0 -> 186 bytes tests/test_vcf_writer.py | 25 ++++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 tests/data/vcf/sample-split-alleles.vcf.gz create mode 100644 tests/data/vcf/sample-split-alleles.vcf.gz.csi diff --git a/tests/data/vcf/sample-split-alleles.vcf.gz b/tests/data/vcf/sample-split-alleles.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..6c457c5801100afba986c0520124cb03ef8a72ed GIT binary patch literal 1157 zcmV;01bX`)iwFb&00000{{{d;LjnNI0*#bWZ`v>vg`d-3aaFXZ0d>-KD{PsR0x3uk zC{Vh$$w>@}N)JM2?pEhFY; z+d)>WH8uK)6e~%y#ck8Z0?C$4vK(t{LdYY_MaI-5B7`g~h3caUqgV)ESZz68V)W;81}Q}B}N1kfzqIX~jO;1b+i zXv!f!LMi9WK+_x|(~e<<>(HoI6@2G_Z+?LHb~uQ{I@gAIgw+Ah2^ftM@P_kp+PW#x z2?Iql213T@i7Mc!(&f1vr|(WKUtO0KI;pt#EAD+RsD9VGFWKuFPjy=%cohRPE%9J0 z2n>VL5Bu$s6)6;c?`DGI%oBr|FZhD7}3Hxq3sYS z*F)WrbIFh?g$F`&xFerTJXD#vgl-tbhs>QIUhN9vX-#>mw-7Qe{(w#Pg7I|Z&3A&w z`33v(v;EoBuXdx{?Uc=fgZP?zqNtk-+Sl{+oN9F8$O?_ zx|kZ%GMkTsfWH~b^_jL4EY3w8QEU%)dP8MOL(H*cD;(SjBa!QLwbRZ}RbzNY3%sh6}A)gbicp5fLyIR)~Sw{2NHEn1#vNbnP9^QjAQqX{g0FAdp z-IyHRY@+4OK3$FQP}RiF=ISprUf!=P$G^kwseZx7%4Z?i_h5f(lUCcl1&28J>UR3m z@yMEtXV#!=O?$qDJ7+yTdqZoVT+3V|Yk#6v;Njy~s;lVi1^|16%>i7Zmy}(`d3fgub#mcw!Sc6H`1(ABzYC00000 X0RIL6LPG)o8vp|U0000000000%6cyB literal 0 HcmV?d00001 diff --git a/tests/data/vcf/sample-split-alleles.vcf.gz.csi b/tests/data/vcf/sample-split-alleles.vcf.gz.csi new file mode 100644 index 0000000000000000000000000000000000000000..e7334d90b421741019471eed50ec339fbbbf59ef GIT binary patch literal 186 zcmb2|=3rp}f&Xj_PR>jWa~X>HzNI`#PDn^#P7+l}V@h(<;AJ`$FpIf^S68x!M?y(r z#==kYCOa6VAM}!x6nGP(@z%7nm62`6h7-ny3z))I%-rdbcId|ThiOVTdORLY>$&l^ z<)wZ?+Xb17vTr_4FH&8M+$VE7`Oj=gFjne3e5SdTx#xU`%JBmePBxwOeevlG&kaU~ X4d>b)&S7L=kVkW!Gy^l(u^<8fmkL7I literal 0 HcmV?d00001 diff --git a/tests/test_vcf_writer.py b/tests/test_vcf_writer.py index 29e5a28..f3f6d35 100644 --- a/tests/test_vcf_writer.py +++ b/tests/test_vcf_writer.py @@ -191,6 +191,31 @@ def test_write_vcf__regions(tmp_path, regions, targets, expected_chrom_pos, assert variant.POS == pos +def test_write_vcf__regions_split_alleles(tmp_path): + original = pathlib.Path("tests/data/vcf") / "sample-split-alleles.vcf.gz" + # chunk size is chosen so that 20:1234567 (two alt alleles) spans two chunks + vcz = vcz_path_cache(original, variants_chunk_size=4) + output = tmp_path.joinpath("output.vcf") + + write_vcf(vcz, output, regions="20:1234567") + + v = VCF(output) + variants = list(v) + assert len(variants) == 2 + + variant = variants[0] + assert variant.CHROM == "20" + assert variant.POS == 1234567 + assert variant.REF == "G" + assert variant.ALT == ["GA"] + + variant = variants[1] + assert variant.CHROM == "20" + assert variant.POS == 1234567 + assert variant.REF == "G" + assert variant.ALT == ["GAC"] + + @pytest.mark.parametrize( ("samples", "force_samples", "expected_samples", "expected_genotypes"), [