Skip to content

Commit

Permalink
Merge pull request #180 from jeromekelleher/tests-with-indels
Browse files Browse the repository at this point in the history
Add explicit tests with indels
  • Loading branch information
jeromekelleher authored May 3, 2024
2 parents e8184f1 + 7698077 commit 27cbf9d
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 3 deletions.
2 changes: 1 addition & 1 deletion bio2zarr/vcf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,9 +441,9 @@ def count_variants(self, region):
return sum(1 for _ in self.variants(region))

def variants(self, region):
- # Need to filter because of indels overlapping the region
start = 1 if region.start is None else region.start
for var in self.vcf(str(region)):
+ # Need to filter because of indels overlapping the region
if var.POS >= start:
yield var

Expand Down
Binary file added tests/data/vcf/chr_m_indels.vcf.gz
Binary file not shown.
Binary file added tests/data/vcf/chr_m_indels.vcf.gz.csi
Binary file not shown.
4 changes: 2 additions & 2 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,8 @@ def test_5_chunk_1(self, n, expected):
# It *might* work in CI, but it may well not either, as it's
# probably dependent on a whole bunch of things. Expect to fail
# at some point.
- ("tests/data", 4960266),
- ("tests/data/vcf", 4948129),
+ ("tests/data", 4973315),
+ ("tests/data/vcf", 4961178),
("tests/data/vcf/sample.vcf.gz", 1089),
],
)
Expand Down
1 change: 1 addition & 0 deletions tests/test_vcf_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,7 @@ def test_duplicate_paths(self, tmp_path):
"1kg_2020_chrM.vcf.gz",
"field_type_combos.vcf.gz",
"out_of_order_contigs.vcf.gz",
"chr_m_indels.vcf.gz",
],
)
def test_by_validating(name, tmp_path):
Expand Down
10 changes: 10 additions & 0 deletions tests/test_vcf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ def test_context_manager_error(self):
with vcf_utils.IndexedVcf(data_path / "no-such-file.bcf"):
pass

def test_indels_filtered(self):
with vcf_utils.IndexedVcf(data_path / "chr_m_indels.vcf.gz") as vfile:
# Hand-picked example that results in filtering
region = vcf_utils.Region("chrM", 300, 314)
pos = [var.POS for var in vfile.variants(region)]
assert pos == [307, 308, 309, 312, 313, 314]

# values computed using bcftools index -s
@pytest.mark.parametrize(
("index_file", "expected"),
Expand Down Expand Up @@ -58,6 +65,7 @@ def test_context_manager_error(self):
("1kg_2020_chr20_annotations.bcf.csi", {"chr20": 21}),
("NA12878.prod.chr20snippet.g.vcf.gz.tbi", {"20": 301778}),
("multi_contig.vcf.gz.tbi", {str(j): 933 for j in range(5)}),
("chr_m_indels.vcf.gz.csi", {"chrM": 155}),
],
)
def test_contig_record_counts(self, index_file, expected):
Expand All @@ -82,6 +90,7 @@ def test_contig_record_counts(self, index_file, expected):
("1kg_2020_chr20_annotations.bcf.csi", ["chr20:60070-"]),
("NA12878.prod.chr20snippet.g.vcf.gz.tbi", ["20:60001-"]),
("multi_contig.vcf.gz.tbi", [f"{j}:1-" for j in range(5)]),
("chr_m_indels.vcf.gz.csi", ["chrM:26-"]),
],
)
def test_partition_into_one_part(self, index_file, expected):
Expand All @@ -106,6 +115,7 @@ def test_partition_into_one_part(self, index_file, expected):
("1kg_2020_chr20_annotations.bcf.csi", 1, 21),
("NA12878.prod.chr20snippet.g.vcf.gz.tbi", 59, 301778),
("multi_contig.vcf.gz.tbi", 5, 5 * 933),
("chr_m_indels.vcf.gz.csi", 1, 155),
],
)
def test_partition_into_max_parts(self, index_file, num_expected, total_records):
Expand Down

0 comments on commit 27cbf9d

Please sign in to comment.