Commit eeeceea: Fixup tests

jeromekelleher committed May 7, 2024
1 parent 7437862

Showing 6 changed files with 43 additions and 28 deletions.
CHANGELOG.md: 3 additions, 0 deletions
@@ -1,3 +1,6 @@
+# 0.0.10 2024-05-XX
+- Change output format of dexplode-init and dencode-init

# 0.0.9 2024-05-02

- Change on-disk format for explode and schema
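For illustration (not part of the diff): the changelog entry above refers to the work summary that dexplode-init and dencode-init now report. A minimal sketch of that summary shape, assuming a hypothetical WorkSummary class with the renamed fields introduced below and an asjson() helper modelled on the FakeWorkSummary fixture in tests/test_cli.py:

import dataclasses
import json


@dataclasses.dataclass
class WorkSummary:
    # Field names follow the renames in this commit (num_* prefix).
    num_partitions: int
    num_samples: int
    num_variants: int

    def asdict(self):
        return dataclasses.asdict(self)

    def asjson(self):
        return json.dumps(self.asdict(), indent=4)


summary = WorkSummary(num_partitions=3, num_samples=9, num_variants=21)
print(json.loads(summary.asjson())["num_partitions"])  # -> 3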
bio2zarr/vcf.py: 14 additions, 14 deletions
@@ -988,9 +988,9 @@ def check_field_clobbering(icf_metadata):

@dataclasses.dataclass
class IcfWriteSummary:
-partitions: int
-samples: int
-variants: int
+num_partitions: int
+num_samples: int
+num_variants: int

def asdict(self):
return dataclasses.asdict(self)
@@ -1056,9 +1056,9 @@ def init(
with open(self.wip_path / "metadata.json", "w") as f:
json.dump(self.metadata.asdict(), f, indent=4)
return IcfWriteSummary(
-partitions=self.num_partitions,
-variants=icf_metadata.num_records,
-samples=icf_metadata.num_samples,
+num_partitions=self.num_partitions,
+num_variants=icf_metadata.num_records,
+num_samples=icf_metadata.num_samples,
)

def mkdirs(self):
@@ -1667,10 +1667,10 @@ def fromdict(d)

@dataclasses.dataclass
class VcfZarrWriteSummary:
-partitions: int
-samples: int
-variants: int
-chunks: int
+num_partitions: int
+num_samples: int
+num_variants: int
+num_chunks: int
max_encoding_memory: str

def asdict(self):
@@ -1765,10 +1765,10 @@ def init(
json.dump(self.metadata.asdict(), f, indent=4)

return VcfZarrWriteSummary(
-variants=self.icf.num_records,
-samples=self.icf.num_samples,
-partitions=self.num_partitions,
-chunks=total_chunks,
+num_variants=self.icf.num_records,
+num_samples=self.icf.num_samples,
+num_partitions=self.num_partitions,
+num_chunks=total_chunks,
max_encoding_memory=display_size(self.get_max_encoding_memory()),
)

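To show how the renamed summary fields are consumed, a minimal sketch of the explode workflow, assuming the bio2zarr.vcf functions exactly as exercised by the tests below; the paths are placeholders:

from bio2zarr import vcf

# Placeholder paths; any VCF input and ICF output locations would do.
summary = vcf.explode_init("out.icf", ["sample.vcf.gz"])
for j in range(summary.num_partitions):  # previously summary.partitions
    vcf.explode_partition("out.icf", j)
vcf.explode_finalise("out.icf")
icf = vcf.IntermediateColumnarFormat("out.icf")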
tests/test_cli.py: 18 additions, 6 deletions
@@ -1,3 +1,4 @@
+import dataclasses
import json
from unittest import mock

@@ -47,6 +48,17 @@
DEFAULT_DENCODE_FINALISE_ARGS = dict(show_progress=True)


+@dataclasses.dataclass
+class FakeWorkSummary:
+num_partitions: int

+def asdict(self):
+return dataclasses.asdict(self)

+def asjson(self):
+return json.dumps(self.asdict())


class TestWithMocks:
vcf_path = "tests/data/vcf/sample.vcf.gz"

@@ -263,7 +275,7 @@ def test_vcf_explode_missing_and_existing_vcf(self, mocked, tmp_path):
assert "'no_such_file' does not exist" in result.stderr
mocked.assert_not_called()

@mock.patch("bio2zarr.vcf.explode_init", return_value=5)
@mock.patch("bio2zarr.vcf.explode_init", return_value=FakeWorkSummary(5))
def test_vcf_dexplode_init(self, mocked, tmp_path):
runner = ct.CliRunner(mix_stderr=False)
icf_path = tmp_path / "icf"
@@ -274,7 +286,7 @@
)
assert result.exit_code == 0
assert len(result.stderr) == 0
-assert result.stdout == "5\n"
+assert list(result.stdout.split()) == ["num_partitions", "5"]
mocked.assert_called_once_with(
str(icf_path),
(self.vcf_path,),
@@ -416,7 +428,7 @@ def test_encode(self, mocked, tmp_path):
**DEFAULT_ENCODE_ARGS,
)

@mock.patch("bio2zarr.vcf.encode_init", return_value=(10, 1024))
@mock.patch("bio2zarr.vcf.encode_init", return_value=FakeWorkSummary(10))
def test_dencode_init(self, mocked, tmp_path):
icf_path = tmp_path / "icf"
icf_path.mkdir()
@@ -428,7 +440,7 @@
catch_exceptions=False,
)
assert result.exit_code == 0
-assert result.stdout == "10\t1 KiB\n"
+assert list(result.stdout.split()) == ["num_partitions", "10"]
assert len(result.stderr) == 0
mocked.assert_called_once_with(
str(icf_path),
@@ -534,7 +546,7 @@ def test_dexplode(self, tmp_path, one_based):
catch_exceptions=False,
)
assert result.exit_code == 0
-assert json.loads(result.stdout)["partitions"] == 3
+assert json.loads(result.stdout)["num_partitions"] == 3

for j in range(3):
if one_based:
@@ -603,7 +615,7 @@ def test_dencode(self, tmp_path, one_based):
catch_exceptions=False,
)
assert result.exit_code == 0
-assert json.loads(result.stdout)["partitions"] == 3
+assert json.loads(result.stdout)["num_partitions"] == 3

for j in range(3):
if one_based:
tests/test_icf.py: 3 additions, 3 deletions
@@ -106,7 +106,7 @@ def test_init_paths(self, tmp_path):
icf_path = tmp_path / "x.icf"
assert not icf_path.exists()
summary = vcf.explode_init(icf_path, [self.data_path])
-assert summary.partitions == 3
+assert summary.num_partitions == 3
assert icf_path.exists()
wip_path = icf_path / "wip"
assert wip_path.exists()
@@ -120,7 +120,7 @@ def test_finalise_paths(self, tmp_path):
wip_path = icf_path / "wip"
summary = vcf.explode_init(icf_path, [self.data_path])
assert icf_path.exists()
-for j in range(summary.partitions):
+for j in range(summary.num_partitions):
vcf.explode_partition(icf_path, j)
assert wip_path.exists()
vcf.explode_finalise(icf_path)
@@ -271,7 +271,7 @@ def run_explode(self, tmp_path, **kwargs):
def run_dexplode(self, tmp_path, **kwargs):
icf_path = tmp_path / "icf"
summary = vcf.explode_init(icf_path, [self.data_path], **kwargs)
-for j in range(summary.partitions):
+for j in range(summary.num_partitions):
vcf.explode_partition(icf_path, j)
vcf.explode_finalise(icf_path)
return vcf.IntermediateColumnarFormat(icf_path)
tests/test_vcf.py: 3 additions, 3 deletions
@@ -421,7 +421,7 @@ def test_init_paths(self, icf_path, tmp_path):
zarr_path = tmp_path / "x.zarr"
assert not zarr_path.exists()
summary = vcf.encode_init(icf_path, zarr_path, 7, variants_chunk_size=3)
-assert summary.partitions == 3
+assert summary.num_partitions == 3
assert zarr_path.exists()
wip_path = zarr_path / "wip"
assert wip_path.exists()
@@ -443,7 +443,7 @@ def test_finalise_paths(self, icf_path, tmp_path):
summary = vcf.encode_init(icf_path, zarr_path, 7, variants_chunk_size=3)
wip_path = zarr_path / "wip"
assert wip_path.exists()
-for j in range(summary.partitions):
+for j in range(summary.num_partitions):
vcf.encode_partition(zarr_path, j)
assert (wip_path / "partitions" / f"p{j}").exists()
vcf.encode_finalise(zarr_path)
@@ -527,7 +527,7 @@ def generate_vcf(self, path, info_field=None, format_field=None, num_rows=1):
pos = str(k + 1)
print("\t".join(["1", pos, "A", "T", ".", ".", ".", "."]), file=out)

-print(open(path).read())
+# print(open(path).read())
# This also compresses the input file
pysam.tabix_index(str(path), preset="vcf")

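Similarly, a minimal sketch of the encode side, assuming the encode_* calls as used in tests/test_vcf.py above; the paths are placeholders and the positional argument simply mirrors the value used in those tests:

from bio2zarr import vcf

# "out.icf" is an existing exploded ICF directory; "out.zarr" is the target.
summary = vcf.encode_init("out.icf", "out.zarr", 7, variants_chunk_size=3)
print(summary.num_partitions, summary.max_encoding_memory)
for j in range(summary.num_partitions):  # previously summary.partitions
    vcf.encode_partition("out.zarr", j)
vcf.encode_finalise("out.zarr")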
tests/test_vcf_examples.py: 2 additions, 2 deletions
@@ -873,12 +873,12 @@ def test_split_explode(tmp_path):
]
out = tmp_path / "test.explode"
work_summary = vcf.explode_init(out, paths, target_num_partitions=15)
-assert work_summary.partitions == 3
+assert work_summary.num_partitions == 3

with pytest.raises(FileNotFoundError):
pcvcf = vcf.IntermediateColumnarFormat(out)

-for j in range(work_summary.partitions):
+for j in range(work_summary.num_partitions):
vcf.explode_partition(out, j)
vcf.explode_finalise(out)
pcvcf = vcf.IntermediateColumnarFormat(out)
