Commit eeeceea: Fixup tests

jeromekelleher committed May 7, 2024
1 parent 7437862

Showing 6 changed files with 43 additions and 28 deletions.
CHANGELOG.md: 3 additions, 0 deletions
@@ -1,3 +1,6 @@
+# 0.0.10 2024-05-XX
+- Change output format of dexplode-init and dencode-init

# 0.0.9 2024-05-02

- Change on-disk format for explode and schema
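For illustration (not part of the diff): the changelog entry above refers to the work summary that dexplode-init and dencode-init now report. A minimal sketch of that summary shape, assuming a hypothetical WorkSummary class with the renamed fields introduced below and an asjson() helper modelled on the FakeWorkSummary fixture in tests/test_cli.py:

import dataclasses
import json


@dataclasses.dataclass
class WorkSummary:
    # Field names follow the renames in this commit (num_* prefix).
    num_partitions: int
    num_samples: int
    num_variants: int

    def asdict(self):
        return dataclasses.asdict(self)

    def asjson(self):
        return json.dumps(self.asdict(), indent=4)


summary = WorkSummary(num_partitions=3, num_samples=9, num_variants=21)
print(json.loads(summary.asjson())["num_partitions"])  # -> 3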
bio2zarr/vcf.py: 14 additions, 14 deletions
@@ -988,9 +988,9 @@ def check_field_clobbering(icf_metadata):

@dataclasses.dataclass
class IcfWriteSummary:
-partitions: int
-samples: int
-variants: int
+num_partitions: int
+num_samples: int
+num_variants: int

def asdict(self):
return dataclasses.asdict(self)
@@ -1056,9 +1056,9 @@ def init(
with open(self.wip_path / "metadata.json", "w") as f:
json.dump(self.metadata.asdict(), f, indent=4)
return IcfWriteSummary(
-partitions=self.num_partitions,
-variants=icf_metadata.num_records,
-samples=icf_metadata.num_samples,
+num_partitions=self.num_partitions,
+num_variants=icf_metadata.num_records,
+num_samples=icf_metadata.num_samples,
)

def mkdirs(self):
@@ -1667,10 +1667,10 @@ def fromdict(d)

@dataclasses.dataclass
class VcfZarrWriteSummary:
-partitions: int
-samples: int
-variants: int
-chunks: int
+num_partitions: int
+num_samples: int
+num_variants: int
+num_chunks: int
max_encoding_memory: str

def asdict(self):
@@ -1765,10 +1765,10 @@ def init(
json.dump(self.metadata.asdict(), f, indent=4)

return VcfZarrWriteSummary(
-variants=self.icf.num_records,
-samples=self.icf.num_samples,
-partitions=self.num_partitions,
-chunks=total_chunks,
+num_variants=self.icf.num_records,
+num_samples=self.icf.num_samples,
+num_partitions=self.num_partitions,
+num_chunks=total_chunks,
max_encoding_memory=display_size(self.get_max_encoding_memory()),
)

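To show how the renamed summary fields are consumed, a minimal sketch of the explode workflow, assuming the bio2zarr.vcf functions exactly as exercised by the tests below; the paths are placeholders:

from bio2zarr import vcf

# Placeholder paths; any VCF input and ICF output locations would do.
summary = vcf.explode_init("out.icf", ["sample.vcf.gz"])
for j in range(summary.num_partitions):  # previously summary.partitions
    vcf.explode_partition("out.icf", j)
vcf.explode_finalise("out.icf")
icf = vcf.IntermediateColumnarFormat("out.icf")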
tests/test_cli.py: 18 additions, 6 deletions
@@ -1,3 +1,4 @@
+import dataclasses
import json
from unittest import mock

@@ -47,6 +48,17 @@
DEFAULT_DENCODE_FINALISE_ARGS = dict(show_progress=True)


+@dataclasses.dataclass
+class FakeWorkSummary:
+num_partitions: int

+def asdict(self):
+return dataclasses.asdict(self)

+def asjson(self):
+return json.dumps(self.asdict())


class TestWithMocks:
vcf_path = "tests/data/vcf/sample.vcf.gz"

@@ -263,7 +275,7 @@ def test_vcf_explode_missing_and_existing_vcf(self, mocked, tmp_path):
assert "'no_such_file' does not exist" in result.stderr
mocked.assert_not_called()

@mock.patch("bio2zarr.vcf.explode_init", return_value=5)
@mock.patch("bio2zarr.vcf.explode_init", return_value=FakeWorkSummary(5))
def test_vcf_dexplode_init(self, mocked, tmp_path):
runner = ct.CliRunner(mix_stderr=False)
icf_path = tmp_path / "icf"
@@ -274,7 +286,7 @@
)
assert result.exit_code == 0
assert len(result.stderr) == 0
-assert result.stdout == "5\n"
+assert list(result.stdout.split()) == ["num_partitions", "5"]
mocked.assert_called_once_with(
str(icf_path),
(self.vcf_path,),
@@ -416,7 +428,7 @@ def test_encode(self, mocked, tmp_path):
**DEFAULT_ENCODE_ARGS,
)

@mock.patch("bio2zarr.vcf.encode_init", return_value=(10, 1024))
@mock.patch("bio2zarr.vcf.encode_init", return_value=FakeWorkSummary(10))
def test_dencode_init(self, mocked, tmp_path):
icf_path = tmp_path / "icf"
icf_path.mkdir()
@@ -428,7 +440,7 @@
catch_exceptions=False,
)
assert result.exit_code == 0
-assert result.stdout == "10\t1 KiB\n"
+assert list(result.stdout.split()) == ["num_partitions", "10"]
assert len(result.stderr) == 0
mocked.assert_called_once_with(
str(icf_path),
@@ -534,7 +546,7 @@ def test_dexplode(self, tmp_path, one_based):
catch_exceptions=False,
)
assert result.exit_code == 0
-assert json.loads(result.stdout)["partitions"] == 3
+assert json.loads(result.stdout)["num_partitions"] == 3

for j in range(3):
if one_based:
@@ -603,7 +615,7 @@ def test_dencode(self, tmp_path, one_based):
catch_exceptions=False,
)
assert result.exit_code == 0
-assert json.loads(result.stdout)["partitions"] == 3
+assert json.loads(result.stdout)["num_partitions"] == 3

for j in range(3):
if one_based:
tests/test_icf.py: 3 additions, 3 deletions
@@ -106,7 +106,7 @@ def test_init_paths(self, tmp_path):
icf_path = tmp_path / "x.icf"
assert not icf_path.exists()
summary = vcf.explode_init(icf_path, [self.data_path])
-assert summary.partitions == 3
+assert summary.num_partitions == 3
assert icf_path.exists()
wip_path = icf_path / "wip"
assert wip_path.exists()
@@ -120,7 +120,7 @@ def test_finalise_paths(self, tmp_path):
wip_path = icf_path / "wip"
summary = vcf.explode_init(icf_path, [self.data_path])
assert icf_path.exists()
-for j in range(summary.partitions):
+for j in range(summary.num_partitions):
vcf.explode_partition(icf_path, j)
assert wip_path.exists()
vcf.explode_finalise(icf_path)
@@ -271,7 +271,7 @@ def run_explode(self, tmp_path, **kwargs):
def run_dexplode(self, tmp_path, **kwargs):
icf_path = tmp_path / "icf"
summary = vcf.explode_init(icf_path, [self.data_path], **kwargs)
-for j in range(summary.partitions):
+for j in range(summary.num_partitions):
vcf.explode_partition(icf_path, j)
vcf.explode_finalise(icf_path)
return vcf.IntermediateColumnarFormat(icf_path)
tests/test_vcf.py: 3 additions, 3 deletions
@@ -421,7 +421,7 @@ def test_init_paths(self, icf_path, tmp_path):
zarr_path = tmp_path / "x.zarr"
assert not zarr_path.exists()
summary = vcf.encode_init(icf_path, zarr_path, 7, variants_chunk_size=3)
-assert summary.partitions == 3
+assert summary.num_partitions == 3
assert zarr_path.exists()
wip_path = zarr_path / "wip"
assert wip_path.exists()
@@ -443,7 +443,7 @@ def test_finalise_paths(self, icf_path, tmp_path):
summary = vcf.encode_init(icf_path, zarr_path, 7, variants_chunk_size=3)
wip_path = zarr_path / "wip"
assert wip_path.exists()
-for j in range(summary.partitions):
+for j in range(summary.num_partitions):
vcf.encode_partition(zarr_path, j)
assert (wip_path / "partitions" / f"p{j}").exists()
vcf.encode_finalise(zarr_path)
@@ -527,7 +527,7 @@ def generate_vcf(self, path, info_field=None, format_field=None, num_rows=1):
pos = str(k + 1)
print("\t".join(["1", pos, "A", "T", ".", ".", ".", "."]), file=out)

-print(open(path).read())
+# print(open(path).read())
# This also compresses the input file
pysam.tabix_index(str(path), preset="vcf")

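Similarly, a minimal sketch of the encode side, assuming the encode_* calls as used in tests/test_vcf.py above; the paths are placeholders and the positional argument simply mirrors the value used in those tests:

from bio2zarr import vcf

# "out.icf" is an existing exploded ICF directory; "out.zarr" is the target.
summary = vcf.encode_init("out.icf", "out.zarr", 7, variants_chunk_size=3)
print(summary.num_partitions, summary.max_encoding_memory)
for j in range(summary.num_partitions):  # previously summary.partitions
    vcf.encode_partition("out.zarr", j)
vcf.encode_finalise("out.zarr")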
tests/test_vcf_examples.py: 2 additions, 2 deletions
@@ -873,12 +873,12 @@ def test_split_explode(tmp_path):
]
out = tmp_path / "test.explode"
work_summary = vcf.explode_init(out, paths, target_num_partitions=15)
-assert work_summary.partitions == 3
+assert work_summary.num_partitions == 3

with pytest.raises(FileNotFoundError):
pcvcf = vcf.IntermediateColumnarFormat(out)

-for j in range(work_summary.partitions):
+for j in range(work_summary.num_partitions):
vcf.explode_partition(out, j)
vcf.explode_finalise(out)
pcvcf = vcf.IntermediateColumnarFormat(out)
