Skip to content

Commit

Permalink
Add check for badly formatted vcz.
Browse files Browse the repository at this point in the history
This only checks the number of dimensions of the call_genotype array, since that is the thing most likely to be wrong (e.g. 2D by mistake).
  • Loading branch information
hyanwong committed Sep 5, 2024
1 parent 21fbe57 commit 64684c5
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 0 deletions.
8 changes: 8 additions & 0 deletions tests/test_variantdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,14 @@ def test_sgkit_ancestor(small_sd_fixture, tmp_path):


class TestVariantDataErrors:
def test_bad_zarr_spec(self):
    """Check that a group with a 1D ``call_genotype`` array is rejected.

    An in-memory zarr group is given a ``call_genotype`` entry built from
    a length-10 (one-dimensional) array, and constructing a
    ``tsinfer.VariantData`` from it is expected to raise ``ValueError``
    with a message about the 3D ``call_genotype`` requirement.
    """
    # Deliberately malformed: a flat vector where the error message says
    # a 3D array is expected.
    flat_genotypes = zarr.array(np.zeros(10, dtype=np.int8))
    root = zarr.group()
    root["call_genotype"] = flat_genotypes

    # Second positional argument: an array of 10 empty one-character strings.
    empty_strings = np.zeros(10, dtype="<U1")

    expected_msg = "Expecting a VCF Zarr object with 3D call_genotype array"
    with pytest.raises(ValueError, match=expected_msg):
        tsinfer.VariantData(root, empty_strings)

def test_missing_phase(self, tmp_path):
path = tmp_path / "data.zarr"
ds = sgkit.simulate_genotype_call_dataset(n_variant=3, n_sample=3)
Expand Down
5 changes: 5 additions & 0 deletions tsinfer/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2311,6 +2311,11 @@ def __init__(
# Assumed to be a VCF Zarr hierarchy
self.path = None
self.data = path_or_zarr
else:
raise ValueError(
"Expecting a VCF Zarr object with 3D call_genotype array: "
"see https://github.com/sgkit-dev/vcf-zarr-spec/"
)
except AttributeError:
self.path = path_or_zarr
self.data = zarr.open(path_or_zarr, mode="r")
Expand Down

0 comments on commit 64684c5

Please sign in to comment.