From c48e94af34d6eee3c1f059fcf3735f44dd04ca41 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Tue, 23 Apr 2024 23:25:49 +0100 Subject: [PATCH] Fixup max_v_chunks --- bio2zarr/vcf.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bio2zarr/vcf.py b/bio2zarr/vcf.py index e3b8791..86dc7d6 100644 --- a/bio2zarr/vcf.py +++ b/bio2zarr/vcf.py @@ -1670,7 +1670,7 @@ def init( root = zarr.group(store=store) for column in self.schema.columns.values(): - self.init_array(root, column) + self.init_array(root, column, partitions[-1].stop_index) logger.info("Writing WIP metadata") with open(self.wip_path / "metadata.json", "w") as f: @@ -1718,13 +1718,16 @@ def encode_filter_id(self, root): ) array.attrs["_ARRAY_DIMENSIONS"] = ["filters"] - def init_array(self, root, variable): + def init_array(self, root, variable, variants_dim_size): object_codec = None if variable.dtype == "O": object_codec = numcodecs.VLenUTF8() + shape = list(variable.shape) + # Truncate the variants dimension if max_variant_chunks was specified + shape[0] = variants_dim_size a = root.empty( variable.name, - shape=variable.shape, + shape=shape, chunks=variable.chunks, dtype=variable.dtype, compressor=numcodecs.get_codec(variable.compressor),