From 02b595d0d3265473926bbb5a1acf9b077c0230e3 Mon Sep 17 00:00:00 2001 From: Tom White Date: Wed, 3 Jul 2024 16:30:15 +0100 Subject: [PATCH] Make chunks have the same number of dimensions as shape --- bio2zarr/vcf2zarr/vcz.py | 13 +++++++++++-- tests/test_vcz.py | 6 +++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/bio2zarr/vcf2zarr/vcz.py b/bio2zarr/vcf2zarr/vcz.py index 770bfe6..f0cd1e5 100644 --- a/bio2zarr/vcf2zarr/vcz.py +++ b/bio2zarr/vcf2zarr/vcz.py @@ -98,6 +98,7 @@ def from_field( # TODO make an option to add in the empty extra dimension if vcf_field.summary.max_number > 1: shape.append(vcf_field.summary.max_number) + chunks.append(vcf_field.summary.max_number) # TODO we should really be checking this to see if the named dimensions # are actually correct. if vcf_field.vcf_number == "R": @@ -251,7 +252,12 @@ def spec_from_field(field, array_name=None): ) def fixed_field_spec( - name, dtype, vcf_field=None, shape=(m,), dimensions=("variants",) + name, + dtype, + vcf_field=None, + shape=(m,), + dimensions=("variants",), + chunks=None, ): return ZarrArraySpec.new( vcf_field=vcf_field, @@ -260,7 +266,7 @@ def fixed_field_spec( shape=shape, description="", dimensions=dimensions, - chunks=[variants_chunk_size], + chunks=chunks or [variants_chunk_size], ) alt_field = icf.fields["ALT"] @@ -276,12 +282,14 @@ def fixed_field_spec( dtype="bool", shape=(m, icf.metadata.num_filters), dimensions=["variants", "filters"], + chunks=(variants_chunk_size, icf.metadata.num_filters), ), fixed_field_spec( name="variant_allele", dtype="O", shape=(m, max_alleles), dimensions=["variants", "alleles"], + chunks=(variants_chunk_size, max_alleles), ), fixed_field_spec( name="variant_id", @@ -329,6 +337,7 @@ def fixed_field_spec( ) ) shape += [ploidy] + chunks += [ploidy] dimensions += ["ploidy"] array_specs.append( ZarrArraySpec.new( diff --git a/tests/test_vcz.py b/tests/test_vcz.py index ab01cc5..f4b1843 100644 --- a/tests/test_vcz.py +++ b/tests/test_vcz.py @@ -313,7 +313,7 @@ def test_call_genotype(self, schema): "name": "call_genotype", "dtype": "i1", "shape": (9, 3, 2), - "chunks": (10000, 1000), + "chunks": (10000, 1000, 2), "dimensions": ("variants", "samples", "ploidy"), "description": "", "vcf_field": None, @@ -332,7 +332,7 @@ def test_call_genotype_mask(self, schema): "name": "call_genotype_mask", "dtype": "bool", "shape": (9, 3, 2), - "chunks": (10000, 1000), + "chunks": (10000, 1000, 2), "dimensions": ("variants", "samples", "ploidy"), "description": "", "vcf_field": None, @@ -351,7 +351,7 @@ def test_call_genotype_phased(self, schema): "name": "call_genotype_mask", "dtype": "bool", "shape": (9, 3, 2), - "chunks": (10000, 1000), + "chunks": (10000, 1000, 2), "dimensions": ("variants", "samples", "ploidy"), "description": "", "vcf_field": None,