From 7db7586f4a026b7b4190ecc86f27b2d59a76695b Mon Sep 17 00:00:00 2001 From: Tom White Date: Mon, 4 Nov 2024 14:19:51 +0000 Subject: [PATCH] Change zip to fail if iterables are different lengths --- bio2zarr/plink.py | 4 ++-- bio2zarr/vcf2zarr/icf.py | 4 ++-- bio2zarr/vcf2zarr/vcz.py | 2 +- bio2zarr/vcf2zarr/verification.py | 2 +- bio2zarr/vcf_utils.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bio2zarr/plink.py b/bio2zarr/plink.py index 157eb60..717bc7e 100644 --- a/bio2zarr/plink.py +++ b/bio2zarr/plink.py @@ -185,11 +185,11 @@ def validate(bed_path, zarr_path): assert call_genotype.shape[2] == 2 row_id = 0 - for bed_row, zarr_row in zip(bed_genotypes, call_genotype, strict=False): + for bed_row, zarr_row in zip(bed_genotypes, call_genotype, strict=True): # print("ROW", row_id) # print(bed_row, zarr_row) row_id += 1 - for bed_call, zarr_call in zip(bed_row, zarr_row, strict=False): + for bed_call, zarr_call in zip(bed_row, zarr_row, strict=True): if bed_call == -127: assert list(zarr_call) == [-1, -1] elif bed_call == 0: diff --git a/bio2zarr/vcf2zarr/icf.py b/bio2zarr/vcf2zarr/icf.py index 7006cf0..de91f42 100644 --- a/bio2zarr/vcf2zarr/icf.py +++ b/bio2zarr/vcf2zarr/icf.py @@ -289,7 +289,7 @@ def scan_vcf(path, target_num_partitions, *, local_alleles): samples=[Sample(sample_id) for sample_id in vcf.samples], contigs=[ Contig(contig_id, length) - for contig_id, length in zip(vcf.seqnames, contig_lengths, strict=False) + for contig_id, length in zip(vcf.seqnames, contig_lengths, strict=True) ], filters=filters, fields=fields, @@ -766,7 +766,7 @@ def chunks(self, partition_id, start_chunk=0): for count, cumulative in zip( chunk_num_records[start_chunk:], chunk_cumulative_records[start_chunk + 1 :], - strict=False, + strict=True, ): path = partition_path / f"{cumulative}" chunk = self.read_chunk(path) diff --git a/bio2zarr/vcf2zarr/vcz.py b/bio2zarr/vcf2zarr/vcz.py index b170bff..d6eb961 100644 --- a/bio2zarr/vcf2zarr/vcz.py +++ b/bio2zarr/vcf2zarr/vcz.py @@ -760,7 +760,7 @@ def encode_alleles_partition(self, partition_index): for ref, alt in zip( ref_field.iter_values(partition.start, partition.stop), alt_field.iter_values(partition.start, partition.stop), - strict=False, + strict=True, ): j = alleles.next_buffer_row() alleles.buff[j, :] = constants.STR_FILL diff --git a/bio2zarr/vcf2zarr/verification.py b/bio2zarr/vcf2zarr/verification.py index 35ef914..b16c311 100644 --- a/bio2zarr/vcf2zarr/verification.py +++ b/bio2zarr/vcf2zarr/verification.py @@ -114,7 +114,7 @@ def assert_format_val_equal(vcf_val, zarr_val, vcf_type, vcf_number): assert isinstance(vcf_val, np.ndarray) if vcf_type in ("String", "Character"): assert len(vcf_val) == len(zarr_val) - for v, z in zip(vcf_val, zarr_val, strict=False): + for v, z in zip(vcf_val, zarr_val, strict=True): if vcf_number == "1": assert v == z else: diff --git a/bio2zarr/vcf_utils.py b/bio2zarr/vcf_utils.py index b586890..6c9c51b 100644 --- a/bio2zarr/vcf_utils.py +++ b/bio2zarr/vcf_utils.py @@ -450,7 +450,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): return False def contig_record_counts(self): - d = dict(zip(self.sequence_names, self.index.record_counts, strict=False)) + d = dict(zip(self.sequence_names, self.index.record_counts, strict=True)) if self.file_type == VcfFileType.BCF: d = {k: v for k, v in d.items() if v > 0} return d