diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index d9d02ad..46b6ad4 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.9' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9d7f771..deeebf5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: # Use macos-13 because pip binary packages for ARM aren't # available for many dependencies os: [macos-13, macos-14, ubuntu-latest] - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12"] exclude: # Just run macos tests on one Python version - os: macos-13 @@ -33,6 +33,8 @@ jobs: python-version: "3.11" - os: macos-13 python-version: "3.12" + - os: macos-14 + python-version: "3.9" - os: macos-14 python-version: "3.10" - os: macos-14 diff --git a/bio2zarr/core.py b/bio2zarr/core.py index ae1d8c4..ac4fc29 100644 --- a/bio2zarr/core.py +++ b/bio2zarr/core.py @@ -7,8 +7,10 @@ import multiprocessing import os import os.path +import sys import threading import time +import warnings import humanfriendly import numcodecs @@ -214,6 +216,22 @@ def setup_progress_counter(counter): _progress_counter = counter +def warn_py39_mac(): + if sys.platform == "darwin" and sys.version_info[:2] == (3, 9): + warnings.warn( + "There is a known issue with bio2zarr on MacOS Python 3.9 " + "in which OS-level named semaphores are leaked. " + "You will also probably see warnings like 'There appear to be N " + "leaked semaphore objects at shutdown'. " + "While this is likely harmless for a few runs, it could lead to " + "issues if you do a lot of conversion. To prevent this issue " + "either: (1) use --worker-processes=0 or (2) upgrade to a newer " + "Python version. 
See https://github.com/sgkit-dev/bio2zarr/issues/209 " + "for more details.", + stacklevel=2, + ) + + class ParallelWorkManager(contextlib.AbstractContextManager): def __init__(self, worker_processes=1, progress_config=None): # Need to specify this explicitly to suppport Macs and @@ -226,6 +244,7 @@ def __init__(self, worker_processes=1, progress_config=None): # production. See note on the SynchronousExecutor class. self.executor = SynchronousExecutor() else: + warn_py39_mac() self.executor = cf.ProcessPoolExecutor( max_workers=worker_processes, mp_context=ctx, diff --git a/bio2zarr/plink.py b/bio2zarr/plink.py index 717bc7e..f558ebd 100644 --- a/bio2zarr/plink.py +++ b/bio2zarr/plink.py @@ -185,11 +185,11 @@ def validate(bed_path, zarr_path): assert call_genotype.shape[2] == 2 row_id = 0 - for bed_row, zarr_row in zip(bed_genotypes, call_genotype, strict=True): + for bed_row, zarr_row in zip(bed_genotypes, call_genotype): # print("ROW", row_id) # print(bed_row, zarr_row) row_id += 1 - for bed_call, zarr_call in zip(bed_row, zarr_row, strict=True): + for bed_call, zarr_call in zip(bed_row, zarr_row): if bed_call == -127: assert list(zarr_call) == [-1, -1] elif bed_call == 0: diff --git a/bio2zarr/typing.py b/bio2zarr/typing.py index 527e4e2..35e595d 100644 --- a/bio2zarr/typing.py +++ b/bio2zarr/typing.py @@ -1,3 +1,4 @@ from pathlib import Path +from typing import Union -PathType = str | Path +PathType = Union[str, Path] diff --git a/bio2zarr/vcf2zarr/icf.py b/bio2zarr/vcf2zarr/icf.py index de91f42..8e313f8 100644 --- a/bio2zarr/vcf2zarr/icf.py +++ b/bio2zarr/vcf2zarr/icf.py @@ -289,7 +289,7 @@ def scan_vcf(path, target_num_partitions, *, local_alleles): samples=[Sample(sample_id) for sample_id in vcf.samples], contigs=[ Contig(contig_id, length) - for contig_id, length in zip(vcf.seqnames, contig_lengths, strict=True) + for contig_id, length in zip(vcf.seqnames, contig_lengths) ], filters=filters, fields=fields, @@ -764,9 +764,7 @@ def chunks(self, partition_id, 
start_chunk=0): chunk_cumulative_records = self.chunk_record_index(partition_id) chunk_num_records = np.diff(chunk_cumulative_records) for count, cumulative in zip( - chunk_num_records[start_chunk:], - chunk_cumulative_records[start_chunk + 1 :], - strict=True, + chunk_num_records[start_chunk:], chunk_cumulative_records[start_chunk + 1 :] ): path = partition_path / f"{cumulative}" chunk = self.read_chunk(path) diff --git a/bio2zarr/vcf2zarr/vcz.py b/bio2zarr/vcf2zarr/vcz.py index d6eb961..dcaef3f 100644 --- a/bio2zarr/vcf2zarr/vcz.py +++ b/bio2zarr/vcf2zarr/vcz.py @@ -760,7 +760,6 @@ def encode_alleles_partition(self, partition_index): for ref, alt in zip( ref_field.iter_values(partition.start, partition.stop), alt_field.iter_values(partition.start, partition.stop), - strict=True, ): j = alleles.next_buffer_row() alleles.buff[j, :] = constants.STR_FILL diff --git a/bio2zarr/vcf2zarr/verification.py b/bio2zarr/vcf2zarr/verification.py index b16c311..27e86fe 100644 --- a/bio2zarr/vcf2zarr/verification.py +++ b/bio2zarr/vcf2zarr/verification.py @@ -114,7 +114,7 @@ def assert_format_val_equal(vcf_val, zarr_val, vcf_type, vcf_number): assert isinstance(vcf_val, np.ndarray) if vcf_type in ("String", "Character"): assert len(vcf_val) == len(zarr_val) - for v, z in zip(vcf_val, zarr_val, strict=True): + for v, z in zip(vcf_val, zarr_val): if vcf_number == "1": assert v == z else: diff --git a/bio2zarr/vcf_utils.py b/bio2zarr/vcf_utils.py index 6c9c51b..70dfea2 100644 --- a/bio2zarr/vcf_utils.py +++ b/bio2zarr/vcf_utils.py @@ -7,7 +7,7 @@ from collections.abc import Sequence from dataclasses import dataclass from enum import Enum -from typing import IO, Any +from typing import IO, Any, Optional, Union import cyvcf2 import humanfriendly @@ -33,7 +33,7 @@ def get_file_offset(vfp: int) -> int: return vfp >> 16 & address_mask -def read_bytes_as_value(f: IO[Any], fmt: str, nodata: Any | None = None) -> Any: +def read_bytes_as_value(f: IO[Any], fmt: str, nodata: Optional[Any] = 
None) -> Any: """Read bytes using a `struct` format string and return the unpacked data value. Parameters @@ -85,8 +85,8 @@ class Region: """ contig: str - start: int | None = None - end: int | None = None + start: Optional[int] = None + end: Optional[int] = None def __post_init__(self): if self.start is not None: @@ -194,7 +194,9 @@ def get_first_locus_in_bin(csi: CSIIndex, bin: int) -> int: return (bin - first_bin_on_level) * (max_span // level_size) + 1 -def read_csi(file: PathType, storage_options: dict[str, str] | None = None) -> CSIIndex: +def read_csi( + file: PathType, storage_options: Optional[dict[str, str]] = None +) -> CSIIndex: """Parse a CSI file into a `CSIIndex` object. Parameters @@ -309,7 +311,7 @@ def offsets(self) -> Any: def read_tabix( - file: PathType, storage_options: dict[str, str] | None = None + file: PathType, storage_options: Optional[dict[str, str]] = None ) -> TabixIndex: """Parse a tabix file into a `TabixIndex` object. @@ -450,7 +452,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): return False def contig_record_counts(self): - d = dict(zip(self.sequence_names, self.index.record_counts, strict=True)) + d = dict(zip(self.sequence_names, self.index.record_counts)) if self.file_type == VcfFileType.BCF: d = {k: v for k, v in d.items() if v > 0} return d @@ -481,8 +483,8 @@ def _filter_empty_and_refine(self, regions): def partition_into_regions( self, - num_parts: int | None = None, - target_part_size: None | int | str = None, + num_parts: Optional[int] = None, + target_part_size: Union[None, int, str] = None, ): if num_parts is None and target_part_size is None: raise ValueError("One of num_parts or target_part_size must be specified") diff --git a/pyproject.toml b/pyproject.toml index 4d6951c..d911bcd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "cyvcf2", "bed_reader", ] -requires-python = ">=3.10" +requires-python = ">=3.9" classifiers = [ "Development Status :: 4 - Beta", "License :: OSI 
Approved :: Apache Software License", @@ -35,6 +35,7 @@ classifiers = [ "Intended Audience :: Science/Research", "Programming Language :: Python", "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -73,8 +74,8 @@ testpaths = "tests" addopts = "--cov=bio2zarr --cov-report term-missing" [tool.ruff] -# Assume Python 3.10 -target-version = "py310" +# Assume Python 3.9 +target-version = "py39" # Same as Black. line-length = 88