Skip to content

Commit

Permalink
Merge branch 'main' into fix-io-error-notes
Browse files — browse the repository at this point in the history
  • Branch information
flying-sheep authored Jan 2, 2024
2 parents 36c8e21 + 63e6b29 commit ff5c08f
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 40 deletions.
13 changes: 5 additions & 8 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
repos:
- repo: https://github.com/psf/black
rev: 23.11.0
hooks:
- id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: "v0.1.7"
rev: v0.1.9
hooks:
- id: ruff
types_or: [python, pyi, jupyter]
args: ["--fix"]
- id: ruff-format
types_or: [python, pyi, jupyter]
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v4.0.0-alpha.4
rev: v4.0.0-alpha.8
hooks:
- id: prettier
exclude_types:
Expand All @@ -28,7 +26,6 @@ repos:
- id: detect-private-key
- id: no-commit-to-branch
args: ["--branch=main"]

- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
hooks:
Expand Down
12 changes: 9 additions & 3 deletions anndata/_core/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1108,12 +1108,18 @@ def concat(
... X=sparse.csr_matrix(np.array([[0, 1], [2, 3]])),
... obs=pd.DataFrame({"group": ["a", "b"]}, index=["s1", "s2"]),
... var=pd.DataFrame(index=["var1", "var2"]),
... varm={"ones": np.ones((2, 5)), "rand": np.random.randn(2, 3), "zeros": np.zeros((2, 5))},
... varm={
... "ones": np.ones((2, 5)),
... "rand": np.random.randn(2, 3),
... "zeros": np.zeros((2, 5)),
... },
... uns={"a": 1, "b": 2, "c": {"c.a": 3, "c.b": 4}},
... )
>>> b = ad.AnnData(
... X=sparse.csr_matrix(np.array([[4, 5, 6], [7, 8, 9]])),
... obs=pd.DataFrame({"group": ["b", "c"], "measure": [1.2, 4.3]}, index=["s3", "s4"]),
... obs=pd.DataFrame(
... {"group": ["b", "c"], "measure": [1.2, 4.3]}, index=["s3", "s4"]
... ),
... var=pd.DataFrame(index=["var1", "var2", "var3"]),
... varm={"ones": np.ones((3, 5)), "rand": np.random.randn(3, 5)},
... uns={"a": 1, "b": 3, "c": {"c.b": 4}},
Expand Down Expand Up @@ -1147,7 +1153,7 @@ def concat(
>>> (inner.obs_names, inner.var_names) # doctest: +NORMALIZE_WHITESPACE
(Index(['s1', 's2', 's3', 's4'], dtype='object'),
Index(['var1', 'var2'], dtype='object'))
>>> outer = ad.concat([a, b], join="outer") # Joining on union of variables
>>> outer = ad.concat([a, b], join="outer") # Joining on union of variables
>>> outer
AnnData object with n_obs × n_vars = 4 × 3
obs: 'group', 'measure'
Expand Down
12 changes: 6 additions & 6 deletions anndata/_core/sparse_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,12 +499,12 @@ def sparse_dataset(group: ZarrGroup | H5Group) -> CSRDataset | CSCDataset:
>>> import zarr
>>> from anndata.experimental import sparse_dataset
>>> group = zarr.open_group('./my_test_store.zarr')
>>> group['data'] = [10, 20, 30, 40, 50, 60, 70, 80]
>>> group['indices'] = [0, 1, 1, 3, 2, 3, 4, 5]
>>> group['indptr'] = [0, 2, 4, 7, 8]
>>> group.attrs['shape'] = (4, 6)
>>> group.attrs['encoding-type'] = 'csr_matrix'
>>> group = zarr.open_group("./my_test_store.zarr")
>>> group["data"] = [10, 20, 30, 40, 50, 60, 70, 80]
>>> group["indices"] = [0, 1, 1, 3, 2, 3, 4, 5]
>>> group["indptr"] = [0, 2, 4, 7, 8]
>>> group.attrs["shape"] = (4, 6)
>>> group.attrs["encoding-type"] = "csr_matrix"
>>> sparse_dataset(group)
CSRDataset: backend zarr, shape (4, 6), data_dtype int64
"""
Expand Down
18 changes: 12 additions & 6 deletions anndata/experimental/multi_files/_anncollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,9 +493,12 @@ def convert(self):
::
{
'X': lambda a: a.toarray() if issparse(a) else a, # densify .X
'obsm': lambda a: np.asarray(a, dtype='float32'), # change dtype for all keys of .obsm
'obs': dict(key1 = lambda c: c.astype(str)) # change type only for one key of .obs
# densify .X
"X": lambda a: a.toarray() if issparse(a) else a,
# change dtype for all keys of .obsm
"obsm": lambda a: np.asarray(a, dtype="float32"),
# change type only for one key of .obs
"obs": dict(key1=lambda c: c.astype(str)),
}
"""
return self._convert
Expand Down Expand Up @@ -817,9 +820,12 @@ def convert(self):
::
{
'X': lambda a: a.toarray() if issparse(a) else a, # densify .X
'obsm': lambda a: np.asarray(a, dtype='float32'), # change dtype for all keys of .obsm
'obs': dict(key1 = lambda c: c.astype(str)) # change type only for one key of .obs
# densify .X
"X": lambda a: a.toarray() if issparse(a) else a,
# change dtype for all keys of .obsm
"obsm": lambda a: np.asarray(a, dtype="float32"),
# change type only for one key of .obs
"obs": dict(key1=lambda c: c.astype(str)),
}
"""
return self._convert
Expand Down
15 changes: 9 additions & 6 deletions docs/benchmark-read-write.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@
"metadata": {},
"outputs": [],
"source": [
"import anndata as ad\n",
"import scanpy as sc"
"from __future__ import annotations\n",
"\n",
"import scanpy as sc\n",
"\n",
"import anndata as ad"
]
},
{
Expand Down Expand Up @@ -84,7 +87,7 @@
],
"source": [
"%%time\n",
"adata.write('test.h5ad')"
"adata.write(\"test.h5ad\")"
]
},
{
Expand All @@ -103,7 +106,7 @@
],
"source": [
"%%time\n",
"adata = ad.read_h5ad('test.h5ad')"
"adata = ad.read_h5ad(\"test.h5ad\")"
]
},
{
Expand All @@ -129,7 +132,7 @@
],
"source": [
"%%time\n",
"adata.write_loom('test.loom')"
"adata.write_loom(\"test.loom\")"
]
},
{
Expand All @@ -156,7 +159,7 @@
],
"source": [
"%%time\n",
"adata = ad.read_loom('test.loom')"
"adata = ad.read_loom(\"test.loom\")"
]
}
],
Expand Down
25 changes: 14 additions & 11 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -133,15 +133,10 @@ testpaths = ["anndata", "docs/concatenation.rst"]
xfail_strict = true
markers = ["gpu: mark test to run on GPU"]

[tool.ruff]
ignore = [
# line too long -> we accept long comment lines; black gets rid of long code lines
"E501",
# Do not assign a lambda expression, use a def -> AnnData allows lambda expression assignments,
"E731",
# allow I, O, l as variable names -> I is the identity matrix, i, j, k, l is reasonable indexing notation
"E741",
]
[tool.ruff.format]
docstring-code-format = true

[tool.ruff.lint]
select = [
"E", # Error detected by Pycodestyle
"F", # Errors detected by Pyflakes
Expand All @@ -152,10 +147,18 @@ select = [
"ICN", # Follow import conventions
"PTH", # Pathlib instead of os.path
]
[tool.ruff.per-file-ignores]
ignore = [
# line too long -> we accept long comment lines; formatter gets rid of long code lines
"E501",
# Do not assign a lambda expression, use a def -> AnnData allows lambda expression assignments,
"E731",
# allow I, O, l as variable names -> I is the identity matrix, i, j, k, l is reasonable indexing notation
"E741",
]
[tool.ruff.lint.per-file-ignores]
# E721 comparing types, but we specifically are checking that we aren't getting subtypes (views)
"anndata/tests/test_readwrite.py" = ["E721"]
[tool.ruff.isort]
[tool.ruff.lint.isort]
known-first-party = ["anndata"]
required-imports = ["from __future__ import annotations"]

Expand Down

0 comments on commit ff5c08f

Please sign in to comment.