(chore): Update to Ruff 0.9 and add EM lints (#3437)
flying-sheep authored Jan 13, 2025
1 parent 66f1b61 commit f7acd02
Showing 81 changed files with 662 additions and 505 deletions.
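Context for the EM rules: they come from flake8-errmsg (EM101 for string literals, EM102 for f-strings, EM103 for `.format()` calls) and flag messages passed directly to an exception constructor. The motivation is traceback readability: the raw string otherwise appears twice, once on the quoted `raise` source line and once in the rendered exception. The fix applied throughout this commit binds the message to a variable first. A minimal sketch of the pattern (`check_positive` is an illustrative name, not code from this diff):

# Flagged by EM102: the f-string is rendered twice in the traceback,
# once as the quoted source line and once as the exception text.
def check_positive(x: float) -> None:
    if x <= 0:
        raise ValueError(f"x must be positive, got {x}")

# The EM-clean form: the quoted source line is just `raise ValueError(msg)`,
# so the message text is shown only once.
def check_positive_fixed(x: float) -> None:
    if x <= 0:
        msg = f"x must be positive, got {x}"
        raise ValueError(msg)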
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.6
rev: v0.9.1
hooks:
- id: ruff
args: ["--fix"]
4 changes: 3 additions & 1 deletion ci/scripts/min-deps.py
@@ -71,7 +71,9 @@ def extract_min_deps(

# If we are referring to other optional dependency lists, resolve them
if req.name == project_name:
assert req.extras, f"Project included itself as dependency, without specifying extras: {req}"
assert req.extras, (
f"Project included itself as dependency, without specifying extras: {req}"
)
for extra in req.extras:
extra_deps = pyproject["project"]["optional-dependencies"][extra]
dependencies += map(Requirement, extra_deps)
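The reflowed `assert` above appears to reflect Ruff 0.9's updated formatter (its 2025 style guide) rather than a hand edit: long assert messages are now wrapped in parentheses instead of leaving the line over the length limit. A self-contained sketch, assuming the same `packaging` import the script uses:

from packaging.requirements import Requirement

req = Requirement("scanpy[dev]")

# Pre-0.9 formatting kept the message on one long line:
# assert req.extras, f"Project included itself as dependency, without specifying extras: {req}"

# Ruff 0.9 parenthesizes the message so the line fits the length limit:
assert req.extras, (
    f"Project included itself as dependency, without specifying extras: {req}"
)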
3 changes: 2 additions & 1 deletion docs/extensions/param_police.py
@@ -37,7 +37,8 @@ def show_param_warnings(app, exception):
line,
)
if param_warnings:
raise RuntimeError("Encountered text parameter type. Use annotations.")
msg = "Encountered text parameter type. Use annotations."
raise RuntimeError(msg)


def setup(app: Sphinx):
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -230,7 +230,7 @@ select = [
"W", # Warning detected by Pycodestyle
"UP", # pyupgrade
"I", # isort
"TCH", # manage type checking blocks
"TC", # manage type checking blocks
"TID251", # Banned imports
"ICN", # Follow import conventions
"PTH", # Pathlib instead of os.path
@@ -239,6 +239,7 @@ select = [
"FBT", # No positional boolean parameters
"PT", # Pytest style
"SIM", # Simplify control flow
"EM", # Traceback-friendly error messages
]
ignore = [
# line too long -> we accept long comment lines; black gets rid of long code lines
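Two notes on this hunk: `TCH` is the old prefix for the flake8-type-checking rules, which Ruff renamed to `TC` (the deprecated code still worked but warned), and the new `EM` entry enables the error-message rules that drive most of the changes below. For illustration, the layout the TC rules enforce (the imports here are examples, not taken from this diff):

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # TC001-TC003 move imports needed only for annotations into this
    # block, so they cost nothing at runtime.
    from collections.abc import Callable

def apply(func: Callable[[int], int], x: int) -> int:
    return func(x)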
5 changes: 2 additions & 3 deletions src/scanpy/__init__.py
@@ -15,9 +15,8 @@
try:
from ._version import __version__
except ModuleNotFoundError:
raise RuntimeError(
"scanpy is not correctly installed. Please install it, e.g. with pip."
)
msg = "scanpy is not correctly installed. Please install it, e.g. with pip."
raise RuntimeError(msg)

from ._utils import check_versions

12 changes: 8 additions & 4 deletions src/scanpy/_settings.py
@@ -83,7 +83,8 @@ def _type_check(var: Any, varname: str, types: type | tuple[type, ...]):
else:
type_names = [t.__name__ for t in types]
possible_types_str = f"{', '.join(type_names[:-1])} or {type_names[-1]}"
raise TypeError(f"{varname} must be of type {possible_types_str}")
msg = f"{varname} must be of type {possible_types_str}"
raise TypeError(msg)


class ScanpyConfig:
@@ -180,10 +181,11 @@ def verbosity(self, verbosity: Verbosity | int | str):
elif isinstance(verbosity, str):
verbosity = verbosity.lower()
if verbosity not in verbosity_str_options:
raise ValueError(
msg = (
f"Cannot set verbosity to {verbosity}. "
f"Accepted string values are: {verbosity_str_options}"
)
raise ValueError(msg)
else:
self._verbosity = Verbosity(verbosity_str_options.index(verbosity))
else:
@@ -214,10 +216,11 @@ def file_format_data(self, file_format: str):
_type_check(file_format, "file_format_data", str)
file_format_options = {"txt", "csv", "h5ad"}
if file_format not in file_format_options:
raise ValueError(
msg = (
f"Cannot set file_format_data to {file_format}. "
f"Must be one of {file_format_options}"
)
raise ValueError(msg)
self._file_format_data = file_format

@property
@@ -322,10 +325,11 @@ def cache_compression(self) -> str | None:
@cache_compression.setter
def cache_compression(self, cache_compression: str | None):
if cache_compression not in {"lzf", "gzip", None}:
raise ValueError(
msg = (
f"`cache_compression` ({cache_compression}) "
"must be in {'lzf', 'gzip', None}"
)
raise ValueError(msg)
self._cache_compression = cache_compression

@property
77 changes: 40 additions & 37 deletions src/scanpy/_utils/__init__.py
@@ -93,11 +93,12 @@ def __getattr__(self, attr: str):
def ensure_igraph() -> None:
if importlib.util.find_spec("igraph"):
return
raise ImportError(
msg = (
"Please install the igraph package: "
"`conda install -c conda-forge python-igraph` or "
"`pip3 install igraph`."
)
raise ImportError(msg)


@contextmanager
@@ -120,10 +121,11 @@ def check_versions():
if Version(anndata_version) < Version("0.6.10"):
from .. import __version__

raise ImportError(
msg = (
f"Scanpy {__version__} needs anndata version >=0.6.10, "
f"not {anndata_version}.\nRun `pip install anndata -U --no-deps`."
)
raise ImportError(msg)


def getdoc(c_or_f: Callable | type) -> str | None:
@@ -195,7 +197,8 @@ def _import_name(name: str) -> Any:
try:
obj = getattr(obj, name)
except AttributeError:
raise RuntimeError(f"{parts[:i]}, {parts[i + 1:]}, {obj} {name}")
msg = f"{parts[:i]}, {parts[i + 1 :]}, {obj} {name}"
raise RuntimeError(msg)
return obj


@@ -255,9 +258,8 @@ def _check_array_function_arguments(**kwargs):
# TODO: Figure out a better solution for documenting dispatched functions
invalid_args = [k for k, v in kwargs.items() if v is not None]
if len(invalid_args) > 0:
raise TypeError(
f"Arguments {invalid_args} are only valid if an AnnData object is passed."
)
msg = f"Arguments {invalid_args} are only valid if an AnnData object is passed."
raise TypeError(msg)


def _check_use_raw(
@@ -350,16 +352,14 @@ def compute_association_matrix_of_groups(
reference labels, entries are proportional to degree of association.
"""
if normalization not in {"prediction", "reference"}:
raise ValueError(
'`normalization` needs to be either "prediction" or "reference".'
)
msg = '`normalization` needs to be either "prediction" or "reference".'
raise ValueError(msg)
sanitize_anndata(adata)
cats = adata.obs[reference].cat.categories
for cat in cats:
if cat in settings.categories_to_ignore:
logg.info(
f"Ignoring category {cat!r} "
"as it’s in `settings.categories_to_ignore`."
f"Ignoring category {cat!r} as it’s in `settings.categories_to_ignore`."
)
asso_names: list[str] = []
asso_matrix: list[list[float]] = []
@@ -604,7 +604,8 @@ def broadcast_axis(divisor: Scaling_T, axis: Literal[0, 1]) -> Scaling_T:

def check_op(op):
if op not in {truediv, mul}:
raise ValueError(f"{op} not one of truediv or mul")
msg = f"{op} not one of truediv or mul"
raise ValueError(msg)


@singledispatch
Expand Down Expand Up @@ -639,9 +640,8 @@ def _(
) -> sparse.csr_matrix | sparse.csc_matrix:
check_op(op)
if out is not None and X.data is not out.data:
raise ValueError(
"`out` argument provided but not equal to X. This behavior is not supported for sparse matrix scaling."
)
msg = "`out` argument provided but not equal to X. This behavior is not supported for sparse matrix scaling."
raise ValueError(msg)
if not allow_divide_by_zero and op is truediv:
scaling_array = scaling_array.copy() + (scaling_array == 0)

@@ -697,9 +697,8 @@ def _(
) -> DaskArray:
check_op(op)
if out is not None:
raise TypeError(
"`out` is not `None`. Do not do in-place modifications on dask arrays."
)
msg = "`out` is not `None`. Do not do in-place modifications on dask arrays."
raise TypeError(msg)

import dask.array as da

@@ -805,9 +804,8 @@ def sum_drop_keepdims(*args, **kwargs):
axis = kwargs["axis"]
if isinstance(axis, tuple):
if len(axis) != 1:
raise ValueError(
f"`axis_sum` can only sum over one axis when `axis` arg is provided but got {axis} instead"
)
msg = f"`axis_sum` can only sum over one axis when `axis` arg is provided but got {axis} instead"
raise ValueError(msg)
kwargs["axis"] = axis[0]
# returns a np.matrix normally, which is undesirable
return np.array(np.sum(*args, dtype=dtype, **kwargs))
@@ -959,7 +957,8 @@ def subsample(
Xsampled = np.array(X[rows])
else:
if seed < 0:
raise ValueError(f"Invalid seed value < 0: {seed}")
msg = f"Invalid seed value < 0: {seed}"
raise ValueError(msg)
n = int(X.shape[0] / subsample)
np.random.seed(seed)
Xsampled, rows = subsample_n(X, n=n)
@@ -989,7 +988,8 @@ def subsample_n(
Indices of rows that are stored in Xsampled.
"""
if n < 0:
raise ValueError("n must be greater 0")
msg = "n must be greater 0"
raise ValueError(msg)
np.random.seed(seed)
n = X.shape[0] if (n == 0 or n > X.shape[0]) else n
rows = np.random.choice(X.shape[0], size=n, replace=False)
@@ -1069,13 +1069,15 @@ def __init__(self, adata: AnnData, key=None):

if key is None or key == "neighbors":
if "neighbors" not in adata.uns:
raise KeyError('No "neighbors" in .uns')
msg = 'No "neighbors" in .uns'
raise KeyError(msg)
self._neighbors_dict = adata.uns["neighbors"]
self._conns_key = "connectivities"
self._dists_key = "distances"
else:
if key not in adata.uns:
raise KeyError(f'No "{key}" in .uns')
msg = f"No {key!r} in .uns"
raise KeyError(msg)
self._neighbors_dict = adata.uns[key]
self._conns_key = self._neighbors_dict["connectivities_key"]
self._dists_key = self._neighbors_dict["distances_key"]
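Besides extracting `msg`, the hunk above swaps hand-written quotes for the `!r` conversion, which formats the value with `repr()` and therefore quotes and escapes it consistently. A quick illustration:

key = "neighbors"

# Hand-quoted: always renders literal double quotes around the value.
print(f'No "{key}" in .uns')  # No "neighbors" in .uns

# !r delegates quoting to repr(), matching how Python displays strings.
print(f"No {key!r} in .uns")  # No 'neighbors' in .uns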
@@ -1108,11 +1110,13 @@ def __getitem__(self, key: Literal["connectivities_key"]) -> str: ...
def __getitem__(self, key: str):
if key == "distances":
if "distances" not in self:
raise KeyError(f'No "{self._dists_key}" in .obsp')
msg = f"No {self._dists_key!r} in .obsp"
raise KeyError(msg)
return self._distances
elif key == "connectivities":
if "connectivities" not in self:
raise KeyError(f'No "{self._conns_key}" in .obsp')
msg = f"No {self._conns_key!r} in .obsp"
raise KeyError(msg)
return self._connectivities
elif key == "connectivities_key":
return self._conns_key
@@ -1131,19 +1135,18 @@ def __contains__(self, key: str) -> bool:
def _choose_graph(adata, obsp, neighbors_key):
"""Choose connectivities from neighbbors or another obsp column"""
if obsp is not None and neighbors_key is not None:
raise ValueError(
"You can't specify both obsp, neighbors_key. " "Please select only one."
)
msg = "You can't specify both obsp, neighbors_key. Please select only one."
raise ValueError(msg)

if obsp is not None:
return adata.obsp[obsp]
else:
neighbors = NeighborsView(adata, neighbors_key)
if "connectivities" not in neighbors:
raise ValueError(
"You need to run `pp.neighbors` first "
"to compute a neighborhood graph."
msg = (
"You need to run `pp.neighbors` first to compute a neighborhood graph."
)
raise ValueError(msg)
return neighbors["connectivities"]


@@ -1154,7 +1157,8 @@ def _resolve_axis(
return (0, "obs")
if axis in {1, "var"}:
return (1, "var")
raise ValueError(f"`axis` must be either 0, 1, 'obs', or 'var', was {axis!r}")
msg = f"`axis` must be either 0, 1, 'obs', or 'var', was {axis!r}"
raise ValueError(msg)


def is_backed_type(X: object) -> bool:
@@ -1163,6 +1167,5 @@ def is_backed_type(X: object) -> bool:

def raise_not_implemented_error_if_backed_type(X: object, method_name: str) -> None:
if is_backed_type(X):
raise NotImplementedError(
f"{method_name} is not implemented for matrices of type {type(X)}"
)
msg = f"{method_name} is not implemented for matrices of type {type(X)}"
raise NotImplementedError(msg)
3 changes: 2 additions & 1 deletion src/scanpy/_utils/_doctests.py
@@ -19,7 +19,8 @@ def decorator(func: F) -> F:
def doctest_skip(reason: str) -> Callable[[F], F]:
"""Mark function so doctest is skipped."""
if not reason:
raise ValueError("reason must not be empty")
msg = "reason must not be empty"
raise ValueError(msg)

def decorator(func: F) -> F:
func._doctest_skip_reason = reason
6 changes: 4 additions & 2 deletions src/scanpy/_utils/compute/is_constant.py
@@ -24,9 +24,11 @@ def _check_axis_supported(wrapped: C) -> C:
def func(a, axis=None):
if axis is not None:
if not isinstance(axis, Integral):
raise TypeError("axis must be integer or None.")
msg = "axis must be integer or None."
raise TypeError(msg)
if axis not in (0, 1):
raise NotImplementedError("We only support axis 0 and 1 at the moment")
msg = "We only support axis 0 and 1 at the moment"
raise NotImplementedError(msg)
return wrapped(a, axis)

return func
17 changes: 10 additions & 7 deletions src/scanpy/experimental/pp/_highly_variable_genes.py
@@ -159,7 +159,8 @@ def _highly_variable_pearson_residuals(
if theta <= 0:
# TODO: would "underdispersion" with negative theta make sense?
# then only theta=0 were undefined..
raise ValueError("Pearson residuals require theta > 0")
msg = "Pearson residuals require theta > 0"
raise ValueError(msg)
# prepare clipping

if batch_key is None:
@@ -185,7 +186,8 @@
n = X_batch.shape[0]
clip = np.sqrt(n)
if clip < 0:
raise ValueError("Pearson residuals require `clip>=0` or `clip=None`.")
msg = "Pearson residuals require `clip>=0` or `clip=None`."
raise ValueError(msg)

if sp_sparse.issparse(X_batch):
X_batch = X_batch.tocsc()
@@ -378,17 +380,19 @@ def highly_variable_genes(
logg.info("extracting highly variable genes")

if not isinstance(adata, AnnData):
raise ValueError(
msg = (
"`pp.highly_variable_genes` expects an `AnnData` argument, "
"pass `inplace=False` if you want to return a `pd.DataFrame`."
)
raise ValueError(msg)

if flavor == "pearson_residuals":
if n_top_genes is None:
raise ValueError(
msg = (
"`pp.highly_variable_genes` requires the argument `n_top_genes`"
" for `flavor='pearson_residuals'`"
)
raise ValueError(msg)
return _highly_variable_pearson_residuals(
adata,
layer=layer,
@@ -402,6 +406,5 @@
inplace=inplace,
)
else:
raise ValueError(
"This is an experimental API and only `flavor=pearson_residuals` is available."
)
msg = "This is an experimental API and only `flavor=pearson_residuals` is available."
raise ValueError(msg)
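One side benefit of binding messages to variables (an observation, not a rationale stated in this PR): the exact text is easy to assert on in tests without re-typing long literals. A hypothetical pytest sketch, with `set_flavor` standing in for the real API:

import pytest

def set_flavor(flavor: str) -> None:
    if flavor != "pearson_residuals":
        msg = "This is an experimental API and only `flavor=pearson_residuals` is available."
        raise ValueError(msg)

def test_set_flavor_rejects_unknown() -> None:
    with pytest.raises(ValueError, match="only `flavor=pearson_residuals`"):
        set_flavor("seurat")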