diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e87eb88663..c5e0e91d8c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.6 + rev: v0.9.1 hooks: - id: ruff args: ["--fix"] diff --git a/ci/scripts/min-deps.py b/ci/scripts/min-deps.py index 0d49d151ef..4efc304cb6 100755 --- a/ci/scripts/min-deps.py +++ b/ci/scripts/min-deps.py @@ -71,7 +71,9 @@ def extract_min_deps( # If we are referring to other optional dependency lists, resolve them if req.name == project_name: - assert req.extras, f"Project included itself as dependency, without specifying extras: {req}" + assert req.extras, ( + f"Project included itself as dependency, without specifying extras: {req}" + ) for extra in req.extras: extra_deps = pyproject["project"]["optional-dependencies"][extra] dependencies += map(Requirement, extra_deps) diff --git a/docs/extensions/param_police.py b/docs/extensions/param_police.py index 37942d3687..234ad28e62 100644 --- a/docs/extensions/param_police.py +++ b/docs/extensions/param_police.py @@ -37,7 +37,8 @@ def show_param_warnings(app, exception): line, ) if param_warnings: - raise RuntimeError("Encountered text parameter type. Use annotations.") + msg = "Encountered text parameter type. Use annotations." + raise RuntimeError(msg) def setup(app: Sphinx): diff --git a/pyproject.toml b/pyproject.toml index cd26d2f9a2..71f7f1c482 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -230,7 +230,7 @@ select = [ "W", # Warning detected by Pycodestyle "UP", # pyupgrade "I", # isort - "TCH", # manage type checking blocks + "TC", # manage type checking blocks "TID251", # Banned imports "ICN", # Follow import conventions "PTH", # Pathlib instead of os.path @@ -239,6 +239,7 @@ select = [ "FBT", # No positional boolean parameters "PT", # Pytest style "SIM", # Simplify control flow + "EM", # Traceback-friendly error messages ] ignore = [ # line too long -> we accept long comment lines; black gets rid of long code lines diff --git a/src/scanpy/__init__.py b/src/scanpy/__init__.py index bbcc86437b..b844372d1e 100644 --- a/src/scanpy/__init__.py +++ b/src/scanpy/__init__.py @@ -15,9 +15,8 @@ try: from ._version import __version__ except ModuleNotFoundError: - raise RuntimeError( - "scanpy is not correctly installed. Please install it, e.g. with pip." - ) + msg = "scanpy is not correctly installed. Please install it, e.g. with pip." + raise RuntimeError(msg) from ._utils import check_versions diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 5543689ef7..fc9ead09b0 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -83,7 +83,8 @@ def _type_check(var: Any, varname: str, types: type | tuple[type, ...]): else: type_names = [t.__name__ for t in types] possible_types_str = f"{', '.join(type_names[:-1])} or {type_names[-1]}" - raise TypeError(f"{varname} must be of type {possible_types_str}") + msg = f"{varname} must be of type {possible_types_str}" + raise TypeError(msg) class ScanpyConfig: @@ -180,10 +181,11 @@ def verbosity(self, verbosity: Verbosity | int | str): elif isinstance(verbosity, str): verbosity = verbosity.lower() if verbosity not in verbosity_str_options: - raise ValueError( + msg = ( f"Cannot set verbosity to {verbosity}. " f"Accepted string values are: {verbosity_str_options}" ) + raise ValueError(msg) else: self._verbosity = Verbosity(verbosity_str_options.index(verbosity)) else: @@ -214,10 +216,11 @@ def file_format_data(self, file_format: str): _type_check(file_format, "file_format_data", str) file_format_options = {"txt", "csv", "h5ad"} if file_format not in file_format_options: - raise ValueError( + msg = ( f"Cannot set file_format_data to {file_format}. " f"Must be one of {file_format_options}" ) + raise ValueError(msg) self._file_format_data = file_format @property @@ -322,10 +325,11 @@ def cache_compression(self) -> str | None: @cache_compression.setter def cache_compression(self, cache_compression: str | None): if cache_compression not in {"lzf", "gzip", None}: - raise ValueError( + msg = ( f"`cache_compression` ({cache_compression}) " "must be in {'lzf', 'gzip', None}" ) + raise ValueError(msg) self._cache_compression = cache_compression @property diff --git a/src/scanpy/_utils/__init__.py b/src/scanpy/_utils/__init__.py index 67e2ae03c8..326ea216d1 100644 --- a/src/scanpy/_utils/__init__.py +++ b/src/scanpy/_utils/__init__.py @@ -93,11 +93,12 @@ def __getattr__(self, attr: str): def ensure_igraph() -> None: if importlib.util.find_spec("igraph"): return - raise ImportError( + msg = ( "Please install the igraph package: " "`conda install -c conda-forge python-igraph` or " "`pip3 install igraph`." ) + raise ImportError(msg) @contextmanager @@ -120,10 +121,11 @@ def check_versions(): if Version(anndata_version) < Version("0.6.10"): from .. import __version__ - raise ImportError( + msg = ( f"Scanpy {__version__} needs anndata version >=0.6.10, " f"not {anndata_version}.\nRun `pip install anndata -U --no-deps`." ) + raise ImportError(msg) def getdoc(c_or_f: Callable | type) -> str | None: @@ -195,7 +197,8 @@ def _import_name(name: str) -> Any: try: obj = getattr(obj, name) except AttributeError: - raise RuntimeError(f"{parts[:i]}, {parts[i + 1:]}, {obj} {name}") + msg = f"{parts[:i]}, {parts[i + 1 :]}, {obj} {name}" + raise RuntimeError(msg) return obj @@ -255,9 +258,8 @@ def _check_array_function_arguments(**kwargs): # TODO: Figure out a better solution for documenting dispatched functions invalid_args = [k for k, v in kwargs.items() if v is not None] if len(invalid_args) > 0: - raise TypeError( - f"Arguments {invalid_args} are only valid if an AnnData object is passed." - ) + msg = f"Arguments {invalid_args} are only valid if an AnnData object is passed." + raise TypeError(msg) def _check_use_raw( @@ -350,16 +352,14 @@ def compute_association_matrix_of_groups( reference labels, entries are proportional to degree of association. """ if normalization not in {"prediction", "reference"}: - raise ValueError( - '`normalization` needs to be either "prediction" or "reference".' - ) + msg = '`normalization` needs to be either "prediction" or "reference".' + raise ValueError(msg) sanitize_anndata(adata) cats = adata.obs[reference].cat.categories for cat in cats: if cat in settings.categories_to_ignore: logg.info( - f"Ignoring category {cat!r} " - "as it’s in `settings.categories_to_ignore`." + f"Ignoring category {cat!r} as it’s in `settings.categories_to_ignore`." ) asso_names: list[str] = [] asso_matrix: list[list[float]] = [] @@ -604,7 +604,8 @@ def broadcast_axis(divisor: Scaling_T, axis: Literal[0, 1]) -> Scaling_T: def check_op(op): if op not in {truediv, mul}: - raise ValueError(f"{op} not one of truediv or mul") + msg = f"{op} not one of truediv or mul" + raise ValueError(msg) @singledispatch @@ -639,9 +640,8 @@ def _( ) -> sparse.csr_matrix | sparse.csc_matrix: check_op(op) if out is not None and X.data is not out.data: - raise ValueError( - "`out` argument provided but not equal to X. This behavior is not supported for sparse matrix scaling." - ) + msg = "`out` argument provided but not equal to X. This behavior is not supported for sparse matrix scaling." + raise ValueError(msg) if not allow_divide_by_zero and op is truediv: scaling_array = scaling_array.copy() + (scaling_array == 0) @@ -697,9 +697,8 @@ def _( ) -> DaskArray: check_op(op) if out is not None: - raise TypeError( - "`out` is not `None`. Do not do in-place modifications on dask arrays." - ) + msg = "`out` is not `None`. Do not do in-place modifications on dask arrays." + raise TypeError(msg) import dask.array as da @@ -805,9 +804,8 @@ def sum_drop_keepdims(*args, **kwargs): axis = kwargs["axis"] if isinstance(axis, tuple): if len(axis) != 1: - raise ValueError( - f"`axis_sum` can only sum over one axis when `axis` arg is provided but got {axis} instead" - ) + msg = f"`axis_sum` can only sum over one axis when `axis` arg is provided but got {axis} instead" + raise ValueError(msg) kwargs["axis"] = axis[0] # returns a np.matrix normally, which is undesireable return np.array(np.sum(*args, dtype=dtype, **kwargs)) @@ -959,7 +957,8 @@ def subsample( Xsampled = np.array(X[rows]) else: if seed < 0: - raise ValueError(f"Invalid seed value < 0: {seed}") + msg = f"Invalid seed value < 0: {seed}" + raise ValueError(msg) n = int(X.shape[0] / subsample) np.random.seed(seed) Xsampled, rows = subsample_n(X, n=n) @@ -989,7 +988,8 @@ def subsample_n( Indices of rows that are stored in Xsampled. """ if n < 0: - raise ValueError("n must be greater 0") + msg = "n must be greater 0" + raise ValueError(msg) np.random.seed(seed) n = X.shape[0] if (n == 0 or n > X.shape[0]) else n rows = np.random.choice(X.shape[0], size=n, replace=False) @@ -1069,13 +1069,15 @@ def __init__(self, adata: AnnData, key=None): if key is None or key == "neighbors": if "neighbors" not in adata.uns: - raise KeyError('No "neighbors" in .uns') + msg = 'No "neighbors" in .uns' + raise KeyError(msg) self._neighbors_dict = adata.uns["neighbors"] self._conns_key = "connectivities" self._dists_key = "distances" else: if key not in adata.uns: - raise KeyError(f'No "{key}" in .uns') + msg = f"No {key!r} in .uns" + raise KeyError(msg) self._neighbors_dict = adata.uns[key] self._conns_key = self._neighbors_dict["connectivities_key"] self._dists_key = self._neighbors_dict["distances_key"] @@ -1108,11 +1110,13 @@ def __getitem__(self, key: Literal["connectivities_key"]) -> str: ... def __getitem__(self, key: str): if key == "distances": if "distances" not in self: - raise KeyError(f'No "{self._dists_key}" in .obsp') + msg = f"No {self._dists_key!r} in .obsp" + raise KeyError(msg) return self._distances elif key == "connectivities": if "connectivities" not in self: - raise KeyError(f'No "{self._conns_key}" in .obsp') + msg = f"No {self._conns_key!r} in .obsp" + raise KeyError(msg) return self._connectivities elif key == "connectivities_key": return self._conns_key @@ -1131,19 +1135,18 @@ def __contains__(self, key: str) -> bool: def _choose_graph(adata, obsp, neighbors_key): """Choose connectivities from neighbbors or another obsp column""" if obsp is not None and neighbors_key is not None: - raise ValueError( - "You can't specify both obsp, neighbors_key. " "Please select only one." - ) + msg = "You can't specify both obsp, neighbors_key. Please select only one." + raise ValueError(msg) if obsp is not None: return adata.obsp[obsp] else: neighbors = NeighborsView(adata, neighbors_key) if "connectivities" not in neighbors: - raise ValueError( - "You need to run `pp.neighbors` first " - "to compute a neighborhood graph." + msg = ( + "You need to run `pp.neighbors` first to compute a neighborhood graph." ) + raise ValueError(msg) return neighbors["connectivities"] @@ -1154,7 +1157,8 @@ def _resolve_axis( return (0, "obs") if axis in {1, "var"}: return (1, "var") - raise ValueError(f"`axis` must be either 0, 1, 'obs', or 'var', was {axis!r}") + msg = f"`axis` must be either 0, 1, 'obs', or 'var', was {axis!r}" + raise ValueError(msg) def is_backed_type(X: object) -> bool: @@ -1163,6 +1167,5 @@ def is_backed_type(X: object) -> bool: def raise_not_implemented_error_if_backed_type(X: object, method_name: str) -> None: if is_backed_type(X): - raise NotImplementedError( - f"{method_name} is not implemented for matrices of type {type(X)}" - ) + msg = f"{method_name} is not implemented for matrices of type {type(X)}" + raise NotImplementedError(msg) diff --git a/src/scanpy/_utils/_doctests.py b/src/scanpy/_utils/_doctests.py index 6a08099a24..0b3be18bbe 100644 --- a/src/scanpy/_utils/_doctests.py +++ b/src/scanpy/_utils/_doctests.py @@ -19,7 +19,8 @@ def decorator(func: F) -> F: def doctest_skip(reason: str) -> Callable[[F], F]: """Mark function so doctest is skipped.""" if not reason: - raise ValueError("reason must not be empty") + msg = "reason must not be empty" + raise ValueError(msg) def decorator(func: F) -> F: func._doctest_skip_reason = reason diff --git a/src/scanpy/_utils/compute/is_constant.py b/src/scanpy/_utils/compute/is_constant.py index 1bc147d68e..c9fac4abf0 100644 --- a/src/scanpy/_utils/compute/is_constant.py +++ b/src/scanpy/_utils/compute/is_constant.py @@ -24,9 +24,11 @@ def _check_axis_supported(wrapped: C) -> C: def func(a, axis=None): if axis is not None: if not isinstance(axis, Integral): - raise TypeError("axis must be integer or None.") + msg = "axis must be integer or None." + raise TypeError(msg) if axis not in (0, 1): - raise NotImplementedError("We only support axis 0 and 1 at the moment") + msg = "We only support axis 0 and 1 at the moment" + raise NotImplementedError(msg) return wrapped(a, axis) return func diff --git a/src/scanpy/experimental/pp/_highly_variable_genes.py b/src/scanpy/experimental/pp/_highly_variable_genes.py index ab78f0a74a..7ad9f36bd7 100644 --- a/src/scanpy/experimental/pp/_highly_variable_genes.py +++ b/src/scanpy/experimental/pp/_highly_variable_genes.py @@ -159,7 +159,8 @@ def _highly_variable_pearson_residuals( if theta <= 0: # TODO: would "underdispersion" with negative theta make sense? # then only theta=0 were undefined.. - raise ValueError("Pearson residuals require theta > 0") + msg = "Pearson residuals require theta > 0" + raise ValueError(msg) # prepare clipping if batch_key is None: @@ -185,7 +186,8 @@ def _highly_variable_pearson_residuals( n = X_batch.shape[0] clip = np.sqrt(n) if clip < 0: - raise ValueError("Pearson residuals require `clip>=0` or `clip=None`.") + msg = "Pearson residuals require `clip>=0` or `clip=None`." + raise ValueError(msg) if sp_sparse.issparse(X_batch): X_batch = X_batch.tocsc() @@ -378,17 +380,19 @@ def highly_variable_genes( logg.info("extracting highly variable genes") if not isinstance(adata, AnnData): - raise ValueError( + msg = ( "`pp.highly_variable_genes` expects an `AnnData` argument, " "pass `inplace=False` if you want to return a `pd.DataFrame`." ) + raise ValueError(msg) if flavor == "pearson_residuals": if n_top_genes is None: - raise ValueError( + msg = ( "`pp.highly_variable_genes` requires the argument `n_top_genes`" " for `flavor='pearson_residuals'`" ) + raise ValueError(msg) return _highly_variable_pearson_residuals( adata, layer=layer, @@ -402,6 +406,5 @@ def highly_variable_genes( inplace=inplace, ) else: - raise ValueError( - "This is an experimental API and only `flavor=pearson_residuals` is available." - ) + msg = "This is an experimental API and only `flavor=pearson_residuals` is available." + raise ValueError(msg) diff --git a/src/scanpy/experimental/pp/_normalization.py b/src/scanpy/experimental/pp/_normalization.py index bc4dedbaf9..ef3d0311d7 100644 --- a/src/scanpy/experimental/pp/_normalization.py +++ b/src/scanpy/experimental/pp/_normalization.py @@ -42,13 +42,15 @@ def _pearson_residuals(X, theta, clip, check_values, *, copy: bool = False): if theta <= 0: # TODO: would "underdispersion" with negative theta make sense? # then only theta=0 were undefined.. - raise ValueError("Pearson residuals require theta > 0") + msg = "Pearson residuals require theta > 0" + raise ValueError(msg) # prepare clipping if clip is None: n = X.shape[0] clip = np.sqrt(n) if clip < 0: - raise ValueError("Pearson residuals require `clip>=0` or `clip=None`.") + msg = "Pearson residuals require `clip>=0` or `clip=None`." + raise ValueError(msg) if check_values and not check_nonnegative_integers(X): warn( @@ -128,7 +130,8 @@ def normalize_pearson_residuals( if copy: if not inplace: - raise ValueError("`copy=True` cannot be used with `inplace=False`.") + msg = "`copy=True` cannot be used with `inplace=False`." + raise ValueError(msg) adata = adata.copy() view_to_actual(adata) diff --git a/src/scanpy/external/exporting.py b/src/scanpy/external/exporting.py index 9364b7d368..8379720ea6 100644 --- a/src/scanpy/external/exporting.py +++ b/src/scanpy/external/exporting.py @@ -86,7 +86,8 @@ def spring_project( neighbors_key = "neighbors" if neighbors_key not in adata.uns: - raise ValueError("Run `sc.pp.neighbors` first.") + msg = "Run `sc.pp.neighbors` first." + raise ValueError(msg) # check that requested 2-D embedding has been generated if embedding_method not in adata.obsm_keys(): @@ -101,9 +102,8 @@ def spring_project( + adata.uns[embedding_method]["params"]["layout"] ) else: - raise ValueError( - f"Run the specified embedding method `{embedding_method}` first." - ) + msg = f"Run the specified embedding method `{embedding_method}` first." + raise ValueError(msg) coords = adata.obsm[embedding_method] diff --git a/src/scanpy/external/pl.py b/src/scanpy/external/pl.py index a6ad48f718..ce305e2f06 100644 --- a/src/scanpy/external/pl.py +++ b/src/scanpy/external/pl.py @@ -198,9 +198,8 @@ def sam( try: dt = adata.obsm[projection] except KeyError: - raise ValueError( - "Please create a projection first using run_umap or run_tsne" - ) + msg = "Please create a projection first using run_umap or run_tsne" + raise ValueError(msg) else: dt = projection diff --git a/src/scanpy/external/pp/_bbknn.py b/src/scanpy/external/pp/_bbknn.py index 07d6e41f93..ee280cc824 100644 --- a/src/scanpy/external/pp/_bbknn.py +++ b/src/scanpy/external/pp/_bbknn.py @@ -133,7 +133,8 @@ def bbknn( try: from bbknn import bbknn except ImportError: - raise ImportError("Please install bbknn: `pip install bbknn`.") + msg = "Please install bbknn: `pip install bbknn`." + raise ImportError(msg) return bbknn( adata=adata, batch_key=batch_key, diff --git a/src/scanpy/external/pp/_dca.py b/src/scanpy/external/pp/_dca.py index c47fff90f2..20a97034b8 100644 --- a/src/scanpy/external/pp/_dca.py +++ b/src/scanpy/external/pp/_dca.py @@ -181,7 +181,8 @@ def dca( try: from dca.api import dca except ImportError: - raise ImportError("Please install dca package (>= 0.2.1) via `pip install dca`") + msg = "Please install dca package (>= 0.2.1) via `pip install dca`" + raise ImportError(msg) return dca( adata, diff --git a/src/scanpy/external/pp/_harmony_integrate.py b/src/scanpy/external/pp/_harmony_integrate.py index 1104690d53..824309f817 100644 --- a/src/scanpy/external/pp/_harmony_integrate.py +++ b/src/scanpy/external/pp/_harmony_integrate.py @@ -91,7 +91,8 @@ def harmony_integrate( try: import harmonypy except ImportError: - raise ImportError("\nplease install harmonypy:\n\n\tpip install harmonypy") + msg = "\nplease install harmonypy:\n\n\tpip install harmonypy" + raise ImportError(msg) X = adata.obsm[basis].astype(np.float64) diff --git a/src/scanpy/external/pp/_hashsolo.py b/src/scanpy/external/pp/_hashsolo.py index 256c863eee..dcb44239b1 100644 --- a/src/scanpy/external/pp/_hashsolo.py +++ b/src/scanpy/external/pp/_hashsolo.py @@ -352,15 +352,15 @@ def hashsolo( adata = adata.copy() if not inplace else adata data = adata.obs[cell_hashing_columns].values if not check_nonnegative_integers(data): - raise ValueError("Cell hashing counts must be non-negative") + msg = "Cell hashing counts must be non-negative" + raise ValueError(msg) if (number_of_noise_barcodes is not None) and ( number_of_noise_barcodes >= len(cell_hashing_columns) ): - raise ValueError( - "number_of_noise_barcodes must be at least one less \ + msg = "number_of_noise_barcodes must be at least one less \ than the number of samples you have as determined by the number of \ cell_hashing_columns you've given as input " - ) + raise ValueError(msg) num_of_cells = adata.shape[0] results = pd.DataFrame( np.zeros((num_of_cells, 6)), diff --git a/src/scanpy/external/pp/_magic.py b/src/scanpy/external/pp/_magic.py index 132d2a6448..12e93f1a8e 100644 --- a/src/scanpy/external/pp/_magic.py +++ b/src/scanpy/external/pp/_magic.py @@ -142,34 +142,38 @@ def magic( try: from magic import MAGIC, __version__ except ImportError: - raise ImportError( + msg = ( "Please install magic package via `pip install --user " "git+git://github.com/KrishnaswamyLab/MAGIC.git#subdirectory=python`" ) + raise ImportError(msg) else: if Version(__version__) < Version(MIN_VERSION): - raise ImportError( + msg = ( "scanpy requires magic-impute >= " f"v{MIN_VERSION} (detected: v{__version__}). " "Please update magic package via `pip install --user " "--upgrade magic-impute`" ) + raise ImportError(msg) start = logg.info("computing MAGIC") all_or_pca = isinstance(name_list, str | NoneType) if all_or_pca and name_list not in {"all_genes", "pca_only", None}: - raise ValueError( + msg = ( "Invalid string value for `name_list`: " "Only `'all_genes'` and `'pca_only'` are allowed." ) + raise ValueError(msg) if copy is None: copy = not all_or_pca elif not all_or_pca and not copy: - raise ValueError( + msg = ( "Can only perform MAGIC in-place with `name_list=='all_genes' or " f"`name_list=='pca_only'` (got {name_list}). Consider setting " "`copy=True`" ) + raise ValueError(msg) adata = adata.copy() if copy else adata n_jobs = settings.n_jobs if n_jobs is None else n_jobs diff --git a/src/scanpy/external/pp/_mnn_correct.py b/src/scanpy/external/pp/_mnn_correct.py index a497189913..518686dc75 100644 --- a/src/scanpy/external/pp/_mnn_correct.py +++ b/src/scanpy/external/pp/_mnn_correct.py @@ -133,10 +133,8 @@ def mnn_correct( import mnnpy from mnnpy import mnn_correct except ImportError: - raise ImportError( - "Please install the package mnnpy " - "(https://github.com/chriscainx/mnnpy). " - ) + msg = "Please install the package mnnpy (https://github.com/chriscainx/mnnpy). " + raise ImportError(msg) n_jobs = settings.n_jobs if n_jobs is None else n_jobs diff --git a/src/scanpy/external/pp/_scanorama_integrate.py b/src/scanpy/external/pp/_scanorama_integrate.py index ca847f8351..c5fb2683b4 100644 --- a/src/scanpy/external/pp/_scanorama_integrate.py +++ b/src/scanpy/external/pp/_scanorama_integrate.py @@ -111,7 +111,8 @@ def scanorama_integrate( try: import scanorama except ImportError: - raise ImportError("\nplease install Scanorama:\n\n\tpip install scanorama") + msg = "\nplease install Scanorama:\n\n\tpip install scanorama" + raise ImportError(msg) # Get batch indices in linear time. curr_batch = None @@ -123,7 +124,8 @@ def scanorama_integrate( curr_batch = batch_name if batch_name in batch_names: # Contiguous batches important for preserving cell order. - raise ValueError("Detected non-contiguous batches.") + msg = "Detected non-contiguous batches." + raise ValueError(msg) batch_names.append(batch_name) # Preserve name order. name2idx[batch_name] = [] name2idx[batch_name].append(idx) diff --git a/src/scanpy/external/tl/_harmony_timeseries.py b/src/scanpy/external/tl/_harmony_timeseries.py index d1746af45a..de3f8cde26 100644 --- a/src/scanpy/external/tl/_harmony_timeseries.py +++ b/src/scanpy/external/tl/_harmony_timeseries.py @@ -140,13 +140,15 @@ def harmony_timeseries( try: import harmony except ImportError: - raise ImportError("\nplease install harmony:\n\n\tpip install harmonyTS") + msg = "\nplease install harmony:\n\n\tpip install harmonyTS" + raise ImportError(msg) adata = adata.copy() if copy else adata logg.info("Harmony augmented affinity matrix") if adata.obs[tp].dtype.name != "category": - raise ValueError(f"{tp!r} column does not contain Categorical data") + msg = f"{tp!r} column does not contain Categorical data" + raise ValueError(msg) timepoints = adata.obs[tp].cat.categories.tolist() timepoint_connections = pd.DataFrame(np.array([timepoints[:-1], timepoints[1:]]).T) diff --git a/src/scanpy/external/tl/_palantir.py b/src/scanpy/external/tl/_palantir.py index 854301466a..eb060bbbe0 100644 --- a/src/scanpy/external/tl/_palantir.py +++ b/src/scanpy/external/tl/_palantir.py @@ -340,4 +340,5 @@ def _check_import(): try: import palantir # noqa: F401 except ImportError: - raise ImportError("\nplease install palantir:\n\tpip install palantir") + msg = "\nplease install palantir:\n\tpip install palantir" + raise ImportError(msg) diff --git a/src/scanpy/external/tl/_phate.py b/src/scanpy/external/tl/_phate.py index ff50a1e6f7..91d8191e60 100644 --- a/src/scanpy/external/tl/_phate.py +++ b/src/scanpy/external/tl/_phate.py @@ -154,10 +154,11 @@ def phate( try: import phate except ImportError: - raise ImportError( + msg = ( "You need to install the package `phate`: please run `pip install " "--user phate` in a terminal." ) + raise ImportError(msg) X_phate = phate.PHATE( n_components=n_components, k=k, @@ -179,6 +180,6 @@ def phate( logg.info( " finished", time=start, - deep=("added\n" " 'X_phate', PHATE coordinates (adata.obsm)"), + deep=("added\n 'X_phate', PHATE coordinates (adata.obsm)"), ) return adata if copy else None diff --git a/src/scanpy/external/tl/_phenograph.py b/src/scanpy/external/tl/_phenograph.py index 24e10bcb85..fdc3973771 100644 --- a/src/scanpy/external/tl/_phenograph.py +++ b/src/scanpy/external/tl/_phenograph.py @@ -226,17 +226,19 @@ def phenograph( assert phenograph.__version__ >= "1.5.3" except (ImportError, AssertionError, AttributeError): - raise ImportError( + msg = ( "please install the latest release of phenograph:\n\t" "pip install -U PhenoGraph" ) + raise ImportError(msg) if isinstance(data, AnnData): adata = data try: data = data.obsm["X_pca"] except KeyError: - raise KeyError("Please run `sc.pp.pca` on `data` and try again!") + msg = "Please run `sc.pp.pca` on `data` and try again!" + raise KeyError(msg) else: adata = None copy = True diff --git a/src/scanpy/external/tl/_pypairs.py b/src/scanpy/external/tl/_pypairs.py index 255334fe7a..2db98ff9a7 100644 --- a/src/scanpy/external/tl/_pypairs.py +++ b/src/scanpy/external/tl/_pypairs.py @@ -153,8 +153,10 @@ def _check_import(): try: import pypairs except ImportError: - raise ImportError("You need to install the package `pypairs`.") + msg = "You need to install the package `pypairs`." + raise ImportError(msg) min_version = Version("3.0.9") if Version(pypairs.__version__) < min_version: - raise ImportError(f"Please only use `pypairs` >= {min_version}") + msg = f"Please only use `pypairs` >= {min_version}" + raise ImportError(msg) diff --git a/src/scanpy/external/tl/_sam.py b/src/scanpy/external/tl/_sam.py index ebf3156b9a..8daa2c0091 100644 --- a/src/scanpy/external/tl/_sam.py +++ b/src/scanpy/external/tl/_sam.py @@ -211,12 +211,13 @@ def sam( try: from samalg import SAM except ImportError: - raise ImportError( + msg = ( "\nplease install sam-algorithm: \n\n" "\tgit clone git://github.com/atarashansky/self-assembling-manifold.git\n" "\tcd self-assembling-manifold\n" "\tpip install ." ) + raise ImportError(msg) logg.info("Self-assembling manifold") diff --git a/src/scanpy/external/tl/_trimap.py b/src/scanpy/external/tl/_trimap.py index 9146e79b84..122a4792b7 100644 --- a/src/scanpy/external/tl/_trimap.py +++ b/src/scanpy/external/tl/_trimap.py @@ -108,7 +108,8 @@ def trimap( try: from trimap import TRIMAP except ImportError: - raise ImportError("\nplease install trimap: \n\n\tsudo pip install trimap") + msg = "\nplease install trimap: \n\n\tsudo pip install trimap" + raise ImportError(msg) adata = adata.copy() if copy else adata start = logg.info("computing TriMap") adata = adata.copy() if copy else adata @@ -121,10 +122,11 @@ def trimap( else: X = adata.X if scp.issparse(X): - raise ValueError( + msg = ( "trimap currently does not support sparse matrices. Please" "use a dense matrix or apply pca first." ) + raise ValueError(msg) logg.warning("`X_pca` not found. Run `sc.pp.pca` first for speedup.") X_trimap = TRIMAP( n_dims=n_components, diff --git a/src/scanpy/external/tl/_wishbone.py b/src/scanpy/external/tl/_wishbone.py index e857226feb..3b85ae14a1 100644 --- a/src/scanpy/external/tl/_wishbone.py +++ b/src/scanpy/external/tl/_wishbone.py @@ -104,17 +104,17 @@ def wishbone( try: from wishbone.core import wishbone as c_wishbone except ImportError: - raise ImportError( - "\nplease install wishbone:\n\n\thttps://github.com/dpeerlab/wishbone" - ) + msg = "\nplease install wishbone:\n\n\thttps://github.com/dpeerlab/wishbone" + raise ImportError(msg) # Start cell index s = np.where(adata.obs_names == start_cell)[0] if len(s) == 0: - raise RuntimeError( + msg = ( f"Start cell {start_cell} not found in data. " "Please rerun with correct start cell." ) + raise RuntimeError(msg) if isinstance(num_waypoints, Collection): diff = np.setdiff1d(num_waypoints, adata.obs.index) if diff.size > 0: @@ -124,10 +124,11 @@ def wishbone( ) num_waypoints = diff.tolist() elif num_waypoints > adata.shape[0]: - raise RuntimeError( + msg = ( "num_waypoints parameter is higher than the number of cells in the " "dataset. Please select a smaller number" ) + raise RuntimeError(msg) s = s[0] # Run the algorithm diff --git a/src/scanpy/get/_aggregated.py b/src/scanpy/get/_aggregated.py index 53a18bb47c..94bf202b69 100644 --- a/src/scanpy/get/_aggregated.py +++ b/src/scanpy/get/_aggregated.py @@ -256,25 +256,29 @@ def aggregate( Note that this filters out any combination of groups that wasn't present in the original data. """ if not isinstance(adata, AnnData): - raise NotImplementedError( + msg = ( "sc.get.aggregate is currently only implemented for AnnData input, " f"was passed {type(adata)}." ) + raise NotImplementedError(msg) if axis is None: axis = 1 if varm else 0 axis, axis_name = _resolve_axis(axis) mask = _check_mask(adata, mask, axis_name) data = adata.X if sum(p is not None for p in [varm, obsm, layer]) > 1: - raise TypeError("Please only provide one (or none) of varm, obsm, or layer") + msg = "Please only provide one (or none) of varm, obsm, or layer" + raise TypeError(msg) if varm is not None: if axis != 1: - raise ValueError("varm can only be used when axis is 1") + msg = "varm can only be used when axis is 1" + raise ValueError(msg) data = adata.varm[varm] elif obsm is not None: if axis != 0: - raise ValueError("obsm can only be used when axis is 0") + msg = "obsm can only be used when axis is 0" + raise ValueError(msg) data = adata.obsm[obsm] elif layer is not None: data = adata.layers[layer] @@ -324,7 +328,8 @@ def _aggregate( mask: NDArray[np.bool_] | None = None, dof: int = 1, ): - raise NotImplementedError(f"Data type {type(data)} not supported for aggregation") + msg = f"Data type {type(data)} not supported for aggregation" + raise NotImplementedError(msg) @_aggregate.register(pd.DataFrame) @@ -347,7 +352,8 @@ def aggregate_array( funcs = set([func] if isinstance(func, str) else func) if unknown := funcs - get_literal_vals(AggType): - raise ValueError(f"func {unknown} is not one of {get_literal_vals(AggType)}") + msg = f"func {unknown} is not one of {get_literal_vals(AggType)}" + raise ValueError(msg) if "sum" in funcs: # sum is calculated separately from the rest agg = groupby.sum() diff --git a/src/scanpy/get/get.py b/src/scanpy/get/get.py index c36ddde8f8..abfa51d1f9 100644 --- a/src/scanpy/get/get.py +++ b/src/scanpy/get/get.py @@ -149,18 +149,20 @@ def _check_indices( # be further duplicated when selecting them. if not dim_df.columns.is_unique: dup_cols = dim_df.columns[dim_df.columns.duplicated()].tolist() - raise ValueError( + msg = ( f"adata.{dim} contains duplicated columns. Please rename or remove " "these columns first.\n`" f"Duplicated columns {dup_cols}" ) + raise ValueError(msg) if not alt_index.is_unique: - raise ValueError( + msg = ( f"{alt_repr}.{alt_dim}_names contains duplicated items\n" f"Please rename these {alt_dim} names first for example using " f"`adata.{alt_dim}_names_make_unique()`" ) + raise ValueError(msg) # use only unique keys, otherwise duplicated keys will # further duplicate when reordering the keys later in the function @@ -168,27 +170,26 @@ def _check_indices( if key in dim_df.columns: col_keys.append(key) if key in alt_names.index: - raise KeyError( - f"The key '{key}' is found in both adata.{dim} and {alt_repr}.{alt_search_repr}." - ) + msg = f"The key {key!r} is found in both adata.{dim} and {alt_repr}.{alt_search_repr}." + raise KeyError(msg) elif key in alt_names.index: val = alt_names[key] if isinstance(val, pd.Series): # while var_names must be unique, adata.var[gene_symbols] does not # It's still ambiguous to refer to a duplicated entry though. assert alias_index is not None - raise KeyError( - f"Found duplicate entries for '{key}' in {alt_repr}.{alt_search_repr}." - ) + msg = f"Found duplicate entries for {key!r} in {alt_repr}.{alt_search_repr}." + raise KeyError(msg) index_keys.append(val) index_aliases.append(key) else: not_found.append(key) if len(not_found) > 0: - raise KeyError( - f"Could not find keys '{not_found}' in columns of `adata.{dim}` or in" + msg = ( + f"Could not find keys {not_found!r} in columns of `adata.{dim}` or in" f" {alt_repr}.{alt_search_repr}." ) + raise KeyError(msg) return col_keys, index_keys, index_aliases @@ -286,9 +287,9 @@ def obs_df( if isinstance(keys, str): keys = [keys] if use_raw: - assert ( - layer is None - ), "Cannot specify use_raw=True and a layer at the same time." + assert layer is None, ( + "Cannot specify use_raw=True and a layer at the same time." + ) var = adata.raw.var else: var = adata.var @@ -430,7 +431,8 @@ def _get_obs_rep( """ # https://github.com/scverse/scanpy/issues/1546 if not isinstance(use_raw, bool): - raise TypeError(f"use_raw expected to be bool, was {type(use_raw)}.") + msg = f"use_raw expected to be bool, was {type(use_raw)}." + raise TypeError(msg) is_layer = layer is not None is_raw = use_raw is not False @@ -448,10 +450,11 @@ def _get_obs_rep( return adata.obsm[obsm] if is_obsp: return adata.obsp[obsp] - raise AssertionError( + msg = ( "That was unexpected. Please report this bug at:\n\n\t" "https://github.com/scverse/scanpy/issues" ) + raise AssertionError(msg) def _set_obs_rep( diff --git a/src/scanpy/logging.py b/src/scanpy/logging.py index 3aa0ca494c..7bd678f568 100644 --- a/src/scanpy/logging.py +++ b/src/scanpy/logging.py @@ -181,7 +181,7 @@ def print_version_and_date(*, file=None): if file is None: file = sys.stdout print( - f"Running Scanpy {__version__}, " f"on {datetime.now():%Y-%m-%d %H:%M}.", + f"Running Scanpy {__version__}, on {datetime.now():%Y-%m-%d %H:%M}.", file=file, ) diff --git a/src/scanpy/metrics/_gearys_c.py b/src/scanpy/metrics/_gearys_c.py index 358a201eed..cf4220eb7a 100644 --- a/src/scanpy/metrics/_gearys_c.py +++ b/src/scanpy/metrics/_gearys_c.py @@ -113,7 +113,8 @@ def gearys_c( elif "neighbors" in adata.uns: g = adata.uns["neighbors"]["connectivities"] else: - raise ValueError("Must run neighbors first.") + msg = "Must run neighbors first." + raise ValueError(msg) else: raise NotImplementedError() if vals is None: diff --git a/src/scanpy/metrics/_morans_i.py b/src/scanpy/metrics/_morans_i.py index 5e4ab50788..c21c455f38 100644 --- a/src/scanpy/metrics/_morans_i.py +++ b/src/scanpy/metrics/_morans_i.py @@ -112,7 +112,8 @@ def morans_i( elif "neighbors" in adata.uns: g = adata.uns["neighbors"]["connectivities"] else: - raise ValueError("Must run neighbors first.") + msg = "Must run neighbors first." + raise ValueError(msg) else: raise NotImplementedError() if vals is None: diff --git a/src/scanpy/neighbors/__init__.py b/src/scanpy/neighbors/__init__.py index ec5957b325..214043727b 100644 --- a/src/scanpy/neighbors/__init__.py +++ b/src/scanpy/neighbors/__init__.py @@ -425,10 +425,11 @@ def count_nonzero(a: np.ndarray | csr_matrix) -> int: self._eigen_basis = _backwards_compat_get_full_X_diffmap(adata) if n_dcs is not None: if n_dcs > len(self._eigen_values): - raise ValueError( + msg = ( f"Cannot instantiate using `n_dcs`={n_dcs}. " "Compute diffmap/spectrum with more components first." ) + raise ValueError(msg) self._eigen_values = self._eigen_values[:n_dcs] self._eigen_basis = self._eigen_basis[:, :n_dcs] self.n_dcs = len(self._eigen_values) @@ -789,7 +790,8 @@ def compute_eigen( """ np.set_printoptions(precision=10) if self._transitions_sym is None: - raise ValueError("Run `.compute_transitions` first.") + msg = "Run `.compute_transitions` first." + raise ValueError(msg) matrix = self._transitions_sym # compute the spectrum if n_comps == 0: @@ -812,9 +814,7 @@ def compute_eigen( if sort == "decrease": evals = evals[::-1] evecs = evecs[:, ::-1] - logg.info( - f" eigenvalues of transition matrix\n" f"{indent(str(evals), ' ')}" - ) + logg.info(f" eigenvalues of transition matrix\n{indent(str(evals), ' ')}") if self._number_connected_components > len(evals) / 2: logg.warning("Transition matrix has many disconnected components!") self._eigen_values = evals @@ -825,10 +825,11 @@ def _init_iroot(self): # set iroot directly if "iroot" in self._adata.uns: if self._adata.uns["iroot"] >= self._adata.n_obs: - logg.warning( - f'Root cell index {self._adata.uns["iroot"]} does not ' + msg = ( + f"Root cell index {self._adata.uns['iroot']} does not " f"exist for {self._adata.n_obs} samples. It’s ignored." ) + logg.warning(msg) else: self.iroot = self._adata.uns["iroot"] return @@ -890,9 +891,8 @@ def _set_iroot_via_xroot(self, xroot: np.ndarray): condition, only relevant for computing pseudotime. """ if self._adata.shape[1] != xroot.size: - raise ValueError( - "The root vector you provided does not have the " "correct dimension." - ) + msg = "The root vector you provided does not have the correct dimension." + raise ValueError(msg) # this is the squared distance dsqroot = 1e10 iroot = 0 diff --git a/src/scanpy/plotting/_anndata.py b/src/scanpy/plotting/_anndata.py index a93d55699b..75dd210c0b 100755 --- a/src/scanpy/plotting/_anndata.py +++ b/src/scanpy/plotting/_anndata.py @@ -163,7 +163,8 @@ def scatter( if basis is not None: return _scatter_obs(**args) if x is None or y is None: - raise ValueError("Either provide a `basis` or `x` and `y`.") + msg = "Either provide a `basis` or `x` and `y`." + raise ValueError(msg) if _check_if_annotations(adata, "obs", x=x, y=y, colors=color, use_raw=use_raw): return _scatter_obs(**args) if _check_if_annotations(adata, "var", x=x, y=y, colors=color, use_raw=use_raw): @@ -172,10 +173,11 @@ def scatter( # store .uns annotations that were added to the new adata object adata.uns = args_t["adata"].uns return axs - raise ValueError( + msg = ( "`x`, `y`, and potential `color` inputs must all " "come from either `.obs` or `.var`" ) + raise ValueError(msg) def _check_if_annotations( @@ -259,22 +261,23 @@ def _scatter_obs( layers = tuple(layers) for layer in layers: if layer not in adata.layers and layer not in ["X", None]: - raise ValueError( + msg = ( "`layers` should have elements that are " "either None or in adata.layers.keys()." ) + raise ValueError(msg) else: - raise ValueError( + msg = ( "`layers` should be a string or a collection of strings " f"with length 3, had value '{layers}'" ) + raise ValueError(msg) if use_raw and layers not in [("X", "X", "X"), (None, None, None)]: ValueError("`use_raw` must be `False` if layers are used.") if legend_loc not in (valid_legend_locs := get_literal_vals(_utils._LegendLoc)): - raise ValueError( - f"Invalid `legend_loc`, need to be one of: {valid_legend_locs}." - ) + msg = f"Invalid `legend_loc`, need to be one of: {valid_legend_locs}." + raise ValueError(msg) if components is None: components = "1,2" if "2d" in projection else "1,2,3" if isinstance(components, str): @@ -294,9 +297,8 @@ def _scatter_obs( if basis == "diffmap": components -= 1 except KeyError: - raise KeyError( - f"compute coordinates using visualization tool {basis} first" - ) + msg = f"compute coordinates using visualization tool {basis} first" + raise KeyError(msg) elif x is not None and y is not None: if use_raw: if x in adata.obs.columns: @@ -313,7 +315,8 @@ def _scatter_obs( Y = np.c_[x_arr, y_arr] else: - raise ValueError("Either provide a `basis` or `x` and `y`.") + msg = "Either provide a `basis` or `x` and `y`." + raise ValueError(msg) if size is None: n = Y.shape[0] @@ -375,10 +378,11 @@ def _scatter_obs( c = key colorbar = False else: - raise ValueError( + msg = ( f"key {key!r} is invalid! pass valid observation annotation, " f"one of {adata.obs_keys()} or a gene name {adata.var_names}" ) + raise ValueError(msg) if colorbar is None: colorbar = not categorical colorbars.append(colorbar) @@ -451,10 +455,11 @@ def add_centroid(centroids, name, Y, mask): groups = [groups] if isinstance(groups, str) else groups for name in groups: if name not in set(adata.obs[key].cat.categories): - raise ValueError( + msg = ( f"{name!r} is invalid! specify valid name, " f"one of {adata.obs[key].cat.categories}" ) + raise ValueError(msg) else: iname = np.flatnonzero( adata.obs[key].cat.categories.values == name @@ -844,23 +849,21 @@ def violin( ylabel = [ylabel] * (1 if groupby is None else len(keys)) if groupby is None: if len(ylabel) != 1: - raise ValueError( - f"Expected number of y-labels to be `1`, found `{len(ylabel)}`." - ) + msg = f"Expected number of y-labels to be `1`, found `{len(ylabel)}`." + raise ValueError(msg) elif len(ylabel) != len(keys): - raise ValueError( - f"Expected number of y-labels to be `{len(keys)}`, " - f"found `{len(ylabel)}`." - ) + msg = f"Expected number of y-labels to be `{len(keys)}`, found `{len(ylabel)}`." + raise ValueError(msg) if groupby is not None: obs_df = get.obs_df(adata, keys=[groupby] + keys, layer=layer, use_raw=use_raw) if kwds.get("palette") is None: if not isinstance(adata.obs[groupby].dtype, CategoricalDtype): - raise ValueError( + msg = ( f"The column `adata.obs[{groupby!r}]` needs to be categorical, " f"but is of dtype {adata.obs[groupby].dtype}." ) + raise ValueError(msg) _utils.add_colors_for_categorical_sample_annotation(adata, groupby) kwds["hue"] = groupby kwds["palette"] = dict( @@ -1022,7 +1025,8 @@ def clustermap( import seaborn as sns # Slow import, only import if called if not isinstance(obs_keys, str | NoneType): - raise ValueError("Currently, only a single key is supported.") + msg = "Currently, only a single key is supported." + raise ValueError(msg) sanitize_anndata(adata) use_raw = _check_use_raw(adata, use_raw) X = adata.raw.X if use_raw else adata.X @@ -1555,11 +1559,12 @@ def tracksplot( """ if groupby not in adata.obs_keys() or adata.obs[groupby].dtype.name != "category": - raise ValueError( + msg = ( "groupby has to be a valid categorical observation. " f"Given value: {groupby}, valid categorical observations: " - f'{[x for x in adata.obs_keys() if adata.obs[x].dtype.name == "category"]}' + f"{[x for x in adata.obs_keys() if adata.obs[x].dtype.name == 'category']}" ) + raise ValueError(msg) var_names, var_group_labels, var_group_positions = _check_var_names_type( var_names, var_group_labels, var_group_positions @@ -1891,7 +1896,8 @@ def correlation_matrix( dendrogram = ax is None if dendrogram: if ax is not None: - raise ValueError("Can only plot dendrogram when not plotting to an axis") + msg = "Can only plot dendrogram when not plotting to an axis" + raise ValueError(msg) assert (len(index)) == corr_matrix.shape[0] corr_matrix = corr_matrix[index, :] corr_matrix = corr_matrix[:, index] @@ -2059,10 +2065,11 @@ def _prepare_dataframe( f"Given {group}, is not in observations: {adata.obs_keys()}" + msg ) if group in adata.obs.columns and group == adata.obs.index.name: - raise ValueError( + msg = ( f"Given group {group} is both and index and a column level, " "which is ambiguous." ) + raise ValueError(msg) if group == adata.obs.index.name: groupby_index = group if groupby_index is not None: @@ -2277,19 +2284,12 @@ def _reorder_categories_after_dendrogram( 'var_group_labels', and 'var_group_positions' """ - dendrogram_key = _get_dendrogram_key(adata, dendrogram_key, groupby) - if isinstance(groupby, str): groupby = [groupby] - dendro_info = adata.uns[dendrogram_key] - if groupby != dendro_info["groupby"]: - raise ValueError( - "Incompatible observations. The precomputed dendrogram contains " - f"information for the observation: '{groupby}' while the plot is " - f"made for the observation: '{dendro_info['groupby']}. " - "Please run `sc.tl.dendrogram` using the right observation.'" - ) + dendro_info = adata.uns[ + _get_dendrogram_key(adata, dendrogram_key, groupby, validate_groupby=True) + ] if categories is None: categories = adata.obs[dendro_info["groupby"]].cat.categories @@ -2299,7 +2299,7 @@ def _reorder_categories_after_dendrogram( categories_ordered = dendro_info["categories_ordered"] if len(categories) != len(categories_idx_ordered): - raise ValueError( + msg = ( "Incompatible observations. Dendrogram data has " f"{len(categories_idx_ordered)} categories but current groupby " f"observation {groupby!r} contains {len(categories)} categories. " @@ -2307,6 +2307,7 @@ def _reorder_categories_after_dendrogram( "initial computation of `sc.tl.dendrogram`. " "Please run `sc.tl.dendrogram` again.'" ) + raise ValueError(msg) # reorder var_groups (if any) if var_group_positions is None or var_group_labels is None: @@ -2362,7 +2363,11 @@ def _format_first_three_categories(categories): def _get_dendrogram_key( - adata: AnnData, dendrogram_key: str | None, groupby: str | Sequence[str] + adata: AnnData, + dendrogram_key: str | None, + groupby: str | Sequence[str], + *, + validate_groupby: bool = False, ) -> str: # the `dendrogram_key` can be a bool an NoneType or the name of the # dendrogram key. By default the name of the dendrogram key is 'dendrogram' @@ -2370,7 +2375,7 @@ def _get_dendrogram_key( if isinstance(groupby, str): dendrogram_key = f"dendrogram_{groupby}" elif isinstance(groupby, Sequence): - dendrogram_key = f'dendrogram_{"_".join(groupby)}' + dendrogram_key = f"dendrogram_{'_'.join(groupby)}" else: msg = f"groupby has wrong type: {type(groupby).__name__}." raise AssertionError(msg) @@ -2386,10 +2391,22 @@ def _get_dendrogram_key( dendrogram(adata, groupby, key_added=dendrogram_key) if "dendrogram_info" not in adata.uns[dendrogram_key]: - raise ValueError( + msg = ( f"The given dendrogram key ({dendrogram_key!r}) does not contain " "valid dendrogram information." ) + raise ValueError(msg) + + if validate_groupby: + existing_groupby = adata.uns[dendrogram_key]["groupby"] + if groupby != existing_groupby: + msg = ( + "Incompatible observations. The precomputed dendrogram contains " + f"information for the observation: {groupby!r} while the plot is " + f"made for the observation: {existing_groupby!r}. " + "Please run `sc.tl.dendrogram` using the right observation.'" + ) + raise ValueError(msg) return dendrogram_key diff --git a/src/scanpy/plotting/_baseplot_class.py b/src/scanpy/plotting/_baseplot_class.py index fff1b40322..e14d387f84 100644 --- a/src/scanpy/plotting/_baseplot_class.py +++ b/src/scanpy/plotting/_baseplot_class.py @@ -899,23 +899,18 @@ def _format_first_three_categories(_categories): _categories = _categories[:3] + ["etc."] return ", ".join(_categories) - key = _get_dendrogram_key(self.adata, dendrogram_key, self.groupby) - - dendro_info = self.adata.uns[key] - if self.groupby != dendro_info["groupby"]: - raise ValueError( - "Incompatible observations. The precomputed dendrogram contains " - f"information for the observation: '{self.groupby}' while the plot is " - f"made for the observation: '{dendro_info['groupby']}. " - "Please run `sc.tl.dendrogram` using the right observation.'" + dendro_info = self.adata.uns[ + _get_dendrogram_key( + self.adata, dendrogram_key, self.groupby, validate_groupby=True ) + ] # order of groupby categories categories_idx_ordered = dendro_info["categories_idx_ordered"] categories_ordered = dendro_info["categories_ordered"] if len(self.categories) != len(categories_idx_ordered): - raise ValueError( + msg = ( "Incompatible observations. Dendrogram data has " f"{len(categories_idx_ordered)} categories but current groupby " f"observation {self.groupby!r} contains {len(self.categories)} categories. " @@ -923,6 +918,7 @@ def _format_first_three_categories(_categories): "initial computation of `sc.tl.dendrogram`. " "Please run `sc.tl.dendrogram` again.'" ) + raise ValueError(msg) # reorder var_groups (if any) if self.var_names is not None: diff --git a/src/scanpy/plotting/_dotplot.py b/src/scanpy/plotting/_dotplot.py index e2ae434db6..da3d16379b 100644 --- a/src/scanpy/plotting/_dotplot.py +++ b/src/scanpy/plotting/_dotplot.py @@ -681,11 +681,11 @@ def _dotplot( """ assert dot_size.shape == dot_color.shape, ( - "please check that dot_size " "and dot_color dataframes have the same shape" + "please check that dot_size and dot_color dataframes have the same shape" ) assert list(dot_size.index) == list(dot_color.index), ( - "please check that dot_size " "and dot_color dataframes have the same index" + "please check that dot_size and dot_color dataframes have the same index" ) assert list(dot_size.columns) == list(dot_color.columns), ( @@ -721,12 +721,14 @@ def _dotplot( dot_max = np.ceil(max(frac) * 10) / 10 else: if dot_max < 0 or dot_max > 1: - raise ValueError("`dot_max` value has to be between 0 and 1") + msg = "`dot_max` value has to be between 0 and 1" + raise ValueError(msg) if dot_min is None: dot_min = 0 else: if dot_min < 0 or dot_min > 1: - raise ValueError("`dot_min` value has to be between 0 and 1") + msg = "`dot_min` value has to be between 0 and 1" + raise ValueError(msg) if dot_min != 0 or dot_max != 1: # clip frac between dot_min and dot_max diff --git a/src/scanpy/plotting/_scrublet.py b/src/scanpy/plotting/_scrublet.py index 4a1247574d..050aec6f53 100644 --- a/src/scanpy/plotting/_scrublet.py +++ b/src/scanpy/plotting/_scrublet.py @@ -72,9 +72,8 @@ def scrublet_score_distribution( """ if "scrublet" not in adata.uns: - raise ValueError( - "Please run scrublet before trying to generate the scrublet plot." - ) + msg = "Please run scrublet before trying to generate the scrublet plot." + raise ValueError(msg) # If batched_by is populated, then we know Scrublet was run over multiple batches diff --git a/src/scanpy/plotting/_stacked_violin.py b/src/scanpy/plotting/_stacked_violin.py index e47680facc..3c58ead35f 100644 --- a/src/scanpy/plotting/_stacked_violin.py +++ b/src/scanpy/plotting/_stacked_violin.py @@ -750,7 +750,7 @@ def stacked_violin( e.g. `'red'` or `'#cc33ff'`. {show_save_ax} {vminmax} - kwds + **kwds Are passed to :func:`~seaborn.violinplot`. Returns diff --git a/src/scanpy/plotting/_tools/__init__.py b/src/scanpy/plotting/_tools/__init__.py index a421f6b94a..8f189121e2 100644 --- a/src/scanpy/plotting/_tools/__init__.py +++ b/src/scanpy/plotting/_tools/__init__.py @@ -158,14 +158,14 @@ def pca_loadings( components = np.array(components) - 1 if np.any(components < 0): - raise ValueError("Component indices must be greater than zero.") + msg = "Component indices must be greater than zero." + raise ValueError(msg) if n_points is None: n_points = min(30, adata.n_vars) elif adata.n_vars < n_points: - raise ValueError( - f"Tried to plot {n_points} variables, but passed anndata only has {adata.n_vars}." - ) + msg = f"Tried to plot {n_points} variables, but passed anndata only has {adata.n_vars}." + raise ValueError(msg) ranking( adata, @@ -398,10 +398,11 @@ def rank_genes_groups( """ n_panels_per_row = kwds.get("n_panels_per_row", ncols) if n_genes < 1: - raise NotImplementedError( + msg = ( "Specifying a negative number for n_genes has not been implemented for " - f"this plot. Received n_genes={n_genes}." + f"this plot. Received {n_genes=!r}." ) + raise NotImplementedError(msg) reference = str(adata.uns[key]["params"]["reference"]) group_names = adata.uns[key]["names"].dtype.names if groups is None else groups @@ -517,10 +518,11 @@ def _rank_genes_groups_plot( Common function to call the different rank_genes_groups_* plots """ if var_names is not None and n_genes is not None: - raise ValueError( + msg = ( "The arguments n_genes and var_names are mutually exclusive. Please " "select only one." ) + raise ValueError(msg) if var_names is None and n_genes is None: # set n_genes = 10 as default when none of the options is given @@ -694,7 +696,6 @@ def rank_genes_groups_heatmap( {show_save_ax} **kwds Are passed to :func:`~scanpy.pl.heatmap`. - {show_save_ax} Examples -------- @@ -778,7 +779,6 @@ def rank_genes_groups_tracksplot( {show_save_ax} **kwds Are passed to :func:`~scanpy.pl.tracksplot`. - {show_save_ax} Examples -------- @@ -1313,9 +1313,7 @@ def rank_genes_groups_violin( _ax.set_ylabel("expression") _ax.set_xticklabels(new_gene_names, rotation="vertical") writekey = ( - f"rank_genes_groups_" - f"{adata.uns[key]['params']['groupby']}_" - f"{group_name}" + f"rank_genes_groups_{adata.uns[key]['params']['groupby']}_{group_name}" ) savefig_or_show(writekey, show=show, save=save) axs.append(_ax) @@ -1527,7 +1525,8 @@ def embedding_density( basis = "draw_graph_fa" if key is not None and groupby is not None: - raise ValueError("either pass key or groupby but not both") + msg = "either pass key or groupby but not both" + raise ValueError(msg) if key is None: key = "umap_density" @@ -1535,16 +1534,17 @@ def embedding_density( key += f"_{groupby}" if f"X_{basis}" not in adata.obsm_keys(): - raise ValueError( - f"Cannot find the embedded representation `adata.obsm[X_{basis!r}]`. " + msg = ( + f"Cannot find the embedded representation `adata.obsm['X_{basis}']`. " "Compute the embedding first." ) + raise ValueError(msg) if key not in adata.obs or f"{key}_params" not in adata.uns: - raise ValueError( - "Please run `sc.tl.embedding_density()` first " - "and specify the correct key." + msg = ( + "Please run `sc.tl.embedding_density()` first and specify the correct key." ) + raise ValueError(msg) if "components" in kwargs: logg.warning( @@ -1563,10 +1563,11 @@ def embedding_density( group = [group] if group is None and groupby is not None: - raise ValueError( + msg = ( "Densities were calculated over an `.obs` covariate. " "Please specify a group from this covariate to plot." ) + raise ValueError(msg) if group is not None and groupby is None: logg.warning( @@ -1576,7 +1577,8 @@ def embedding_density( group = None if np.min(adata.obs[key]) < 0 or np.max(adata.obs[key]) > 1: - raise ValueError("Densities should be scaled between 0 and 1.") + msg = "Densities should be scaled between 0 and 1." + raise ValueError(msg) if wspace is None: # try to set a wspace that is not too large or too small given the @@ -1601,17 +1603,19 @@ def embedding_density( # (even if only one group is set) if group is not None and not isinstance(group, str) and isinstance(group, Sequence): if ax is not None: - raise ValueError("Can only specify `ax` if no `group` sequence is given.") + msg = "Can only specify `ax` if no `group` sequence is given." + raise ValueError(msg) fig, gs = _panel_grid(hspace, wspace, ncols, len(group)) axs = [] for count, group_name in enumerate(group): if group_name not in adata.obs[groupby].cat.categories: - raise ValueError( + msg = ( "Please specify a group from the `.obs` category " "over which the density was calculated. " f"Invalid group name: {group_name}" ) + raise ValueError(msg) ax = plt.subplot(gs[count]) # Define plotting data @@ -1743,9 +1747,8 @@ def _get_values_to_plot( "log10_pvals_adj", ] if values_to_plot not in valid_options: - raise ValueError( - f"given value_to_plot: '{values_to_plot}' is not valid. Valid options are {valid_options}" - ) + msg = f"given value_to_plot: '{values_to_plot}' is not valid. Valid options are {valid_options}" + raise ValueError(msg) values_df = None check_done = False diff --git a/src/scanpy/plotting/_tools/paga.py b/src/scanpy/plotting/_tools/paga.py index e67e6e2ece..a4b2de3441 100644 --- a/src/scanpy/plotting/_tools/paga.py +++ b/src/scanpy/plotting/_tools/paga.py @@ -239,10 +239,11 @@ def _compute_pos( nx_g_tree = nx.Graph(adj_tree) pos = _utils.hierarchy_pos(nx_g_tree, root) if len(pos) < adjacency_solid.shape[0]: - raise ValueError( + msg = ( "This is a forest and not a single tree. " "Try another `layout`, e.g., {'fr'}." ) + raise ValueError(msg) else: # igraph layouts random.seed(random_state.bytes(8)) @@ -547,10 +548,8 @@ def is_flat(x): if isinstance(root, str): if root not in labels: - raise ValueError( - "If `root` is a string, " - f"it needs to be one of {labels} not {root!r}." - ) + msg = f"If `root` is a string, it needs to be one of {labels} not {root!r}." + raise ValueError(msg) root = list(labels).index(root) if isinstance(root, Sequence) and root[0] in labels: root = [list(labels).index(r) for r in root] @@ -731,10 +730,11 @@ def _paga_graph( else: pos = Path(pos) if pos.suffix != ".gdf": - raise ValueError( + msg = ( "Currently only supporting reading positions from .gdf files. " "Consider generating them using, for instance, Gephi." ) + raise ValueError(msg) s = "" # read the node definition from the file with pos.open() as f: f.readline() @@ -762,7 +762,8 @@ def _paga_graph( elif colors == "degree_solid": colors = [d for _, d in nx_g_solid.degree(weight="weight")] else: - raise ValueError('`degree` either "degree_dashed" or "degree_solid".') + msg = '`degree` either "degree_dashed" or "degree_solid".' + raise ValueError(msg) colors = (np.array(colors) - np.min(colors)) / (np.max(colors) - np.min(colors)) # plot gene expression @@ -811,10 +812,11 @@ def _paga_graph( colors = asso_colors if len(colors) != len(node_labels): - raise ValueError( + msg = ( f"Expected `colors` to be of length `{len(node_labels)}`, " f"found `{len(colors)}`." ) + raise ValueError(msg) # count number of connected components n_components, labels = scipy.sparse.csgraph.connected_components(adjacency_solid) @@ -839,7 +841,8 @@ def _paga_graph( ) nx_g_solid = nx.Graph(adjacency_solid) if dashed_edges is not None: - raise ValueError("`single_component` only if `dashed_edges` is `None`.") + msg = "`single_component` only if `dashed_edges` is `None`." + raise ValueError(msg) # edge widths base_edge_width = edge_width_scale * 5 * rcParams["lines.linewidth"] @@ -958,10 +961,11 @@ def _paga_graph( else: for ix, (xx, yy) in enumerate(zip(pos_array[:, 0], pos_array[:, 1])): if not isinstance(colors[ix], Mapping): - raise ValueError( + msg = ( f"{colors[ix]} is neither a dict of valid " "matplotlib colors nor a valid matplotlib color." ) + raise ValueError(msg) color_single = colors[ix].keys() fracs = [colors[ix][c] for c in color_single] total = sum(fracs) @@ -971,10 +975,11 @@ def _paga_graph( color_single.append("grey") fracs.append(1 - sum(fracs)) elif not np.isclose(total, 1): - raise ValueError( + msg = ( f"Expected fractions for node `{ix}` to be " f"close to 1, found `{total}`." ) + raise ValueError(msg) cumsum = np.cumsum(fracs) cumsum = cumsum / cumsum[-1] @@ -1125,18 +1130,20 @@ def paga_path( if groups_key is None: if "groups" not in adata.uns["paga"]: - raise KeyError( + msg = ( "Pass the key of the grouping with which you ran PAGA, " "using the parameter `groups_key`." ) + raise KeyError(msg) groups_key = adata.uns["paga"]["groups"] groups_names = adata.obs[groups_key].cat.categories if "dpt_pseudotime" not in adata.obs.columns: - raise ValueError( + msg = ( "`pl.paga_path` requires computation of a pseudotime `tl.dpt` " "for ordering at single-cell resolution" ) + raise ValueError(msg) if palette_groups is None: _utils.add_colors_for_categorical_sample_annotation(adata, groups_key) @@ -1157,10 +1164,11 @@ def moving_average(a): groups_names_set = set(groups_names) for node in nodes: if node not in groups_names_set: - raise ValueError( + msg = ( f"Each node/group needs to be in {groups_names.tolist()} " - f"(`groups_key`={groups_key!r}) not {node!r}." + f"({groups_key=!r}) not {node!r}." ) + raise ValueError(msg) nodes_ints.append(groups_names.get_loc(node)) nodes_strs = nodes else: @@ -1178,12 +1186,13 @@ def moving_average(a): adata.obs[groups_key].values == nodes_strs[igroup] ] if len(idcs) == 0: - raise ValueError( + msg = ( "Did not find data points that match " f"`adata.obs[{groups_key!r}].values == {str(group)!r}`. " f"Check whether `adata.obs[{groups_key!r}]` " "actually contains what you expect." ) + raise ValueError(msg) idcs_group = np.argsort( adata.obs["dpt_pseudotime"].values[ adata.obs[groups_key].values == nodes_strs[igroup] diff --git a/src/scanpy/plotting/_tools/scatterplots.py b/src/scanpy/plotting/_tools/scatterplots.py index b54897678f..cb3c9d7c66 100644 --- a/src/scanpy/plotting/_tools/scatterplots.py +++ b/src/scanpy/plotting/_tools/scatterplots.py @@ -149,7 +149,8 @@ def embedding( # Checking the mask format and if used together with groups if groups is not None and mask_obs is not None: - raise ValueError("Groups and mask arguments are incompatible.") + msg = "Groups and mask arguments are incompatible." + raise ValueError(msg) mask_obs = _check_mask(adata, mask_obs, "obs") # Figure out if we're using raw @@ -157,15 +158,17 @@ def embedding( # check if adata.raw is set use_raw = layer is None and adata.raw is not None if use_raw and layer is not None: - raise ValueError( - "Cannot use both a layer and the raw representation. Was passed:" - f"use_raw={use_raw}, layer={layer}." + msg = ( + "Cannot use both a layer and the raw representation. " + f"Was passed: {use_raw=!r}, {layer=!r}." ) + raise ValueError(msg) if use_raw and adata.raw is None: - raise ValueError( + msg = ( "`use_raw` is set to True but AnnData object does not have raw. " "Please check." ) + raise ValueError(msg) if isinstance(groups, str): groups = [groups] @@ -173,7 +176,8 @@ def embedding( # Color map if color_map is not None: if cmap is not None: - raise ValueError("Cannot specify both `color_map` and `cmap`.") + msg = "Cannot specify both `color_map` and `cmap`." + raise ValueError(msg) else: cmap = color_map cmap = copy(colormaps.get_cmap(cmap)) @@ -245,10 +249,11 @@ def embedding( not isinstance(color, str) and isinstance(color, Sequence) and len(color) > 1 ) or len(dimensions) > 1: if ax is not None: - raise ValueError( + msg = ( "Cannot specify `ax` when plotting multiple panels " "(each for a given value of 'color')." ) + raise ValueError(msg) # each plot needs to be its own panel fig, grid = _panel_grid(hspace, wspace, ncols, len(color)) @@ -810,9 +815,8 @@ def draw_graph( layout = str(adata.uns["draw_graph"]["params"]["layout"]) basis = f"draw_graph_{layout}" if f"X_{basis}" not in adata.obsm_keys(): - raise ValueError( - f"Did not find {basis} in adata.obs. Did you compute layout {layout}?" - ) + msg = f"Did not find {basis} in adata.obs. Did you compute layout {layout}?" + raise ValueError(msg) return embedding(adata, basis, **kwargs) @@ -883,10 +887,11 @@ def pca( adata, "pca", show=show, return_fig=return_fig, save=save, **kwargs ) if "pca" not in adata.obsm and "X_pca" not in adata.obsm: - raise KeyError( + msg = ( f"Could not find entry in `obsm` for 'pca'.\n" f"Available keys are: {list(adata.obsm.keys())}." ) + raise KeyError(msg) label_dict = { f"PC{i + 1}": f"PC{i + 1} ({round(v * 100, 2)}%)" @@ -1060,7 +1065,8 @@ def _components_to_dimensions( if components is None and dimensions is None: dimensions = [tuple(i for i in range(ndims))] elif components is not None and dimensions is not None: - raise ValueError("Cannot provide both dimensions and components") + msg = "Cannot provide both dimensions and components" + raise ValueError(msg) # TODO: Consider deprecating this # If components is not None, parse them and set dimensions @@ -1099,9 +1105,8 @@ def _add_categorical_legend( """Add a legend to the passed Axes.""" if na_in_legend and pd.isnull(color_source_vector).any(): if "NA" in color_source_vector: - raise NotImplementedError( - "No fallback for null labels has been defined if NA already in categories." - ) + msg = "No fallback for null labels has been defined if NA already in categories." + raise NotImplementedError(msg) color_source_vector = color_source_vector.add_categories("NA").fillna("NA") palette = palette.copy() palette["NA"] = na_color @@ -1162,7 +1167,8 @@ def _get_basis(adata: AnnData, basis: str) -> np.ndarray: elif f"X_{basis}" in adata.obsm: return adata.obsm[f"X_{basis}"] else: - raise KeyError(f"Could not find '{basis}' or 'X_{basis}' in .obsm") + msg = f"Could not find {basis!r} or 'X_{basis}' in .obsm" + raise KeyError(msg) def _get_color_source_vector( @@ -1294,10 +1300,11 @@ def _check_spot_size(spatial_data: Mapping | None, spot_size: float | None) -> f This is a required argument for spatial plots. """ if spatial_data is None and spot_size is None: - raise ValueError( + msg = ( "When .uns['spatial'][library_id] does not exist, spot_size must be " "provided directly." ) + raise ValueError(msg) elif spot_size is None: return spatial_data["scalefactors"]["spot_diameter_fullres"] else: @@ -1329,10 +1336,11 @@ def _check_spatial_data( spatial_mapping = uns.get("spatial", {}) if library_id is _empty: if len(spatial_mapping) > 1: - raise ValueError( + msg = ( "Found multiple possible libraries in `.uns['spatial']. Please specify." f" Options are:\n\t{list(spatial_mapping.keys())}" ) + raise ValueError(msg) elif len(spatial_mapping) == 1: library_id = list(spatial_mapping.keys())[0] else: @@ -1370,7 +1378,8 @@ def _check_crop_coord( if crop_coord is None: return None if len(crop_coord) != 4: - raise ValueError("Invalid crop_coord of length {len(crop_coord)}(!=4)") + msg = "Invalid crop_coord of length {len(crop_coord)}(!=4)" + raise ValueError(msg) crop_coord = tuple(c * scale_factor for c in crop_coord) return crop_coord @@ -1389,7 +1398,8 @@ def _broadcast_args(*args): lens = [len(arg) for arg in args] longest = max(lens) if not (set(lens) == {1, longest} or set(lens) == {longest}): - raise ValueError(f"Could not broadcast together arguments with shapes: {lens}.") + msg = f"Could not broadcast together arguments with shapes: {lens}." + raise ValueError(msg) return list( [[arg[0] for _ in range(longest)] if len(arg) == 1 else arg for arg in args] ) diff --git a/src/scanpy/plotting/_utils.py b/src/scanpy/plotting/_utils.py index 09a01a9bc5..b6cd920039 100644 --- a/src/scanpy/plotting/_utils.py +++ b/src/scanpy/plotting/_utils.py @@ -398,7 +398,7 @@ def _validate_palette(adata: AnnData, key: str) -> None: else: logg.warning( f"The following color value found in adata.uns['{key}_colors'] " - f"is not valid: '{color}'. Default colors will be used instead." + f"is not valid: {color!r}. Default colors will be used instead." ) _set_default_colors_for_categorical_obs(adata, key) _palette = None @@ -466,21 +466,24 @@ def _set_colors_for_categorical_obs( if color in additional_colors: color = additional_colors[color] else: - raise ValueError( + msg = ( "The following color value of the given palette " f"is not valid: {color}" ) + raise ValueError(msg) _color_list.append(color) palette = cycler(color=_color_list) if not isinstance(palette, Cycler): - raise ValueError( + msg = ( "Please check that the value of 'palette' is a valid " "matplotlib colormap string (eg. Set2), a list of color names " "or a cycler with a 'color' key." ) + raise ValueError(msg) if "color" not in palette.keys: - raise ValueError("Please set the palette key 'color'.") + msg = "Please set the palette key 'color'." + raise ValueError(msg) cc = palette() colors_list = [to_hex(next(cc)["color"]) for x in range(len(categories))] @@ -556,7 +559,8 @@ def plot_edges(axs, adata, basis, edges_width, edges_color, *, neighbors_key=Non if neighbors_key is None: neighbors_key = "neighbors" if neighbors_key not in adata.uns: - raise ValueError("`edges=True` requires `pp.neighbors` to be run before.") + msg = "`edges=True` requires `pp.neighbors` to be run before." + raise ValueError(msg) neighbors = NeighborsView(adata, neighbors_key) g = nx.Graph(neighbors["connectivities"]) basis_key = _get_basis(adata, basis) @@ -582,11 +586,12 @@ def plot_arrows(axs, adata, basis, arrows_kwds=None): (p for p in ["velocity", "Delta"] if f"{p}_{basis}" in adata.obsm), None ) if v_prefix is None: - raise ValueError( + msg = ( "`arrows=True` requires " f"`'velocity_{basis}'` from scvelo or " f"`'Delta_{basis}'` from velocyto." ) + raise ValueError(msg) if v_prefix == "velocity": logg.warning( "The module `scvelo` has improved plotting facilities. " @@ -628,7 +633,8 @@ def scatter_group( color = rgb2hex(adata.uns[key + "_colors"][cat_code]) if not is_color_like(color): - raise ValueError(f'"{color}" is not a valid matplotlib color.') + msg = f"{color!r} is not a valid matplotlib color." + raise ValueError(msg) data = [Y[mask_obs, 0], Y[mask_obs, 1]] if projection == "3d": data.append(Y[mask_obs, 2]) @@ -658,7 +664,8 @@ def setup_axes( """Grid of axes for plotting, legends and colorbars.""" check_projection(projection) if left_margin is not None: - raise NotImplementedError("We currently don’t support `left_margin`.") + msg = "We currently don’t support `left_margin`." + raise NotImplementedError(msg) if np.any(colorbars) and right_margin is None: right_margin = 1 - rcParams["figure.subplot.right"] + 0.21 # 0.25 elif right_margin is None: @@ -801,7 +808,8 @@ def scatter_base( elif projection == "3d": data = Y_sort[:, 0], Y_sort[:, 1], Y_sort[:, 2] else: - raise ValueError(f"Unknown projection {projection!r} not in '2d', '3d'") + msg = f"Unknown projection {projection!r} not in '2d', '3d'" + raise ValueError(msg) if not isinstance(color, str) or color != "white": sct = ax.scatter( *data, @@ -1148,15 +1156,15 @@ def data_to_axis_points(ax: Axes, points_data: np.ndarray): def check_projection(projection): """Validation for projection argument.""" if projection not in {"2d", "3d"}: - raise ValueError(f"Projection must be '2d' or '3d', was '{projection}'.") + msg = f"Projection must be '2d' or '3d', was '{projection}'." + raise ValueError(msg) if projection == "3d": from packaging.version import parse mpl_version = parse(mpl.__version__) if mpl_version < parse("3.3.3"): - raise ImportError( - f"3d plotting requires matplotlib > 3.3.3. Found {mpl.__version__}" - ) + msg = f"3d plotting requires matplotlib > 3.3.3. Found {mpl.__version__}" + raise ImportError(msg) def circles( @@ -1300,7 +1308,8 @@ def check_colornorm(vmin=None, vmax=None, vcenter=None, norm=None): if norm is not None: if (vmin is not None) or (vmax is not None) or (vcenter is not None): - raise ValueError("Passing both norm and vmin/vmax/vcenter is not allowed.") + msg = "Passing both norm and vmin/vmax/vcenter is not allowed." + raise ValueError(msg) else: if vcenter is not None: norm = DivNorm(vmin=vmin, vmax=vmax, vcenter=vcenter) diff --git a/src/scanpy/preprocessing/_combat.py b/src/scanpy/preprocessing/_combat.py index caeb9a0b45..93052f356c 100644 --- a/src/scanpy/preprocessing/_combat.py +++ b/src/scanpy/preprocessing/_combat.py @@ -179,21 +179,23 @@ def combat( # check the input if key not in adata.obs_keys(): - raise ValueError(f"Could not find the key {key!r} in adata.obs") + msg = f"Could not find the key {key!r} in adata.obs" + raise ValueError(msg) if covariates is not None: cov_exist = np.isin(covariates, adata.obs_keys()) if np.any(~cov_exist): missing_cov = np.array(covariates)[~cov_exist].tolist() - raise ValueError( - f"Could not find the covariate(s) {missing_cov!r} in adata.obs" - ) + msg = f"Could not find the covariate(s) {missing_cov!r} in adata.obs" + raise ValueError(msg) if key in covariates: - raise ValueError("Batch key and covariates cannot overlap") + msg = "Batch key and covariates cannot overlap" + raise ValueError(msg) if len(covariates) != len(set(covariates)): - raise ValueError("Covariates must be unique") + msg = "Covariates must be unique" + raise ValueError(msg) # only works on dense matrices so far X = adata.X.toarray().T if issparse(adata.X) else adata.X.T diff --git a/src/scanpy/preprocessing/_deprecated/__init__.py b/src/scanpy/preprocessing/_deprecated/__init__.py index c23361631a..b821417c0b 100644 --- a/src/scanpy/preprocessing/_deprecated/__init__.py +++ b/src/scanpy/preprocessing/_deprecated/__init__.py @@ -36,7 +36,8 @@ def normalize_per_cell_weinreb16_deprecated( Normalized version of the original expression matrix. """ if max_fraction < 0 or max_fraction > 1: - raise ValueError("Choose max_fraction between 0 and 1.") + msg = "Choose max_fraction between 0 and 1." + raise ValueError(msg) counts_per_cell = x.sum(1).A1 if issparse(x) else x.sum(1) gene_subset = np.all(x <= counts_per_cell[:, None] * max_fraction, axis=0) diff --git a/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py b/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py index 27e8f1f846..bba4fb9bbf 100644 --- a/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py +++ b/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py @@ -214,7 +214,8 @@ def filter_genes_dispersion( / disp_mad_bin[df["mean_bin"].values].values ) else: - raise ValueError('`flavor` needs to be "seurat" or "cell_ranger"') + msg = '`flavor` needs to be "seurat" or "cell_ranger"' + raise ValueError(msg) dispersion_norm = df["dispersion_norm"].values.astype("float32") if n_top_genes is not None: dispersion_norm = dispersion_norm[~np.isnan(dispersion_norm)] @@ -268,7 +269,8 @@ def filter_genes_fano_deprecated(X, Ecutoff, Vcutoff): def _filter_genes(X, e_cutoff, v_cutoff, meth): """See `filter_genes_dispersion` :cite:p:`Weinreb2017`.""" if issparse(X): - raise ValueError("Not defined for sparse input. See `filter_genes_dispersion`.") + msg = "Not defined for sparse input. See `filter_genes_dispersion`." + raise ValueError(msg) mean_filter = np.mean(X, axis=0) > e_cutoff var_filter = meth(X, axis=0) / (np.mean(X, axis=0) + 0.0001) > v_cutoff gene_subset = np.nonzero(np.all([mean_filter, var_filter], axis=0))[0] diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py index e34340b256..356fa8f03f 100644 --- a/src/scanpy/preprocessing/_highly_variable_genes.py +++ b/src/scanpy/preprocessing/_highly_variable_genes.py @@ -65,15 +65,14 @@ def _highly_variable_genes_seurat_v3( try: from skmisc.loess import loess except ImportError: - raise ImportError( - "Please install skmisc package via `pip install --user scikit-misc" - ) + msg = "Please install skmisc package via `pip install --user scikit-misc" + raise ImportError(msg) df = pd.DataFrame(index=adata.var_names) data = _get_obs_rep(adata, layer=layer) if check_values and not check_nonnegative_integers(data): warnings.warn( - f"`flavor='{flavor}'` expects raw count data, but non-integers were found.", + f"`{flavor=!r}` expects raw count data, but non-integers were found.", UserWarning, ) @@ -159,7 +158,8 @@ def _highly_variable_genes_seurat_v3( sort_cols = ["highly_variable_nbatches", "highly_variable_rank"] sort_ascending = [False, True] else: - raise ValueError(f"Did not recognize flavor {flavor}") + msg = f"Did not recognize flavor {flavor}" + raise ValueError(msg) sorted_index = ( df[sort_cols] .sort_values(sort_cols, ascending=sort_ascending, na_position="last") @@ -332,7 +332,8 @@ def _get_mean_bins( elif flavor == "cell_ranger": bins = np.r_[-np.inf, np.percentile(means, np.arange(10, 105, 5)), np.inf] else: - raise ValueError('`flavor` needs to be "seurat" or "cell_ranger"') + msg = '`flavor` needs to be "seurat" or "cell_ranger"' + raise ValueError(msg) return pd.cut(means, bins=bins) @@ -347,7 +348,8 @@ def _get_disp_stats( elif flavor == "cell_ranger": disp_bin_stats = disp_grouped.agg(avg="median", dev=_mad) else: - raise ValueError('`flavor` needs to be "seurat" or "cell_ranger"') + msg = '`flavor` needs to be "seurat" or "cell_ranger"' + raise ValueError(msg) return disp_bin_stats.loc[df["mean_bin"]].set_index(df.index) @@ -647,10 +649,11 @@ def highly_variable_genes( start = logg.info("extracting highly variable genes") if not isinstance(adata, AnnData): - raise ValueError( + msg = ( "`pp.highly_variable_genes` expects an `AnnData` argument, " "pass `inplace=False` if you want to return a `pd.DataFrame`." ) + raise ValueError(msg) if flavor in {"seurat_v3", "seurat_v3_paper"}: if n_top_genes is None: diff --git a/src/scanpy/preprocessing/_normalization.py b/src/scanpy/preprocessing/_normalization.py index c888ded9c6..e1ee3d4822 100644 --- a/src/scanpy/preprocessing/_normalization.py +++ b/src/scanpy/preprocessing/_normalization.py @@ -175,11 +175,13 @@ def normalize_total( """ if copy: if not inplace: - raise ValueError("`copy=True` cannot be used with `inplace=False`.") + msg = "`copy=True` cannot be used with `inplace=False`." + raise ValueError(msg) adata = adata.copy() if max_fraction < 0 or max_fraction > 1: - raise ValueError("Choose max_fraction between 0 and 1.") + msg = "Choose max_fraction between 0 and 1." + raise ValueError(msg) # Deprecated features if layers is not None: @@ -200,9 +202,8 @@ def normalize_total( if layers == "all": layers = adata.layers.keys() elif isinstance(layers, str): - raise ValueError( - f"`layers` needs to be a list of strings or 'all', not {layers!r}" - ) + msg = f"`layers` needs to be a list of strings or 'all', not {layers!r}" + raise ValueError(msg) view_to_actual(adata) @@ -254,7 +255,8 @@ def normalize_total( elif layer_norm is None: after = None else: - raise ValueError('layer_norm should be "after", "X" or None') + msg = 'layer_norm should be "after", "X" or None' + raise ValueError(msg) for layer_to_norm in layers if layers is not None else (): res = normalize_total( diff --git a/src/scanpy/preprocessing/_pca/__init__.py b/src/scanpy/preprocessing/_pca/__init__.py index 3fd288ad93..db7886a29f 100644 --- a/src/scanpy/preprocessing/_pca/__init__.py +++ b/src/scanpy/preprocessing/_pca/__init__.py @@ -208,7 +208,8 @@ def pca( logg_start = logg.info("computing PCA") if layer is not None and chunked: # Current chunking implementation relies on pca being called on X - raise NotImplementedError("Cannot use `layer` and `chunked` at the same time.") + msg = "Cannot use `layer` and `chunked` at the same time." + raise NotImplementedError(msg) # chunked calculation is not randomized, anyways if svd_solver in {"auto", "randomized"} and not chunked: @@ -220,9 +221,8 @@ def pca( data_is_AnnData = isinstance(data, AnnData) if data_is_AnnData: if layer is None and not chunked and is_backed_type(data.X): - raise NotImplementedError( - f"PCA is not implemented for matrices of type {type(data.X)} with chunked as False" - ) + msg = f"PCA is not implemented for matrices of type {type(data.X)} with chunked as False" + raise NotImplementedError(msg) adata = data.copy() if copy else data else: if pkg_version("anndata") < Version("0.8.0rc1"): @@ -239,13 +239,12 @@ def pca( min_dim = min(adata_comp.n_vars, adata_comp.n_obs) n_comps = min_dim - 1 if min_dim <= settings.N_PCS else settings.N_PCS - logg.info(f" with n_comps={n_comps}") + logg.info(f" with {n_comps=}") X = _get_obs_rep(adata_comp, layer=layer) if is_backed_type(X) and layer is not None: - raise NotImplementedError( - f"PCA is not implemented for matrices of type {type(X)} from layers" - ) + msg = f"PCA is not implemented for matrices of type {type(X)} from layers" + raise NotImplementedError(msg) # See: https://github.com/scverse/scanpy/pull/2816#issuecomment-1932650529 if ( Version(ad.__version__) < Version("0.9") diff --git a/src/scanpy/preprocessing/_qc.py b/src/scanpy/preprocessing/_qc.py index 87ad51d420..5af8def042 100644 --- a/src/scanpy/preprocessing/_qc.py +++ b/src/scanpy/preprocessing/_qc.py @@ -32,10 +32,11 @@ def _choose_mtx_rep(adata, *, use_raw: bool = False, layer: str | None = None): is_layer = layer is not None if use_raw and is_layer: - raise ValueError( + msg = ( "Cannot use expression from both layer and raw. You provided:" - f"'use_raw={use_raw}' and 'layer={layer}'" + f"{use_raw=!r} and {layer=!r}" ) + raise ValueError(msg) if is_layer: return adata.layers[layer] elif use_raw: @@ -384,7 +385,8 @@ def top_proportions_sparse_csr(data, indptr, n): def check_ns(func): def check_ns_inner(mtx: np.ndarray | spmatrix | DaskArray, ns: Collection[int]): if not (max(ns) <= mtx.shape[1] and min(ns) > 0): - raise IndexError("Positions outside range of features.") + msg = "Positions outside range of features." + raise IndexError(msg) return func(mtx, ns) return check_ns_inner diff --git a/src/scanpy/preprocessing/_recipes.py b/src/scanpy/preprocessing/_recipes.py index 4b97405df9..4748d75e5c 100644 --- a/src/scanpy/preprocessing/_recipes.py +++ b/src/scanpy/preprocessing/_recipes.py @@ -59,7 +59,8 @@ def recipe_weinreb17( from ._deprecated import normalize_per_cell_weinreb16_deprecated, zscore_deprecated if issparse(adata.X): - raise ValueError("`recipe_weinreb16 does not support sparse matrices.") + msg = "`recipe_weinreb16 does not support sparse matrices." + raise ValueError(msg) if copy: adata = adata.copy() if log: diff --git a/src/scanpy/preprocessing/_scale.py b/src/scanpy/preprocessing/_scale.py index bac08f246b..ee15f977b9 100644 --- a/src/scanpy/preprocessing/_scale.py +++ b/src/scanpy/preprocessing/_scale.py @@ -133,13 +133,11 @@ def scale( """ _check_array_function_arguments(layer=layer, obsm=obsm) if layer is not None: - raise ValueError( - f"`layer` argument inappropriate for value of type {type(data)}" - ) + msg = f"`layer` argument inappropriate for value of type {type(data)}" + raise ValueError(msg) if obsm is not None: - raise ValueError( - f"`obsm` argument inappropriate for value of type {type(data)}" - ) + msg = f"`obsm` argument inappropriate for value of type {type(data)}" + raise ValueError(msg) return scale_array( data, zero_center=zero_center, max_value=max_value, copy=copy, mask_obs=mask_obs ) @@ -184,7 +182,7 @@ def scale_array( if not zero_center and max_value is not None: logg.info( # Be careful of what? This should be more specific - "... be careful when using `max_value` " "without `zero_center`." + "... be careful when using `max_value` without `zero_center`." ) if np.issubdtype(X.dtype, np.integer): diff --git a/src/scanpy/preprocessing/_scrublet/pipeline.py b/src/scanpy/preprocessing/_scrublet/pipeline.py index 586587e2cf..6e52a6650c 100644 --- a/src/scanpy/preprocessing/_scrublet/pipeline.py +++ b/src/scanpy/preprocessing/_scrublet/pipeline.py @@ -53,7 +53,8 @@ def truncated_svd( algorithm: Literal["arpack", "randomized"] = "arpack", ) -> None: if self._counts_sim_norm is None: - raise RuntimeError("_counts_sim_norm is not set") + msg = "_counts_sim_norm is not set" + raise RuntimeError(msg) from sklearn.decomposition import TruncatedSVD svd = TruncatedSVD( @@ -72,7 +73,8 @@ def pca( svd_solver: Literal["auto", "full", "arpack", "randomized"] = "arpack", ) -> None: if self._counts_sim_norm is None: - raise RuntimeError("_counts_sim_norm is not set") + msg = "_counts_sim_norm is not set" + raise RuntimeError(msg) from sklearn.decomposition import PCA X_obs = self._counts_obs_norm.toarray() diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py index ac68edd376..fda79b4da2 100644 --- a/src/scanpy/preprocessing/_simple.py +++ b/src/scanpy/preprocessing/_simple.py @@ -146,10 +146,11 @@ def filter_cells( option is not None for option in [min_genes, min_counts, max_genes, max_counts] ) if n_given_options != 1: - raise ValueError( + msg = ( "Only provide one of the optional parameters `min_counts`, " "`min_genes`, `max_counts`, `max_genes` per call." ) + raise ValueError(msg) if isinstance(data, AnnData): raise_not_implemented_error_if_backed_type(data.X, "filter_cells") adata = data.copy() if copy else data @@ -261,10 +262,11 @@ def filter_genes( option is not None for option in [min_cells, min_counts, max_cells, max_counts] ) if n_given_options != 1: - raise ValueError( + msg = ( "Only provide one of the optional parameters `min_counts`, " "`min_cells`, `max_counts`, `max_cells` per call." ) + raise ValueError(msg) if isinstance(data, AnnData): raise_not_implemented_error_if_backed_type(data.X, "filter_genes") @@ -407,13 +409,13 @@ def log1p_anndata( if chunked: if (layer is not None) or (obsm is not None): - raise NotImplementedError( + msg = ( "Currently cannot perform chunked operations on arrays not stored in X." ) + raise NotImplementedError(msg) if adata.isbacked and adata.file._filemode != "r+": - raise NotImplementedError( - "log1p is not implemented for backed AnnData with backed mode not r+" - ) + msg = "log1p is not implemented for backed AnnData with backed mode not r+" + raise NotImplementedError(msg) for chunk, start, end in adata.chunked_X(chunk_size): adata.X[start:end] = log1p(chunk, base=base, copy=False) else: @@ -421,8 +423,10 @@ def log1p_anndata( if is_backed_type(X): msg = f"log1p is not implemented for matrices of type {type(X)}" if layer is not None: - raise NotImplementedError(f"{msg} from layers") - raise NotImplementedError(f"{msg} without `chunked=True`") + msg = f"{msg} from layers" + raise NotImplementedError(msg) + msg = f"{msg} without `chunked=True`" + raise NotImplementedError(msg) X = log1p(X, copy=False, base=base) _set_obs_rep(adata, X, layer=layer, obsm=obsm) @@ -595,7 +599,8 @@ def normalize_per_cell( elif use_rep is None: after = None else: - raise ValueError('use_rep should be "after", "X" or None') + msg = 'use_rep should be "after", "X" or None' + raise ValueError(msg) for layer in layers: _subset, counts = filter_cells(adata.layers[layer], min_counts=min_counts) temp = normalize_per_cell(adata.layers[layer], after, counts, copy=True) @@ -611,7 +616,8 @@ def normalize_per_cell( X = data.copy() if copy else data if counts_per_cell is None: if not copy: - raise ValueError("Can only be run with copy=True") + msg = "Can only be run with copy=True" + raise ValueError(msg) cell_subset, counts_per_cell = filter_cells(X, min_counts=min_counts) X = X[cell_subset] counts_per_cell = counts_per_cell[cell_subset] @@ -719,11 +725,12 @@ def regress_out( adata.obs[keys[0]].dtype, CategoricalDtype ): if len(keys) > 1: - raise ValueError( + msg = ( "If providing categorical variable, " "only a single one is allowed. For this one " "we regress on the mean for each category." ) + raise ValueError(msg) logg.debug("... regressing on per-gene means within categories") regressors = np.zeros(X.shape, dtype="float32") X = _to_dense(X, order="F") if issparse(X) else X @@ -1017,9 +1024,8 @@ def downsample_counts( total_counts_call = total_counts is not None counts_per_cell_call = counts_per_cell is not None if total_counts_call is counts_per_cell_call: - raise ValueError( - "Must specify exactly one of `total_counts` or `counts_per_cell`." - ) + msg = "Must specify exactly one of `total_counts` or `counts_per_cell`." + raise ValueError(msg) if copy: adata = adata.copy() if total_counts_call: @@ -1039,11 +1045,12 @@ def _downsample_per_cell(X, counts_per_cell, random_state, replace): # np.random.choice needs int arguments in numba code: counts_per_cell = counts_per_cell.astype(np.int_, copy=False) if not isinstance(counts_per_cell, np.ndarray) or len(counts_per_cell) != n_obs: - raise ValueError( + msg = ( "If provided, 'counts_per_cell' must be either an integer, or " "coercible to an `np.ndarray` of length as number of observations" " by `np.asarray(counts_per_cell)`." ) + raise ValueError(msg) if issparse(X): original_type = type(X) if not isspmatrix_csr(X): diff --git a/src/scanpy/preprocessing/_utils.py b/src/scanpy/preprocessing/_utils.py index b200e89ce8..3ca74734c0 100644 --- a/src/scanpy/preprocessing/_utils.py +++ b/src/scanpy/preprocessing/_utils.py @@ -64,7 +64,8 @@ def sparse_mean_variance_axis(mtx: sparse.spmatrix, axis: int): ax_minor = 0 shape = mtx.shape[::-1] else: - raise ValueError("This function only works on sparse csr and csc matrices") + msg = "This function only works on sparse csr and csc matrices" + raise ValueError(msg) if axis == ax_minor: return sparse_mean_var_major_axis( mtx.data, diff --git a/src/scanpy/queries/_queries.py b/src/scanpy/queries/_queries.py index 8da90151ce..e992f937e3 100644 --- a/src/scanpy/queries/_queries.py +++ b/src/scanpy/queries/_queries.py @@ -63,13 +63,13 @@ def simple_query( elif isinstance(attrs, Iterable): attrs = list(attrs) else: - raise TypeError(f"attrs must be of type list or str, was {type(attrs)}.") + msg = f"attrs must be of type list or str, was {type(attrs)}." + raise TypeError(msg) try: from pybiomart import Server except ImportError: - raise ImportError( - "This method requires the `pybiomart` module to be installed." - ) + msg = "This method requires the `pybiomart` module to be installed." + raise ImportError(msg) server = Server(host, use_cache=use_cache) dataset = server.marts["ENSEMBL_MART_ENSEMBL"].datasets[f"{org}_gene_ensembl"] res = dataset.query(attributes=attrs, filters=filters, use_attr_names=True) @@ -273,17 +273,17 @@ def enrich( try: from gprofiler import GProfiler except ImportError: - raise ImportError( - "This method requires the `gprofiler-official` module to be installed." - ) + msg = "This method requires the `gprofiler-official` module to be installed." + raise ImportError(msg) gprofiler = GProfiler(user_agent="scanpy", return_dataframe=True) gprofiler_kwargs = dict(gprofiler_kwargs) for k in ["organism"]: if gprofiler_kwargs.get(k) is not None: - raise ValueError( + msg = ( f"Argument `{k}` should be passed directly through `enrich`, " "not through `gprofiler_kwargs`" ) + raise ValueError(msg) return gprofiler.profile(container, organism=org, **gprofiler_kwargs) diff --git a/src/scanpy/readwrite.py b/src/scanpy/readwrite.py index 3333fbc0a1..c568519cd7 100644 --- a/src/scanpy/readwrite.py +++ b/src/scanpy/readwrite.py @@ -41,6 +41,7 @@ from ._utils import _empty if TYPE_CHECKING: + from datetime import datetime from typing import BinaryIO, Literal from ._utils import Empty @@ -155,13 +156,14 @@ def read( filekey = str(filename) filename = settings.writedir / (filekey + "." + settings.file_format_data) if not filename.exists(): - raise ValueError( + msg = ( f"Reading with filekey {filekey!r} failed, " f"the inferred filename {filename!r} does not exist. " "If you intended to provide a filename, either use a filename " f"ending on one of the available extensions {avail_exts} " "or pass the parameter `ext`." ) + raise ValueError(msg) return read_h5ad(filename, backed=backed) @@ -219,40 +221,46 @@ def read_10x_h5( adata = _read_v3_10x_h5(filename, start=start) if genome: if genome not in adata.var["genome"].values: - raise ValueError( - f"Could not find data corresponding to genome '{genome}' in '{filename}'. " - f'Available genomes are: {list(adata.var["genome"].unique())}.' + msg = ( + f"Could not find data corresponding to genome {genome!r} in {filename}. " + f"Available genomes are: {list(adata.var['genome'].unique())}." ) + raise ValueError(msg) adata = adata[:, adata.var["genome"] == genome] if gex_only: adata = adata[:, adata.var["feature_types"] == "Gene Expression"] if adata.is_view: adata = adata.copy() else: - adata = _read_legacy_10x_h5(filename, genome=genome, start=start) + adata = _read_legacy_10x_h5(Path(filename), genome=genome, start=start) return adata -def _read_legacy_10x_h5(filename, *, genome=None, start=None): +def _read_legacy_10x_h5( + path: Path, *, genome: str | None = None, start: datetime | None = None +): """ Read hdf5 file from Cell Ranger v2 or earlier versions. """ - with h5py.File(str(filename), "r") as f: + with h5py.File(str(path), "r") as f: try: children = list(f.keys()) if not genome: if len(children) > 1: - raise ValueError( - f"'{filename}' contains more than one genome. For legacy 10x h5 " - "files you must specify the genome if more than one is present. " + msg = ( + f"{path} contains more than one genome. " + "For legacy 10x h5 files you must specify the genome " + "if more than one is present. " f"Available genomes are: {children}" ) + raise ValueError(msg) genome = children[0] elif genome not in children: - raise ValueError( - f"Could not find genome '{genome}' in '{filename}'. " + msg = ( + f"Could not find genome {genome!r} in {path}. " f"Available genomes are: {children}" ) + raise ValueError(msg) dsets = {} _collect_datasets(dsets, f[genome]) @@ -283,7 +291,8 @@ def _read_legacy_10x_h5(filename, *, genome=None, start=None): logg.info("", time=start) return adata except KeyError: - raise Exception("File is missing one or more required datasets.") + msg = "File is missing one or more required datasets." + raise Exception(msg) def _collect_datasets(dsets: dict, group: h5py.Group): @@ -354,7 +363,8 @@ def _read_v3_10x_h5(filename, *, start=None): ] ) else: - raise ValueError("10x h5 has no features group") + msg = "10x h5 has no features group" + raise ValueError(msg) adata = AnnData( matrix, obs=obs_dict, @@ -363,7 +373,8 @@ def _read_v3_10x_h5(filename, *, start=None): logg.info("", time=start) return adata except KeyError: - raise Exception("File is missing one or more required datasets.") + msg = "File is missing one or more required datasets." + raise Exception(msg) @deprecated("Use `squidpy.read.visium` instead.") @@ -468,11 +479,11 @@ def read_visium( if not f.exists(): if any(x in str(f) for x in ["hires_image", "lowres_image"]): logg.warning( - f"You seem to be missing an image file.\n" - f"Could not find '{f}'." + f"You seem to be missing an image file.\nCould not find {f}." ) else: - raise OSError(f"Could not find '{f}'") + msg = f"Could not find {f}" + raise OSError(msg) adata.uns["spatial"][library_id]["images"] = dict() for res in ["hires", "lowres"]: @@ -481,7 +492,8 @@ def read_visium( str(files[f"{res}_image"]) ) except Exception: - raise OSError(f"Could not find '{res}_image'") + msg = f"Could not find '{res}_image'" + raise OSError(msg) # read json scalefactors adata.uns["spatial"][library_id]["scalefactors"] = json.loads( @@ -623,7 +635,8 @@ def _read_10x_mtx( adata.var_names = genes[0].values adata.var["gene_symbols"] = genes[1].values else: - raise ValueError("`var_names` needs to be 'gene_symbols' or 'gene_ids'") + msg = "`var_names` needs to be 'gene_symbols' or 'gene_ids'" + raise ValueError(msg) if not is_legacy: adata.var["feature_types"] = genes[2].values barcodes = pd.read_csv(path / f"{prefix}barcodes.tsv{suffix}", header=None) @@ -667,11 +680,12 @@ def write( if ext is None: ext = ext_ elif ext != ext_: - raise ValueError( + msg = ( "It suffices to provide the file type by " "providing a proper extension to the filename." 'One of "txt", "csv", "h5" or "npz".' ) + raise ValueError(msg) else: key = filename ext = settings.file_format_data if ext is None else ext @@ -767,9 +781,8 @@ def _read( **kwargs, ): if ext is not None and ext not in avail_exts: - raise ValueError( - "Please provide one of the available extensions.\n" f"{avail_exts}" - ) + msg = f"Please provide one of the available extensions.\n{avail_exts}" + raise ValueError(msg) else: ext = is_valid_filename(filename, return_ext=True) is_present = _check_datafile_present_and_download(filename, backup_url=backup_url) @@ -793,7 +806,8 @@ def _read( return read_h5ad(path_cache) if not is_present: - raise FileNotFoundError(f"Did not find file {filename}.") + msg = f"Did not find file {filename}." + raise FileNotFoundError(msg) logg.debug(f"reading {filename}") if not cache and not suppress_cache_warning: logg.hint( @@ -803,7 +817,8 @@ def _read( # do the actual reading if ext == "xlsx" or ext == "xls": if sheet is None: - raise ValueError("Provide `sheet` parameter when reading '.xlsx' files.") + msg = "Provide `sheet` parameter when reading '.xlsx' files." + raise ValueError(msg) else: adata = read_excel(filename, sheet) elif ext in {"mtx", "mtx.gz"}: @@ -817,7 +832,7 @@ def _read( elif ext in {"txt", "tab", "data", "tsv"}: if ext == "data": logg.hint( - "... assuming '.data' means tab or white-space " "separated text file", + "... assuming '.data' means tab or white-space separated text file" ) logg.hint("change this by passing `ext` to sc.read") adata = read_text(filename, delimiter, first_column_names) @@ -826,7 +841,8 @@ def _read( elif ext == "loom": adata = read_loom(filename=filename, **kwargs) else: - raise ValueError(f"Unknown extension {ext}.") + msg = f"Unknown extension {ext}." + raise ValueError(msg) if cache: logg.info( f"... writing an {settings.file_format_data} " @@ -1091,11 +1107,10 @@ def is_valid_filename(filename: Path, *, return_ext: bool = False): return "mtx.gz" if return_ext else True elif not return_ext: return False - raise ValueError( - f"""\ + msg = f"""\ {filename!r} does not end on a valid extension. Please, provide one of the available extensions. {avail_exts} Text files with .gz and .bz2 extensions are also supported.\ """ - ) + raise ValueError(msg) diff --git a/src/scanpy/tools/_dendrogram.py b/src/scanpy/tools/_dendrogram.py index f60f0ae2e9..b31e792f31 100644 --- a/src/scanpy/tools/_dendrogram.py +++ b/src/scanpy/tools/_dendrogram.py @@ -124,15 +124,17 @@ def dendrogram( groupby = [groupby] for group in groupby: if group not in adata.obs_keys(): - raise ValueError( + msg = ( "groupby has to be a valid observation. " f"Given value: {group}, valid observations: {adata.obs_keys()}" ) + raise ValueError(msg) if not isinstance(adata.obs[group].dtype, CategoricalDtype): - raise ValueError( + msg = ( "groupby has to be a categorical observation. " f"Given value: {group}, Column type: {adata.obs[group].dtype}" ) + raise ValueError(msg) if var_names is None: rep_df = pd.DataFrame( @@ -188,7 +190,7 @@ def dendrogram( if inplace: if key_added is None: - key_added = f'dendrogram_{"_".join(groupby)}' + key_added = f"dendrogram_{'_'.join(groupby)}" logg.info(f"Storing dendrogram info using `.uns[{key_added!r}]`") adata.uns[key_added] = dat else: diff --git a/src/scanpy/tools/_diffmap.py b/src/scanpy/tools/_diffmap.py index d2bdcc647b..8554552252 100644 --- a/src/scanpy/tools/_diffmap.py +++ b/src/scanpy/tools/_diffmap.py @@ -77,11 +77,11 @@ def diffmap( neighbors_key = "neighbors" if neighbors_key not in adata.uns: - raise ValueError( - "You need to run `pp.neighbors` first to compute a neighborhood graph." - ) + msg = "You need to run `pp.neighbors` first to compute a neighborhood graph." + raise ValueError(msg) if n_comps <= 2: - raise ValueError("Provide any value greater than 2 for `n_comps`. ") + msg = "Provide any value greater than 2 for `n_comps`. " + raise ValueError(msg) adata = adata.copy() if copy else adata _diffmap( adata, n_comps=n_comps, neighbors_key=neighbors_key, random_state=random_state diff --git a/src/scanpy/tools/_dpt.py b/src/scanpy/tools/_dpt.py index c0fa59262f..e92fc726c6 100644 --- a/src/scanpy/tools/_dpt.py +++ b/src/scanpy/tools/_dpt.py @@ -18,7 +18,7 @@ def _diffmap(adata, n_comps=15, neighbors_key=None, random_state=0): - start = logg.info(f"computing Diffusion Maps using n_comps={n_comps}(=n_dcs)") + start = logg.info(f"computing Diffusion Maps using {n_comps=}(=n_dcs)") dpt = DPT(adata, neighbors_key=neighbors_key) dpt.compute_transitions() dpt.compute_eigen(n_comps=n_comps, random_state=random_state) @@ -129,7 +129,8 @@ def dpt( if neighbors_key is None: neighbors_key = "neighbors" if neighbors_key not in adata.uns: - raise ValueError("You need to run `pp.neighbors` and `tl.diffmap` first.") + msg = "You need to run `pp.neighbors` and `tl.diffmap` first." + raise ValueError(msg) if "iroot" not in adata.uns and "xroot" not in adata.var: logg.warning( "No root cell found. To compute pseudotime, pass the index or " @@ -152,7 +153,7 @@ def dpt( allow_kendall_tau_shift=allow_kendall_tau_shift, neighbors_key=neighbors_key, ) - start = logg.info(f"computing Diffusion Pseudotime using n_dcs={n_dcs}") + start = logg.info(f"computing Diffusion Pseudotime using {n_dcs=}") if n_branchings > 1: logg.info(" this uses a hierarchical implementation") if dpt.iroot is not None: @@ -262,7 +263,7 @@ def detect_branchings(self): """ logg.debug( f" detect {self.n_branchings} " - f'branching{"" if self.n_branchings == 1 else "s"}', + f"branching{'' if self.n_branchings == 1 else 's'}", ) # a segment is a subset of points of the data set (defined by the # indices of the points in the segment) @@ -799,9 +800,8 @@ def _detect_branching( elif self.flavor == "wolf17_bi" or self.flavor == "wolf17_bi_un": ssegs = self._detect_branching_single_wolf17_bi(Dseg, tips) else: - raise ValueError( - '`flavor` needs to be in {"haghverdi16", "wolf17_tri", "wolf17_bi"}.' - ) + msg = '`flavor` needs to be in {"haghverdi16", "wolf17_tri", "wolf17_bi"}.' + raise ValueError(msg) # make sure that each data point has a unique association with a segment masks = np.zeros((len(ssegs), Dseg.shape[0]), dtype=bool) for iseg, seg in enumerate(ssegs): @@ -1039,9 +1039,11 @@ def kendall_tau_split(self, a: np.ndarray, b: np.ndarray) -> int: Splitting index according to above description. """ if a.size != b.size: - raise ValueError("a and b need to have the same size") + msg = "a and b need to have the same size" + raise ValueError(msg) if a.ndim != b.ndim != 1: - raise ValueError("a and b need to be one-dimensional arrays") + msg = "a and b need to be one-dimensional arrays" + raise ValueError(msg) import scipy as sp min_length = 5 diff --git a/src/scanpy/tools/_draw_graph.py b/src/scanpy/tools/_draw_graph.py index aedd41f3d3..d0a70b3f4f 100644 --- a/src/scanpy/tools/_draw_graph.py +++ b/src/scanpy/tools/_draw_graph.py @@ -124,7 +124,8 @@ def draw_graph( """ start = logg.info(f"drawing single-cell graph using layout {layout!r}") if layout not in (layouts := get_literal_vals(_Layout)): - raise ValueError(f"Provide a valid layout, one of {layouts}.") + msg = f"Provide a valid layout, one of {layouts}." + raise ValueError(msg) adata = adata.copy() if copy else adata if adjacency is None: adjacency = _choose_graph(adata, obsp, neighbors_key) diff --git a/src/scanpy/tools/_embedding_density.py b/src/scanpy/tools/_embedding_density.py index 5ae69361dc..d539848b98 100644 --- a/src/scanpy/tools/_embedding_density.py +++ b/src/scanpy/tools/_embedding_density.py @@ -130,10 +130,11 @@ def embedding_density( basis = "draw_graph_fa" if f"X_{basis}" not in adata.obsm_keys(): - raise ValueError( + msg = ( "Cannot find the embedded representation " f"`adata.obsm['X_{basis}']`. Compute the embedding first." ) + raise ValueError(msg) if components is None: components = "1,2" @@ -142,17 +143,20 @@ def embedding_density( components = np.array(components).astype(int) - 1 if len(components) != 2: - raise ValueError("Please specify exactly 2 components, or `None`.") + msg = "Please specify exactly 2 components, or `None`." + raise ValueError(msg) if basis == "diffmap": components += 1 if groupby is not None: if groupby not in adata.obs: - raise ValueError(f"Could not find {groupby!r} `.obs` column.") + msg = f"Could not find {groupby!r} `.obs` column." + raise ValueError(msg) if adata.obs[groupby].dtype.name != "category": - raise ValueError(f"{groupby!r} column does not contain categorical data") + msg = f"{groupby!r} column does not contain categorical data" + raise ValueError(msg) # Define new covariate name if key_added is not None: diff --git a/src/scanpy/tools/_ingest.py b/src/scanpy/tools/_ingest.py index 3698067035..2a47e095a0 100644 --- a/src/scanpy/tools/_ingest.py +++ b/src/scanpy/tools/_ingest.py @@ -123,11 +123,12 @@ def ingest( # anndata version check anndata_version = pkg_version("anndata") if anndata_version < ANNDATA_MIN_VERSION: - raise ValueError( + msg = ( f"ingest only works correctly with anndata>={ANNDATA_MIN_VERSION} " f"(you have {anndata_version}) as prior to {ANNDATA_MIN_VERSION}, " "`AnnData.concatenate` did not concatenate `.obsm`." ) + raise ValueError(msg) start = logg.info("running ingest") obs = [obs] if isinstance(obs, str) else obs @@ -187,12 +188,13 @@ def __init__(self, dim, axis=0, vals=None): def __setitem__(self, key, value): if value.shape[self._axis] != self._dim: - raise ValueError( - f"Value passed for key '{key}' is of incorrect shape. " + msg = ( + f"Value passed for key {key!r} is of incorrect shape. " f"Value has shape {value.shape[self._axis]} " f"for dimension {self._axis} while " f"it should have {self._dim}." ) + raise ValueError(msg) self._data[key] = value def __getitem__(self, key): @@ -340,10 +342,11 @@ def __init__(self, adata: AnnData, neighbors_key: str | None = None): if neighbors_key in adata.uns: self._init_neighbors(adata, neighbors_key) else: - raise ValueError( + msg = ( f'There is no neighbors data in `adata.uns["{neighbors_key}"]`.\n' "Please run pp.neighbors." ) + raise ValueError(msg) if "X_umap" in adata.obsm: self._init_umap(adata) @@ -393,10 +396,11 @@ def fit(self, adata_new): new_var_names = adata_new.var_names.str.upper() if not ref_var_names.equals(new_var_names): - raise ValueError( + msg = ( "Variables in the new adata are different " "from variables in the reference adata" ) + raise ValueError(msg) self._obs = pd.DataFrame(index=adata_new.obs.index) self._obsm = _DimDict(adata_new.n_obs, axis=0) @@ -440,9 +444,8 @@ def map_embedding(self, method): elif method == "pca": self._obsm["X_pca"] = self._pca() else: - raise NotImplementedError( - "Ingest supports only umap and pca embeddings for now." - ) + msg = "Ingest supports only umap and pca embeddings for now." + raise NotImplementedError(msg) def _knn_classify(self, labels): # ensure it's categorical @@ -461,7 +464,8 @@ def map_labels(self, labels, method): if method == "knn": self._obs[labels] = self._knn_classify(labels) else: - raise NotImplementedError("Ingest supports knn labeling for now.") + msg = "Ingest supports knn labeling for now." + raise NotImplementedError(msg) @old_positionals("inplace") def to_adata(self, *, inplace: bool = False) -> AnnData | None: diff --git a/src/scanpy/tools/_leiden.py b/src/scanpy/tools/_leiden.py index f73ec1fd7d..9f1fbf23ef 100644 --- a/src/scanpy/tools/_leiden.py +++ b/src/scanpy/tools/_leiden.py @@ -120,19 +120,18 @@ def leiden( and `n_iterations`. """ if flavor not in {"igraph", "leidenalg"}: - raise ValueError( - f"flavor must be either 'igraph' or 'leidenalg', but '{flavor}' was passed" + msg = ( + f"flavor must be either 'igraph' or 'leidenalg', but {flavor!r} was passed" ) + raise ValueError(msg) _utils.ensure_igraph() if flavor == "igraph": if directed: - raise ValueError( - "Cannot use igraph’s leiden implementation with a directed graph." - ) + msg = "Cannot use igraph’s leiden implementation with a directed graph." + raise ValueError(msg) if partition_type is not None: - raise ValueError( - "Do not pass in partition_type argument when using igraph." - ) + msg = "Do not pass in partition_type argument when using igraph." + raise ValueError(msg) else: try: import leidenalg @@ -140,9 +139,8 @@ def leiden( msg = 'In the future, the default backend for leiden will be igraph instead of leidenalg.\n\n To achieve the future defaults please pass: flavor="igraph" and n_iterations=2. directed must also be False to work with igraph\'s implementation.' _utils.warn_once(msg, FutureWarning, stacklevel=3) except ImportError: - raise ImportError( - "Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip3 install leidenalg`." - ) + msg = "Please install the leiden algorithm: `conda install -c conda-forge leidenalg` or `pip3 install leidenalg`." + raise ImportError(msg) clustering_args = dict(clustering_args) start = logg.info("running Leiden clustering") diff --git a/src/scanpy/tools/_louvain.py b/src/scanpy/tools/_louvain.py index 470858ff38..50181229ab 100644 --- a/src/scanpy/tools/_louvain.py +++ b/src/scanpy/tools/_louvain.py @@ -143,9 +143,8 @@ def louvain( partition_kwargs = dict(partition_kwargs) start = logg.info("running Louvain clustering") if (flavor != "vtraag") and (partition_type is not None): - raise ValueError( - "`partition_type` is only a valid argument " 'when `flavour` is "vtraag"' - ) + msg = '`partition_type` is only a valid argument when `flavour` is "vtraag"' + raise ValueError(msg) adata = adata.copy() if copy else adata if adjacency is None: adjacency = _choose_graph(adata, obsp, neighbors_key) @@ -239,7 +238,8 @@ def louvain( for k, v in partition.items(): groups[k] = v else: - raise ValueError('`flavor` needs to be "vtraag" or "igraph" or "taynaud".') + msg = '`flavor` needs to be "vtraag" or "igraph" or "taynaud".' + raise ValueError(msg) if restrict_to is not None: if key_added == "louvain": key_added += "_R" diff --git a/src/scanpy/tools/_marker_gene_overlap.py b/src/scanpy/tools/_marker_gene_overlap.py index eb07b84885..1860fd73df 100644 --- a/src/scanpy/tools/_marker_gene_overlap.py +++ b/src/scanpy/tools/_marker_gene_overlap.py @@ -162,30 +162,35 @@ def marker_gene_overlap( """ # Test user inputs if inplace: - raise NotImplementedError( + msg = ( "Writing Pandas dataframes to h5ad is currently under development." "\nPlease use `inplace=False`." ) + raise NotImplementedError(msg) if key not in adata.uns: - raise ValueError( + msg = ( "Could not find marker gene data. " "Please run `sc.tl.rank_genes_groups()` first." ) + raise ValueError(msg) avail_methods = {"overlap_count", "overlap_coef", "jaccard", "enrich"} if method not in avail_methods: - raise ValueError(f"Method must be one of {avail_methods}.") + msg = f"Method must be one of {avail_methods}." + raise ValueError(msg) if normalize == "None": normalize = None avail_norm = {"reference", "data", None} if normalize not in avail_norm: - raise ValueError(f"Normalize must be one of {avail_norm}.") + msg = f"Normalize must be one of {avail_norm}." + raise ValueError(msg) if normalize is not None and method != "overlap_count": - raise ValueError("Can only normalize with method=`overlap_count`.") + msg = "Can only normalize with method=`overlap_count`." + raise ValueError(msg) if not all(isinstance(val, AbstractSet) for val in reference_markers.values()): try: @@ -193,18 +198,20 @@ def marker_gene_overlap( key: set(val) for key, val in reference_markers.items() } except Exception: - raise ValueError( + msg = ( "Please ensure that `reference_markers` contains " "sets or lists of markers as values." ) + raise ValueError(msg) if adj_pval_threshold is not None: if "pvals_adj" not in adata.uns[key]: - raise ValueError( + msg = ( "Could not find adjusted p-value data. " "Please run `sc.tl.rank_genes_groups()` with a " "method that outputs adjusted p-values." ) + raise ValueError(msg) if adj_pval_threshold < 0: logg.warning( diff --git a/src/scanpy/tools/_paga.py b/src/scanpy/tools/_paga.py index 98146b83e2..b7f1e86e5d 100644 --- a/src/scanpy/tools/_paga.py +++ b/src/scanpy/tools/_paga.py @@ -107,21 +107,22 @@ def paga( """ check_neighbors = "neighbors" if neighbors_key is None else neighbors_key if check_neighbors not in adata.uns: - raise ValueError( - "You need to run `pp.neighbors` first to compute a neighborhood graph." - ) + msg = "You need to run `pp.neighbors` first to compute a neighborhood graph." + raise ValueError(msg) if groups is None: for k in ("leiden", "louvain"): if k in adata.obs.columns: groups = k break if groups is None: - raise ValueError( + msg = ( "You need to run `tl.leiden` or `tl.louvain` to compute " "community labels, or specify `groups='an_existing_key'`" ) + raise ValueError(msg) elif groups not in adata.obs.columns: - raise KeyError(f"`groups` key {groups!r} not found in `adata.obs`.") + msg = f"`groups` key {groups!r} not found in `adata.obs`." + raise KeyError(msg) adata = adata.copy() if copy else adata _utils.sanitize_anndata(adata) @@ -170,9 +171,8 @@ def compute_connectivities(self): elif self._model == "v1.0": return self._compute_connectivities_v1_0() else: - raise ValueError( - f"`model` {self._model} needs to be one of {_AVAIL_MODELS}." - ) + msg = f"`model` {self._model} needs to be one of {_AVAIL_MODELS}." + raise ValueError(msg) def _compute_connectivities_v1_2(self): import igraph @@ -273,15 +273,17 @@ def compute_transitions(self): "The key 'velocyto_transitions' has been changed to 'velocity_graph'." ) else: - raise ValueError( + msg = ( "The passed AnnData needs to have an `uns` annotation " "with key 'velocity_graph' - a sparse matrix from RNA velocity." ) + raise ValueError(msg) if self._adata.uns[vkey].shape != (self._adata.n_obs, self._adata.n_obs): - raise ValueError( + msg = ( f"The passed 'velocity_graph' have shape {self._adata.uns[vkey].shape} " f"but shoud have shape {(self._adata.n_obs, self._adata.n_obs)}" ) + raise ValueError(msg) # restore this at some point # if 'expected_n_edges_random' not in self._adata.uns['paga']: # raise ValueError( diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index cafb78c6f1..05e5738d99 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -132,7 +132,8 @@ def __init__( adata_comp = adata if layer is not None: if use_raw: - raise ValueError("Cannot specify `layer` and have `use_raw=True`.") + msg = "Cannot specify `layer` and have `use_raw=True`." + raise ValueError(msg) X = adata_comp.layers[layer] else: if use_raw and adata.raw is not None: @@ -253,7 +254,8 @@ def t_test( # hack for overestimating the variance for small groups ns_rest = ns_group else: - raise ValueError("Method does not exist.") + msg = "Method does not exist." + raise ValueError(msg) # TODO: Come up with better solution. Mask unexpressed genes? # See https://github.com/scipy/scipy/issues/10269 @@ -369,7 +371,8 @@ def logreg( X = self.X[self.grouping_mask.values, :] if len(self.groups_order) == 1: - raise ValueError("Cannot perform logistic regression on a single cluster.") + msg = "Cannot perform logistic regression on a single cluster." + raise ValueError(msg) clf = LogisticRegression(**kwds) clf.fit(X, self.grouping.cat.codes) @@ -598,7 +601,8 @@ def rank_genes_groups( if use_raw is None: use_raw = adata.raw is not None elif use_raw is True and adata.raw is None: - raise ValueError("Received `use_raw=True`, but `adata.raw` is empty.") + msg = "Received `use_raw=True`, but `adata.raw` is empty." + raise ValueError(msg) if method is None: method = "t-test" @@ -608,11 +612,13 @@ def rank_genes_groups( start = logg.info("ranking genes") if method not in (avail_methods := get_literal_vals(_Method)): - raise ValueError(f"Method must be one of {avail_methods}.") + msg = f"Method must be one of {avail_methods}." + raise ValueError(msg) avail_corr = {"benjamini-hochberg", "bonferroni"} if corr_method not in avail_corr: - raise ValueError(f"Correction method must be one of {avail_corr}.") + msg = f"Correction method must be one of {avail_corr}." + raise ValueError(msg) adata = adata.copy() if copy else adata _utils.sanitize_anndata(adata) @@ -620,7 +626,8 @@ def rank_genes_groups( if groups == "all": groups_order = "all" elif isinstance(groups, str | int): - raise ValueError("Specify a sequence of groups") + msg = "Specify a sequence of groups" + raise ValueError(msg) else: groups_order = list(groups) if isinstance(groups_order[0], int): @@ -629,9 +636,8 @@ def rank_genes_groups( groups_order += [reference] if reference != "rest" and reference not in adata.obs[groupby].cat.categories: cats = adata.obs[groupby].cat.categories.tolist() - raise ValueError( - f"reference = {reference} needs to be one of groupby = {cats}." - ) + msg = f"reference = {reference} needs to be one of groupby = {cats}." + raise ValueError(msg) if key_added is None: key_added = "rank_genes_groups" diff --git a/src/scanpy/tools/_score_genes.py b/src/scanpy/tools/_score_genes.py index a40d9f3288..d0a33fdb97 100644 --- a/src/scanpy/tools/_score_genes.py +++ b/src/scanpy/tools/_score_genes.py @@ -38,7 +38,8 @@ def _sparse_nanmean( np.nanmean equivalent for sparse matrices """ if not issparse(X): - raise TypeError("X must be a sparse matrix") + msg = "X must be a sparse matrix" + raise TypeError(msg) # count the number of nan elements per row/column (dep. on axis) Z = X.copy() @@ -130,9 +131,8 @@ def score_genes( adata = adata.copy() if copy else adata use_raw = _check_use_raw(adata, use_raw, layer=layer) if is_backed_type(adata.X) and not use_raw: - raise NotImplementedError( - f"score_genes is not implemented for matrices of type {type(adata.X)}" - ) + msg = f"score_genes is not implemented for matrices of type {type(adata.X)}" + raise NotImplementedError(msg) if random_state is not None: np.random.seed(random_state) @@ -204,14 +204,16 @@ def _check_score_genes_args( if len(genes_to_ignore) > 0: logg.warning(f"genes are not in var_names and ignored: {genes_to_ignore}") if len(gene_list) == 0: - raise ValueError("No valid genes were passed for scoring.") + msg = "No valid genes were passed for scoring." + raise ValueError(msg) if gene_pool is None: gene_pool = var_names.astype("string") else: gene_pool = pd.Index(gene_pool, dtype="string").intersection(var_names) if len(gene_pool) == 0: - raise ValueError("No valid genes were passed for reference set.") + msg = "No valid genes were passed for reference set." + raise ValueError(msg) def get_subset(genes: pd.Index[str]): x = _get_obs_rep(adata, use_raw=use_raw, layer=layer) diff --git a/src/scanpy/tools/_sim.py b/src/scanpy/tools/_sim.py index 7410442952..a53575fa29 100644 --- a/src/scanpy/tools/_sim.py +++ b/src/scanpy/tools/_sim.py @@ -120,7 +120,7 @@ def add_args(p): "default": "", "metavar": "f", "type": str, - "help": "Specify a parameter file " '(default: "sim/${exkey}_params.txt")', + "help": 'Specify a parameter file (default: "sim/${exkey}_params.txt")', } } p = _utils.add_args(p, dadd_args) @@ -216,7 +216,7 @@ def sample_dynamic_data(**params): break logg.debug( f"mean nr of offdiagonal edges {nrOffEdges_list.mean()} " - f"compared to total nr {grnsim.dim * (grnsim.dim - 1) / 2.}" + f"compared to total nr {grnsim.dim * (grnsim.dim - 1) / 2.0}" ) # more complex models @@ -358,15 +358,13 @@ def write_data( for g in range(dim): if np.abs(Coupl[gp, g]) > 1e-10: f.write( - f"{names[gp]:10} " - f"{names[g]:10} " - f"{Coupl[gp, g]:10.3} \n" + f"{names[gp]:10} {names[g]:10} {Coupl[gp, g]:10.3} \n" ) # write simulated data # the binary mode option in the following line is a fix for python 3 # variable names if varNames: - header += f'{"it":>2} ' + header += f"{'it':>2} " for v in varNames: header += f"{v:>7} " with (dir / f"sim_{id}.txt").open("ab" if append else "wb") as f: @@ -429,7 +427,8 @@ def __init__( self.verbosity = verbosity # checks if initType not in ["branch", "random"]: - raise RuntimeError("initType must be either: branch, random") + msg = "initType must be either: branch, random" + raise RuntimeError(msg) if model not in self.availModels: message = "model not among predefined models \n" # noqa: F841 # TODO FIX # read from file @@ -437,7 +436,8 @@ def __init__( model = Path(sim_models.__file__).parent / f"{model}.txt" if not model.is_file(): - raise RuntimeError(f"Model file {model} does not exist") + msg = f"Model file {model} does not exist" + raise RuntimeError(msg) self.model = model # set the coupling matrix, and with that the adjacency matrix self.set_coupl(Coupl=Coupl) @@ -461,7 +461,8 @@ def sim_model(self, tmax, X0, noiseDyn=0, restart=0): elif self.modelType == "var": Xdiff = self.Xdiff_var(X[t - 1]) else: - raise ValueError(f"Unknown modelType {self.modelType!r}") + msg = f"Unknown modelType {self.modelType!r}" + raise ValueError(msg) X[t] = X[t - 1] + Xdiff # add dynamic noise X[t] += noiseDyn * np.random.randn(self.dim) @@ -501,7 +502,7 @@ def Xdiff_hill(self, Xt): ) if verbosity > 0: Xdiff_syn_tuple_str += ( - f'{"a" if v else "i"}' + f"{'a' if v else 'i'}" f"({self.pas[child][iv]}, {threshold:.2})" ) Xdiff_syn += Xdiff_syn_tuple @@ -853,12 +854,12 @@ def build_boolCoeff(self): for g in range(self.dim): if g in pasIndices: if np.abs(self.Coupl[self.varNames[key], g]) < 1e-10: - raise ValueError(f"specify coupling value for {key} <- {g}") + msg = f"specify coupling value for {key} <- {g}" + raise ValueError(msg) else: if np.abs(self.Coupl[self.varNames[key], g]) > 1e-10: - raise ValueError( - "there should be no coupling value for " f"{key} <- {g}" - ) + msg = f"there should be no coupling value for {key} <- {g}" + raise ValueError(msg) if self.verbosity > 1: settings.m(0, "..." + key) settings.m(0, rule) @@ -957,7 +958,7 @@ def _check_branching( check = False if check: Xsamples.append(X) - logg.debug(f'realization {restart}: {"" if check else "no"} new branch') + logg.debug(f"realization {restart}: {'' if check else 'no'} new branch") return check, Xsamples @@ -1047,9 +1048,8 @@ def sample_coupling_matrix( check = True break if not check: - raise ValueError( - "did not find graph without cycles after" f"{max_trial} trials" - ) + msg = f"did not find graph without cycles after {max_trial} trials" + raise ValueError(msg) return Coupl, Adj, Adj_signed, n_edges diff --git a/src/scanpy/tools/_umap.py b/src/scanpy/tools/_umap.py index 902171d58c..926e6d3d4f 100644 --- a/src/scanpy/tools/_umap.py +++ b/src/scanpy/tools/_umap.py @@ -164,9 +164,8 @@ def umap( if neighbors_key is None: # backwards compat neighbors_key = "neighbors" if neighbors_key not in adata.uns: - raise ValueError( - f"Did not find .uns[{neighbors_key!r}]. Run `sc.pp.neighbors` first." - ) + msg = f"Did not find .uns[{neighbors_key!r}]. Run `sc.pp.neighbors` first." + raise ValueError(msg) start = logg.info("computing UMAP") @@ -241,10 +240,11 @@ def umap( warnings.warn(msg, FutureWarning) metric = neigh_params.get("metric", "euclidean") if metric != "euclidean": - raise ValueError( + msg = ( f"`sc.pp.neighbors` was called with `metric` {metric!r}, " "but umap `method` 'rapids' only supports the 'euclidean' metric." ) + raise ValueError(msg) from cuml import UMAP n_neighbors = neighbors["params"]["n_neighbors"] diff --git a/src/scanpy/tools/_utils.py b/src/scanpy/tools/_utils.py index 97e2de0df1..4d24b5e276 100644 --- a/src/scanpy/tools/_utils.py +++ b/src/scanpy/tools/_utils.py @@ -32,9 +32,8 @@ def _choose_representation( if adata.n_vars > settings.N_PCS: if "X_pca" in adata.obsm: if n_pcs is not None and n_pcs > adata.obsm["X_pca"].shape[1]: - raise ValueError( - "`X_pca` does not have enough PCs. Rerun `sc.pp.pca` with adjusted `n_comps`." - ) + msg = "`X_pca` does not have enough PCs. Rerun `sc.pp.pca` with adjusted `n_comps`." + raise ValueError(msg) X = adata.obsm["X_pca"][:, :n_pcs] logg.info(f" using 'X_pca' with n_pcs = {X.shape[1]}") else: @@ -52,21 +51,23 @@ def _choose_representation( else: if use_rep in adata.obsm and n_pcs is not None: if n_pcs > adata.obsm[use_rep].shape[1]: - raise ValueError( + msg = ( f"{use_rep} does not have enough Dimensions. Provide a " "Representation with equal or more dimensions than" "`n_pcs` or lower `n_pcs` " ) + raise ValueError(msg) X = adata.obsm[use_rep][:, :n_pcs] elif use_rep in adata.obsm and n_pcs is None: X = adata.obsm[use_rep] elif use_rep == "X": X = adata.X else: - raise ValueError( + msg = ( f"Did not find {use_rep} in `.obsm.keys()`. " "You need to compute it first." ) + raise ValueError(msg) settings.verbosity = verbosity # resetting verbosity return X @@ -86,7 +87,7 @@ def preprocess_with_pca(adata, n_pcs: int | None = None, random_state=0): logg.info(" using data matrix X directly (no PCA)") return adata.X elif n_pcs is None and "X_pca" in adata.obsm_keys(): - logg.info(f' using \'X_pca\' with n_pcs = {adata.obsm["X_pca"].shape[1]}') + logg.info(f" using 'X_pca' with n_pcs = {adata.obsm['X_pca'].shape[1]}") return adata.obsm["X_pca"] elif "X_pca" in adata.obsm_keys() and adata.obsm["X_pca"].shape[1] >= n_pcs: logg.info(f" using 'X_pca' with n_pcs = {n_pcs}") @@ -128,5 +129,6 @@ def get_init_pos_from_paga( else: init_pos[subset] = group_pos else: - raise ValueError("Plot PAGA first, so that adata.uns['paga']" "with key 'pos'.") + msg = "Plot PAGA first, so that adata.uns['paga'] with key 'pos'." + raise ValueError(msg) return init_pos diff --git a/src/scanpy/tools/_utils_clustering.py b/src/scanpy/tools/_utils_clustering.py index 47f652fbdf..3c771e5d74 100644 --- a/src/scanpy/tools/_utils_clustering.py +++ b/src/scanpy/tools/_utils_clustering.py @@ -37,12 +37,12 @@ def restrict_adjacency( adjacency: spmatrix, ) -> tuple[spmatrix, NDArray[np.bool_]]: if not isinstance(restrict_categories[0], str): - raise ValueError( - "You need to use strings to label categories, " "e.g. '1' instead of 1." - ) + msg = "You need to use strings to label categories, e.g. '1' instead of 1." + raise ValueError(msg) for c in restrict_categories: if c not in adata.obs[restrict_key].cat.categories: - raise ValueError(f"'{c}' is not a valid category for '{restrict_key}'") + msg = f"{c!r} is not a valid category for {restrict_key!r}" + raise ValueError(msg) restrict_indices = adata.obs[restrict_key].isin(restrict_categories).values adjacency = adjacency[restrict_indices, :] adjacency = adjacency[:, restrict_indices] diff --git a/src/testing/scanpy/_pytest/__init__.py b/src/testing/scanpy/_pytest/__init__.py index 318baac1aa..e365a90495 100644 --- a/src/testing/scanpy/_pytest/__init__.py +++ b/src/testing/scanpy/_pytest/__init__.py @@ -75,8 +75,7 @@ def pytest_addoption(parser: pytest.Parser) -> None: action="store_true", default=False, help=( - "Run tests that retrieve stuff from the internet. " - "This increases test time." + "Run tests that retrieve stuff from the internet. This increases test time." ), ) @@ -131,6 +130,6 @@ def pytest_itemcollected(item: pytest.Item) -> None: ) -assert ( - "scanpy" not in sys.modules -), "scanpy is already imported, this will mess up test coverage" +assert "scanpy" not in sys.modules, ( + "scanpy is already imported, this will mess up test coverage" +) diff --git a/tests/conftest.py b/tests/conftest.py index 4cbe5ff53e..2d7f8e7aad 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -133,7 +133,8 @@ def save_and_compare(*path_parts: Path | os.PathLike, tol: int): plt.savefig(actual_pth, dpi=40) plt.close() if not expected_pth.is_file(): - raise OSError(f"No expected output found at {expected_pth}.") + msg = f"No expected output found at {expected_pth}." + raise OSError(msg) check_same_image(expected_pth, actual_pth, tol=tol) return save_and_compare diff --git a/tests/external/test_wishbone.py b/tests/external/test_wishbone.py index 7fadef63c6..db649a5b9d 100644 --- a/tests/external/test_wishbone.py +++ b/tests/external/test_wishbone.py @@ -22,6 +22,6 @@ def test_run_wishbone(): components=[2, 3], num_waypoints=150, ) - assert all( - [k in adata.obs for k in ["trajectory_wishbone", "branch_wishbone"]] - ), "Run Wishbone Error!" + assert all([k in adata.obs for k in ["trajectory_wishbone", "branch_wishbone"]]), ( + "Run Wishbone Error!" + ) diff --git a/tests/test_dendrogram.py b/tests/test_dendrogram.py index 18b952eff2..44a08fcf67 100644 --- a/tests/test_dendrogram.py +++ b/tests/test_dendrogram.py @@ -18,7 +18,7 @@ def test_dendrogram_key_added(groupby, key_added): adata = pbmc68k_reduced() sc.tl.dendrogram(adata, groupby=groupby, key_added=key_added, use_rep="X_pca") if isinstance(groupby, list): - dendrogram_key = f'dendrogram_{"_".join(groupby)}' + dendrogram_key = f"dendrogram_{'_'.join(groupby)}" else: dendrogram_key = f"dendrogram_{groupby}" diff --git a/tests/test_get.py b/tests/test_get.py index 673b26787d..05cb1b6a9d 100644 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -24,7 +24,7 @@ def transpose_adata(adata: AnnData, *, expect_duplicates: bool = False) -> AnnDa TRANSPOSE_PARAMS = pytest.mark.parametrize( - "dim,transform,func", + ("dim", "transform", "func"), [ ("obs", lambda x, expect_duplicates=False: x, sc.get.obs_df), ("var", transpose_adata, sc.get.var_df), diff --git a/tests/test_highly_variable_genes.py b/tests/test_highly_variable_genes.py index 7d9fdac9fa..528a86ea99 100644 --- a/tests/test_highly_variable_genes.py +++ b/tests/test_highly_variable_genes.py @@ -629,7 +629,8 @@ def test_subset_inplace_consistency(flavor, array_type, batch_key): pass else: - raise ValueError(f"Unknown flavor {flavor}") + msg = f"Unknown flavor {flavor}" + raise ValueError(msg) n_genes = adata.shape[1] diff --git a/tests/test_normalization.py b/tests/test_normalization.py index 3acefe1bb1..9cf20c0b52 100644 --- a/tests/test_normalization.py +++ b/tests/test_normalization.py @@ -198,12 +198,12 @@ def _check_pearson_pca_fields(ad, n_cells, n_comps): "Missing `.uns` keys. Expected `['pearson_residuals_normalization', 'pca']`, " f"but only {list(ad.uns.keys())} were found" ) - assert ( - "X_pca" in ad.obsm - ), f"Missing `obsm` key `'X_pca'`, only {list(ad.obsm.keys())} were found" - assert ( - "PCs" in ad.varm - ), f"Missing `varm` key `'PCs'`, only {list(ad.varm.keys())} were found" + assert "X_pca" in ad.obsm, ( + f"Missing `obsm` key `'X_pca'`, only {list(ad.obsm.keys())} were found" + ) + assert "PCs" in ad.varm, ( + f"Missing `varm` key `'PCs'`, only {list(ad.varm.keys())} were found" + ) assert ad.obsm["X_pca"].shape == ( n_cells, n_comps, diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index 788c7e705d..b938fd2ca3 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -59,14 +59,12 @@ def get_example_data(array_type: Callable[[np.ndarray], Any]) -> AnnData: return adata -def get_true_scores() -> ( - tuple[ - NDArray[np.object_], - NDArray[np.object_], - NDArray[np.floating], - NDArray[np.floating], - ] -): +def get_true_scores() -> tuple[ + NDArray[np.object_], + NDArray[np.object_], + NDArray[np.floating], + NDArray[np.floating], +]: with (DATA_PATH / "objs_t_test.pkl").open("rb") as f: true_scores_t_test, true_names_t_test = pickle.load(f) with (DATA_PATH / "objs_wilcoxon.pkl").open("rb") as f: