From c312718ca66b292e794fe325a59ea9d46a0471e6 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt
Date: Wed, 4 Dec 2024 11:06:40 +0100
Subject: [PATCH 1/2] Added three negative control methods

---
 CHANGELOG.md                               |  5 +++
 .../shuffle_integration/config.vsh.yaml    | 20 +++++++++
 .../shuffle_integration/script.py          | 41 ++++++++++++++++++
 .../config.vsh.yaml                        | 20 +++++++++
 .../shuffle_integration_by_batch/script.py | 42 +++++++++++++++++++
 .../config.vsh.yaml                        | 20 +++++++++
 .../script.py                              | 42 +++++++++++++++++++
 src/control_methods/utils.py               | 41 ++++++++++++++++++
 8 files changed, 231 insertions(+)
 create mode 100644 src/control_methods/shuffle_integration/config.vsh.yaml
 create mode 100644 src/control_methods/shuffle_integration/script.py
 create mode 100644 src/control_methods/shuffle_integration_by_batch/config.vsh.yaml
 create mode 100644 src/control_methods/shuffle_integration_by_batch/script.py
 create mode 100644 src/control_methods/shuffle_integration_by_cell_type/config.vsh.yaml
 create mode 100644 src/control_methods/shuffle_integration_by_cell_type/script.py
 create mode 100644 src/control_methods/utils.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 557813c..0feb49c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,11 @@
 
 * Added `methods/limma_remove_batch_effect` component (PR #7).
 
+* Added three negative control methods (PR #8):
+  - `control_methods/shuffle_integration`
+  - `control_methods/shuffle_integration_by_batch`
+  - `control_methods/shuffle_integration_by_cell_type`
+
 ## MAJOR CHANGES
 
 ## MINOR CHANGES
diff --git a/src/control_methods/shuffle_integration/config.vsh.yaml b/src/control_methods/shuffle_integration/config.vsh.yaml
new file mode 100644
index 0000000..8f5e65e
--- /dev/null
+++ b/src/control_methods/shuffle_integration/config.vsh.yaml
@@ -0,0 +1,20 @@
+__merge__: /src/api/comp_control_method.yaml
+name: shuffle_integration
+label: Shuffle integration
+summary: Integrations are randomly permuted
+description: Integrations are randomly permuted
+info:
+  method_types: [feature]
+  preferred_normalization: log_cp10k
+resources:
+  - type: python_script
+    path: script.py
+  - path: /src/control_methods/utils.py
+engines:
+  - type: docker
+    image: openproblems/base_python:1.0.0
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [midtime, midmem, lowcpu]
diff --git a/src/control_methods/shuffle_integration/script.py b/src/control_methods/shuffle_integration/script.py
new file mode 100644
index 0000000..d943211
--- /dev/null
+++ b/src/control_methods/shuffle_integration/script.py
@@ -0,0 +1,41 @@
+import anndata as ad
+import sys
+
+## VIASH START
+par = {
+    "input_unintegrated": "resources_test/task_cyto_batch_integration/starter_file/unintegrated_censored.h5ad",
+    "output": "output.h5ad",
+}
+meta = {"name": "shuffle_integration", "resources_dir": "src/control_methods"}
+## VIASH END
+
+print("Importing helper functions", flush=True)
+sys.path.append(meta["resources_dir"])
+from utils import _randomize_features
+
+print("Reading and preparing input files", flush=True)
+adata = ad.read_h5ad(par["input_unintegrated"])
+
+markers_to_correct = adata.var[adata.var["to_correct"]].index.to_numpy()
+
+adata = adata[:, markers_to_correct]
+
+print("Randomise features", flush=True)
+integrated = _randomize_features(
+    adata.layers["preprocessed"]
+)
+
+# create new anndata
+output = ad.AnnData(
+    obs=adata.obs[[]],
+    var=adata.var[[]],
+    layers={"integrated": integrated},
+    uns={
+        "dataset_id": adata.uns["dataset_id"],
+        "method_id": meta["name"],
+        "parameters": {},
+    },
+)
+
+print("Write output AnnData to file", flush=True)
+output.write_h5ad(par["output"], compression="gzip")
diff --git a/src/control_methods/shuffle_integration_by_batch/config.vsh.yaml b/src/control_methods/shuffle_integration_by_batch/config.vsh.yaml
new file mode 100644
index 0000000..0fa1a70
--- /dev/null
+++ b/src/control_methods/shuffle_integration_by_batch/config.vsh.yaml
@@ -0,0 +1,20 @@
+__merge__: /src/api/comp_control_method.yaml
+name: shuffle_integration_by_batch
+label: Shuffle integration by batch
+summary: Integrations are randomly permuted within each batch
+description: Integrations are randomly permuted within each batch
+info:
+  method_types: [feature]
+  preferred_normalization: log_cp10k
+resources:
+  - type: python_script
+    path: script.py
+  - path: /src/control_methods/utils.py
+engines:
+  - type: docker
+    image: openproblems/base_python:1.0.0
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [midtime, midmem, lowcpu]
diff --git a/src/control_methods/shuffle_integration_by_batch/script.py b/src/control_methods/shuffle_integration_by_batch/script.py
new file mode 100644
index 0000000..07c8b18
--- /dev/null
+++ b/src/control_methods/shuffle_integration_by_batch/script.py
@@ -0,0 +1,42 @@
+import anndata as ad
+import sys
+
+## VIASH START
+par = {
+    "input_unintegrated": "resources_test/task_cyto_batch_integration/starter_file/unintegrated_censored.h5ad",
+    "output": "output.h5ad",
+}
+meta = {"name": "shuffle_integration_by_batch", "resources_dir": "src/control_methods"}
+## VIASH END
+
+print("Importing helper functions", flush=True)
+sys.path.append(meta["resources_dir"])
+from utils import _randomize_features
+
+print("Reading and preparing input files", flush=True)
+adata = ad.read_h5ad(par["input_unintegrated"])
+
+markers_to_correct = adata.var[adata.var["to_correct"]].index.to_numpy()
+
+adata = adata[:, markers_to_correct]
+
+print("Randomise features", flush=True)
+integrated = _randomize_features(
+    adata.layers["preprocessed"],
+    partition=adata.obs["batch"],
+)
+
+# create new anndata
+output = ad.AnnData(
+    obs=adata.obs[[]],
+    var=adata.var[[]],
+    layers={"integrated": integrated},
+    uns={
+        "dataset_id": adata.uns["dataset_id"],
+        "method_id": meta["name"],
+        "parameters": {},
+    },
+)
+
+print("Write output AnnData to file", flush=True)
+output.write_h5ad(par["output"], compression="gzip")
diff --git a/src/control_methods/shuffle_integration_by_cell_type/config.vsh.yaml b/src/control_methods/shuffle_integration_by_cell_type/config.vsh.yaml
new file mode 100644
index 0000000..2e99fc5
--- /dev/null
+++ b/src/control_methods/shuffle_integration_by_cell_type/config.vsh.yaml
@@ -0,0 +1,20 @@
+__merge__: /src/api/comp_control_method.yaml
+name: shuffle_integration_by_cell_type
+label: Shuffle integration by cell type
+summary: Integrations are randomly permuted within each cell type
+description: Integrations are randomly permuted within each cell type
+info:
+  method_types: [feature]
+  preferred_normalization: log_cp10k
+resources:
+  - type: python_script
+    path: script.py
+  - path: /src/control_methods/utils.py
+engines:
+  - type: docker
+    image: openproblems/base_python:1.0.0
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [midtime, midmem, lowcpu]
diff --git a/src/control_methods/shuffle_integration_by_cell_type/script.py b/src/control_methods/shuffle_integration_by_cell_type/script.py
new file mode 100644
index 0000000..8165d57
--- /dev/null
+++ b/src/control_methods/shuffle_integration_by_cell_type/script.py
@@ -0,0 +1,42 @@
+import anndata as ad
+import sys
+
+## VIASH START
+par = {
+    "input_unintegrated": "resources_test/task_cyto_batch_integration/starter_file/unintegrated_censored.h5ad",
+    "output": "output.h5ad",
+}
+meta = {"name": "shuffle_integration_by_cell_type", "resources_dir": "src/control_methods"}
+## VIASH END
+
+print("Importing helper functions", flush=True)
+sys.path.append(meta["resources_dir"])
+from utils import _randomize_features
+
+print("Reading and preparing input files", flush=True)
+adata = ad.read_h5ad(par["input_unintegrated"])
+
+markers_to_correct = adata.var[adata.var["to_correct"]].index.to_numpy()
+
+adata = adata[:, markers_to_correct]
+
+print("Randomise features", flush=True)
+integrated = _randomize_features(
+    adata.layers["preprocessed"],
+    partition=adata.obs["cell_type"],
+)
+
+# create new anndata
+output = ad.AnnData(
+    obs=adata.obs[[]],
+    var=adata.var[[]],
+    layers={"integrated": integrated},
+    uns={
+        "dataset_id": adata.uns["dataset_id"],
+        "method_id": meta["name"],
+        "parameters": {},
+    },
+)
+
+print("Write output AnnData to file", flush=True)
+output.write_h5ad(par["output"], compression="gzip")
diff --git a/src/control_methods/utils.py b/src/control_methods/utils.py
new file mode 100644
index 0000000..b4f4673
--- /dev/null
+++ b/src/control_methods/utils.py
@@ -0,0 +1,41 @@
+import numpy as np
+
+
+def _randomize_features(X, partition=None, seed=None):
+    """Shuffle the rows of ``X``, optionally only within groups.
+
+    Taken and adapted from opsca-v1:
+    https://github.com/openproblems-bio/openproblems/blob/acf5c95a7306b819c4a13972783433d0a48f769b/openproblems/tasks/_batch_integration/_common/methods/baseline.py#L13
+
+    Args:
+        X: Row-indexable matrix exposing ``shape`` and ``copy()``; rows are
+            the units that get exchanged.
+        partition: Optional 1-D array-like with one label per row. Rows are
+            only exchanged with rows carrying the same label. When ``None``,
+            all rows are shuffled together.
+        seed: Optional seed for a dedicated random generator. When ``None``,
+            NumPy's global random state is used (the previous behaviour).
+
+    Returns:
+        A copy of ``X`` with its rows permuted; ``X`` itself is not modified.
+
+    Raises:
+        ValueError: If ``partition`` does not hold exactly one label per row.
+    """
+    n_rows = X.shape[0]
+    if partition is None:
+        partition = np.zeros(n_rows, dtype=int)
+    else:
+        partition = np.asarray(partition)
+        if partition.shape != (n_rows,):
+            raise ValueError(
+                f"partition must hold one label per row of X ({n_rows}), "
+                f"got shape {partition.shape}"
+            )
+    rng = np.random if seed is None else np.random.default_rng(seed)
+
+    X_out = X.copy()
+    for label in np.unique(partition):
+        idx = np.flatnonzero(partition == label)
+        X_out[idx] = X[rng.permutation(idx)]
+    return X_out

From 8147adc5d93eb618a5f57795e7f3e500e8d95a6b Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt
Date: Wed, 4 Dec 2024 11:07:54 +0100
Subject: [PATCH 2/2] remove unneeded info

---
 src/control_methods/shuffle_integration/config.vsh.yaml | 3 ---
 .../shuffle_integration_by_batch/config.vsh.yaml        | 3 ---
 .../shuffle_integration_by_cell_type/config.vsh.yaml    | 3 ---
 3 files changed, 9 deletions(-)

diff --git a/src/control_methods/shuffle_integration/config.vsh.yaml b/src/control_methods/shuffle_integration/config.vsh.yaml
index 8f5e65e..b3df8a6 100644
--- a/src/control_methods/shuffle_integration/config.vsh.yaml
+++ b/src/control_methods/shuffle_integration/config.vsh.yaml
@@ -3,9 +3,6 @@ name: shuffle_integration
 label: Shuffle integration
 summary: Integrations are randomly permuted
 description: Integrations are randomly permuted
-info:
-  method_types: [feature]
-  preferred_normalization: log_cp10k
 resources:
   - type: python_script
     path: script.py
diff --git a/src/control_methods/shuffle_integration_by_batch/config.vsh.yaml b/src/control_methods/shuffle_integration_by_batch/config.vsh.yaml
index 0fa1a70..e249ef8 100644
--- a/src/control_methods/shuffle_integration_by_batch/config.vsh.yaml
+++ b/src/control_methods/shuffle_integration_by_batch/config.vsh.yaml
@@ -3,9 +3,6 @@ name: shuffle_integration_by_batch
 label: Shuffle integration by batch
 summary: Integrations are randomly permuted within each batch
 description: Integrations are randomly permuted within each batch
-info:
-  method_types: [feature]
-  preferred_normalization: log_cp10k
 resources:
   - type: python_script
     path: script.py
diff --git a/src/control_methods/shuffle_integration_by_cell_type/config.vsh.yaml b/src/control_methods/shuffle_integration_by_cell_type/config.vsh.yaml
index 2e99fc5..c81a246 100644
--- a/src/control_methods/shuffle_integration_by_cell_type/config.vsh.yaml
+++ b/src/control_methods/shuffle_integration_by_cell_type/config.vsh.yaml
@@ -3,9 +3,6 @@ name: shuffle_integration_by_cell_type
 label: Shuffle integration by cell type
 summary: Integrations are randomly permuted within each cell type
 description: Integrations are randomly permuted within each cell type
-info:
-  method_types: [feature]
-  preferred_normalization: log_cp10k
 resources:
   - type: python_script
     path: script.py