Commit

Added three negative control methods (#8)
* Added three negative control methods

* remove unneeded info
rcannood authored Dec 19, 2024
1 parent 376f5de commit 71f05f6
Showing 8 changed files with 203 additions and 0 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -12,6 +12,11 @@

* Added `methods/limma_remove_batch_effect` component (PR #7).

* Added three negative control methods (PR #8):
  - `control_methods/shuffle_integration`
  - `control_methods/shuffle_integration_by_batch`
  - `control_methods/shuffle_integration_by_cell_type`

## MAJOR CHANGES

## MINOR CHANGES
17 changes: 17 additions & 0 deletions src/control_methods/shuffle_integration/config.vsh.yaml
@@ -0,0 +1,17 @@
__merge__: /src/api/comp_control_method.yaml
name: shuffle_integration
label: Shuffle integration
summary: Integrations are randomly permuted
description: Integrations are randomly permuted
resources:
  - type: python_script
    path: script.py
  - path: /src/control_methods/utils.py
engines:
  - type: docker
    image: openproblems/base_python:1.0.0
runners:
  - type: executable
  - type: nextflow
    directives:
      label: [midtime, midmem, lowcpu]
43 changes: 43 additions & 0 deletions src/control_methods/shuffle_integration/script.py
@@ -0,0 +1,43 @@
import anndata as ad
import sys

## VIASH START
par = {
    "input_unintegrated": "resources_test/task_cyto_batch_integration/starter_file/unintegrated_censored.h5ad",
    "output": "output.h5ad",
}
meta = {"name": "harmonypy"}
## VIASH END

print("Importing helper functions", flush=True)
sys.path.append(meta["resources_dir"])
from utils import _randomize_features

print("Reading and preparing input files", flush=True)
adata = ad.read_h5ad(par["input_unintegrated"])

adata.obs["batch_str"] = adata.obs["batch"].astype(str)

markers_to_correct = adata.var[adata.var["to_correct"]].index.to_numpy()

adata = adata[:, markers_to_correct]

print("Randomise features", flush=True)
integrated = _randomize_features(
    adata.layers["preprocessed"]
)

# create new anndata
output = ad.AnnData(
    obs=adata.obs[[]],
    var=adata.var[[]],
    layers={"integrated": integrated},
    uns={
        "dataset_id": adata.uns["dataset_id"],
        "method_id": meta["name"],
        "parameters": {},
    },
)

print("Write output AnnData to file", flush=True)
output.write_h5ad(par["output"], compression="gzip")
17 changes: 17 additions & 0 deletions src/control_methods/shuffle_integration_by_batch/config.vsh.yaml
@@ -0,0 +1,17 @@
__merge__: /src/api/comp_control_method.yaml
name: shuffle_integration_by_batch
label: Shuffle integration by batch
summary: Integrations are randomly permuted within each batch
description: Integrations are randomly permuted within each batch
resources:
  - type: python_script
    path: script.py
  - path: /src/control_methods/utils.py
engines:
  - type: docker
    image: openproblems/base_python:1.0.0
runners:
  - type: executable
  - type: nextflow
    directives:
      label: [midtime, midmem, lowcpu]
44 changes: 44 additions & 0 deletions src/control_methods/shuffle_integration_by_batch/script.py
@@ -0,0 +1,44 @@
import anndata as ad
import sys

## VIASH START
par = {
    "input_unintegrated": "resources_test/task_cyto_batch_integration/starter_file/unintegrated_censored.h5ad",
    "output": "output.h5ad",
}
meta = {"name": "harmonypy"}
## VIASH END

print("Importing helper functions", flush=True)
sys.path.append(meta["resources_dir"])
from utils import _randomize_features

print("Reading and preparing input files", flush=True)
adata = ad.read_h5ad(par["input_unintegrated"])

adata.obs["batch_str"] = adata.obs["batch"].astype(str)

markers_to_correct = adata.var[adata.var["to_correct"]].index.to_numpy()

adata = adata[:, markers_to_correct]

print("Randomise features", flush=True)
integrated = _randomize_features(
    adata.layers["preprocessed"],
    partition=adata.obs["batch"],
)

# create new anndata
output = ad.AnnData(
    obs=adata.obs[[]],
    var=adata.var[[]],
    layers={"integrated": integrated},
    uns={
        "dataset_id": adata.uns["dataset_id"],
        "method_id": meta["name"],
        "parameters": {},
    },
)

print("Write output AnnData to file", flush=True)
output.write_h5ad(par["output"], compression="gzip")
17 changes: 17 additions & 0 deletions src/control_methods/shuffle_integration_by_cell_type/config.vsh.yaml
@@ -0,0 +1,17 @@
__merge__: /src/api/comp_control_method.yaml
name: shuffle_integration_by_cell_type
label: Shuffle integration by cell type
summary: Integrations are randomly permuted within each cell type
description: Integrations are randomly permuted within each cell type
resources:
  - type: python_script
    path: script.py
  - path: /src/control_methods/utils.py
engines:
  - type: docker
    image: openproblems/base_python:1.0.0
runners:
  - type: executable
  - type: nextflow
    directives:
      label: [midtime, midmem, lowcpu]
44 changes: 44 additions & 0 deletions src/control_methods/shuffle_integration_by_cell_type/script.py
@@ -0,0 +1,44 @@
import anndata as ad
import sys

## VIASH START
par = {
    "input_unintegrated": "resources_test/task_cyto_batch_integration/starter_file/unintegrated_censored.h5ad",
    "output": "output.h5ad",
}
meta = {"name": "harmonypy"}
## VIASH END

print("Importing helper functions", flush=True)
sys.path.append(meta["resources_dir"])
from utils import _randomize_features

print("Reading and preparing input files", flush=True)
adata = ad.read_h5ad(par["input_unintegrated"])

adata.obs["batch_str"] = adata.obs["batch"].astype(str)

markers_to_correct = adata.var[adata.var["to_correct"]].index.to_numpy()

adata = adata[:, markers_to_correct]

print("Randomise features", flush=True)
integrated = _randomize_features(
    adata.layers["preprocessed"],
    partition=adata.obs["cell_type"],
)

# create new anndata
output = ad.AnnData(
    obs=adata.obs[[]],
    var=adata.var[[]],
    layers={"integrated": integrated},
    uns={
        "dataset_id": adata.uns["dataset_id"],
        "method_id": meta["name"],
        "parameters": {},
    },
)

print("Write output AnnData to file", flush=True)
output.write_h5ad(par["output"], compression="gzip")
16 changes: 16 additions & 0 deletions src/control_methods/utils.py
@@ -0,0 +1,16 @@
import numpy as np

def _randomize_features(X, partition=None):
    """
    Taken and adapted from opsca-v1:
    https://github.com/openproblems-bio/openproblems/blob/acf5c95a7306b819c4a13972783433d0a48f769b/openproblems/tasks/_batch_integration/_common/methods/baseline.py#L13
    """
    X_out = X.copy()
    if partition is None:
        partition = np.full(X.shape[0], 0)
    else:
        partition = np.asarray(partition)
    for partition_name in np.unique(partition):
        partition_idx = np.argwhere(partition == partition_name).flatten()
        X_out[partition_idx] = X[np.random.permutation(partition_idx)]
    return X_out
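
For reference, a minimal usage sketch of `_randomize_features` (not part of the commit; the toy matrix and batch labels below are made up for illustration, and `utils.py` is assumed to be importable from the working directory):

import numpy as np

from utils import _randomize_features

# 6 cells x 2 markers, the first 3 cells in batch "b1" and the rest in "b2"
X = np.arange(12, dtype=float).reshape(6, 2)
batch = np.array(["b1", "b1", "b1", "b2", "b2", "b2"])

# shuffle_integration: rows are permuted across the whole matrix
shuffled = _randomize_features(X)

# shuffle_integration_by_batch: rows are permuted only within each batch,
# so every batch keeps the same set of rows, just reordered
shuffled_by_batch = _randomize_features(X, partition=batch)
assert sorted(map(tuple, shuffled_by_batch[:3])) == sorted(map(tuple, X[:3]))

`shuffle_integration_by_cell_type` works the same way, passing `adata.obs["cell_type"]` as the partition instead of the batch labels.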
