generated from openproblems-bio/task_template
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added three negative control methods (#8)
* Added three negative control methods
* Remove unneeded info
- Loading branch information
Showing 8 changed files with 203 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Viash component config for the "shuffle_integration" negative control.
# Shared settings are merged in from the control-method API definition.
__merge__: /src/api/comp_control_method.yaml
name: shuffle_integration
label: Shuffle integration
summary: Integrations are randomly permuted
description: Integrations are randomly permuted
resources:
  # Entry point of the component.
  - type: python_script
    path: script.py
  # Shared helper module; script.py imports `_randomize_features` from it.
  - path: /src/control_methods/utils.py
engines:
  - type: docker
    image: openproblems/base_python:1.0.0
runners:
  - type: executable
  - type: nextflow
    directives:
      label: [midtime, midmem, lowcpu]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Negative control "shuffle_integration": the markers flagged for correction
# are shuffled across all cells and written out as the "integrated" layer.
import anndata as ad
import sys

## VIASH START
# Development placeholders for running this script directly from the
# repository root; Viash substitutes this block when building the component.
par = {
    "input_unintegrated": "resources_test/task_cyto_batch_integration/starter_file/unintegrated_censored.h5ad",
    "output": "output.h5ad",
}
meta = {
    "name": "shuffle_integration",
    # Directory containing utils.py (see `resources` in config.vsh.yaml).
    "resources_dir": "src/control_methods",
}
## VIASH END

print("Importing helper functions", flush=True)
# utils.py is shipped as a component resource, so it is only importable once
# the resources directory is on sys.path.
sys.path.append(meta["resources_dir"])
from utils import _randomize_features

print("Reading and preparing input files", flush=True)
adata = ad.read_h5ad(par["input_unintegrated"])

# Keep only the markers flagged in var["to_correct"].
markers_to_correct = adata.var[adata.var["to_correct"]].index.to_numpy()
adata = adata[:, markers_to_correct]

print("Randomise features", flush=True)
# No partition is given, so rows are permuted across the whole dataset.
integrated = _randomize_features(adata.layers["preprocessed"])

# create new anndata
# obs/var are reduced to their indices only (no columns are carried over).
output = ad.AnnData(
    obs=adata.obs[[]],
    var=adata.var[[]],
    layers={"integrated": integrated},
    uns={
        "dataset_id": adata.uns["dataset_id"],
        "method_id": meta["name"],
        "parameters": {},
    },
)

print("Write output AnnData to file", flush=True)
output.write_h5ad(par["output"], compression="gzip")
17 changes: 17 additions & 0 deletions
17
src/control_methods/shuffle_integration_by_batch/config.vsh.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Viash component config for the "shuffle_integration_by_batch" negative control.
# Shared settings are merged in from the control-method API definition.
__merge__: /src/api/comp_control_method.yaml
name: shuffle_integration_by_batch
label: Shuffle integration by batch
summary: Integrations are randomly permuted within each batch
description: Integrations are randomly permuted within each batch
resources:
  # Entry point of the component.
  - type: python_script
    path: script.py
  # Shared helper module; script.py imports `_randomize_features` from it.
  - path: /src/control_methods/utils.py
engines:
  - type: docker
    image: openproblems/base_python:1.0.0
runners:
  - type: executable
  - type: nextflow
    directives:
      label: [midtime, midmem, lowcpu]
44 changes: 44 additions & 0 deletions
44
src/control_methods/shuffle_integration_by_batch/script.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Negative control "shuffle_integration_by_batch": the markers flagged for
# correction are shuffled among cells of the same batch and written out as
# the "integrated" layer.
import anndata as ad
import sys

## VIASH START
# Development placeholders for running this script directly from the
# repository root; Viash substitutes this block when building the component.
par = {
    "input_unintegrated": "resources_test/task_cyto_batch_integration/starter_file/unintegrated_censored.h5ad",
    "output": "output.h5ad",
}
meta = {
    "name": "shuffle_integration_by_batch",
    # Directory containing utils.py (see `resources` in config.vsh.yaml).
    "resources_dir": "src/control_methods",
}
## VIASH END

print("Importing helper functions", flush=True)
# utils.py is shipped as a component resource, so it is only importable once
# the resources directory is on sys.path.
sys.path.append(meta["resources_dir"])
from utils import _randomize_features

print("Reading and preparing input files", flush=True)
adata = ad.read_h5ad(par["input_unintegrated"])

# Keep only the markers flagged in var["to_correct"].
markers_to_correct = adata.var[adata.var["to_correct"]].index.to_numpy()
adata = adata[:, markers_to_correct]

print("Randomise features", flush=True)
# Rows are only exchanged between cells sharing the same obs["batch"] value.
integrated = _randomize_features(
    adata.layers["preprocessed"],
    partition=adata.obs["batch"],
)

# create new anndata
# obs/var are reduced to their indices only (no columns are carried over).
output = ad.AnnData(
    obs=adata.obs[[]],
    var=adata.var[[]],
    layers={"integrated": integrated},
    uns={
        "dataset_id": adata.uns["dataset_id"],
        "method_id": meta["name"],
        "parameters": {},
    },
)

print("Write output AnnData to file", flush=True)
output.write_h5ad(par["output"], compression="gzip")
17 changes: 17 additions & 0 deletions
17
src/control_methods/shuffle_integration_by_cell_type/config.vsh.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Viash component config for the "shuffle_integration_by_cell_type" negative control.
# Shared settings are merged in from the control-method API definition.
__merge__: /src/api/comp_control_method.yaml
name: shuffle_integration_by_cell_type
label: Shuffle integration by cell type
summary: Integrations are randomly permuted within each cell type
description: Integrations are randomly permuted within each cell type
resources:
  # Entry point of the component.
  - type: python_script
    path: script.py
  # Shared helper module; script.py imports `_randomize_features` from it.
  - path: /src/control_methods/utils.py
engines:
  - type: docker
    image: openproblems/base_python:1.0.0
runners:
  - type: executable
  - type: nextflow
    directives:
      label: [midtime, midmem, lowcpu]
44 changes: 44 additions & 0 deletions
44
src/control_methods/shuffle_integration_by_cell_type/script.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Negative control "shuffle_integration_by_cell_type": the markers flagged for
# correction are shuffled among cells of the same cell type and written out
# as the "integrated" layer.
import anndata as ad
import sys

## VIASH START
# Development placeholders for running this script directly from the
# repository root; Viash substitutes this block when building the component.
par = {
    "input_unintegrated": "resources_test/task_cyto_batch_integration/starter_file/unintegrated_censored.h5ad",
    "output": "output.h5ad",
}
meta = {
    "name": "shuffle_integration_by_cell_type",
    # Directory containing utils.py (see `resources` in config.vsh.yaml).
    "resources_dir": "src/control_methods",
}
## VIASH END

print("Importing helper functions", flush=True)
# utils.py is shipped as a component resource, so it is only importable once
# the resources directory is on sys.path.
sys.path.append(meta["resources_dir"])
from utils import _randomize_features

print("Reading and preparing input files", flush=True)
adata = ad.read_h5ad(par["input_unintegrated"])

# Keep only the markers flagged in var["to_correct"].
markers_to_correct = adata.var[adata.var["to_correct"]].index.to_numpy()
adata = adata[:, markers_to_correct]

print("Randomise features", flush=True)
# Rows are only exchanged between cells sharing the same obs["cell_type"] value.
integrated = _randomize_features(
    adata.layers["preprocessed"],
    partition=adata.obs["cell_type"],
)

# create new anndata
# obs/var are reduced to their indices only (no columns are carried over).
output = ad.AnnData(
    obs=adata.obs[[]],
    var=adata.var[[]],
    layers={"integrated": integrated},
    uns={
        "dataset_id": adata.uns["dataset_id"],
        "method_id": meta["name"],
        "parameters": {},
    },
)

print("Write output AnnData to file", flush=True)
output.write_h5ad(par["output"], compression="gzip")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import numpy as np | ||
|
||
def _randomize_features(X, partition=None, seed=None):
    """Return a copy of ``X`` whose rows are shuffled within each partition.

    Taken and adapted from opsca-v1:
    https://github.com/openproblems-bio/openproblems/blob/acf5c95a7306b819c4a13972783433d0a48f769b/openproblems/tasks/_batch_integration/_common/methods/baseline.py#L13

    Parameters
    ----------
    X
        2-D matrix supporting ``.shape``, ``.copy()`` and integer-array row
        indexing/assignment (e.g. a NumPy array). It is not modified.
    partition
        Optional 1-D array-like with one label per row of ``X``. Rows are only
        exchanged with rows carrying the same label. ``None`` (default) treats
        all rows as a single group, i.e. a global row shuffle.
    seed
        Optional seed (anything accepted by ``numpy.random.default_rng``) for
        a reproducible shuffle. ``None`` (default) draws from NumPy's global
        random state, matching the historical behaviour.

    Returns
    -------
    Same type as ``X.copy()``, with rows permuted inside each partition.

    Raises
    ------
    ValueError
        If ``partition`` is not 1-D or its length differs from ``X.shape[0]``.
    """
    n_rows = X.shape[0]
    if partition is None:
        partition = np.zeros(n_rows, dtype=int)
    else:
        partition = np.asarray(partition)
        # A mismatched partition would otherwise silently shuffle the wrong
        # rows (too short / extra dimensions) or fail with an opaque error.
        if partition.ndim != 1 or partition.shape[0] != n_rows:
            raise ValueError(
                "partition must be 1-D with one label per row of X "
                f"(expected length {n_rows}, got shape {partition.shape})"
            )

    # Keep using the global RNG unless a seed is explicitly requested, so
    # callers relying on np.random.seed() see no change.
    if seed is None:
        permute = np.random.permutation
    else:
        permute = np.random.default_rng(seed).permutation

    X_out = X.copy()
    for partition_name in np.unique(partition):
        partition_idx = np.flatnonzero(partition == partition_name)
        # Read from the untouched input so every row is moved exactly once.
        X_out[partition_idx] = X[permute(partition_idx)]
    return X_out