From 56460b49d6c52fd90d8fb1abd4aef969ff54b04d Mon Sep 17 00:00:00 2001 From: jalil Date: Sat, 14 Sep 2024 10:32:28 +0200 Subject: [PATCH] scenic added --- runs.ipynb | 19 ++++- scripts/run_benchmark_single_omics.sh | 2 +- scripts/run_grn_evaluation.sh | 1 + .../single_omics/scenic/config.vsh.yaml | 27 +++++++ src/methods/single_omics/scenic/script.py | 71 +++++++++++++++++++ src/methods/single_omics/scenic/test.sh | 1 + 6 files changed, 117 insertions(+), 4 deletions(-) create mode 100644 src/methods/single_omics/scenic/config.vsh.yaml create mode 100644 src/methods/single_omics/scenic/script.py create mode 100644 src/methods/single_omics/scenic/test.sh diff --git a/runs.ipynb b/runs.ipynb index de735f597..ba872a2a6 100644 --- a/runs.ipynb +++ b/runs.ipynb @@ -2583,11 +2583,24 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 131, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/state.yaml to rsesources/results/single_omics_inference/state.yaml\n", + "download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/trace.txt to rsesources/results/single_omics_inference/trace.txt\n", + "download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/scores.yaml to rsesources/results/single_omics_inference/scores.yaml\n", + "download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/ridge.ennet.ennet.prediction.csv to rsesources/results/single_omics_inference/ridge.ennet.ennet.prediction.csv\n", + "download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/ridge.pidc.pidc.prediction.csv to rsesources/results/single_omics_inference/ridge.pidc.pidc.prediction.csv\n", + "download: s3://openproblems-data/resources_test/grn/results/single_omics_inference/ridge.tigress.tigress.prediction.csv to 
rsesources/results/single_omics_inference/ridge.tigress.tigress.prediction.csv\n" + ] + } + ], "source": [ - "!aws s3 sync s3://openproblems-data/resources/grn/results/single_omics_all resources/results/single_omics_all" + "!aws s3 sync s3://openproblems-data/resources_test/grn/results/single_omics_inference rsesources/results/single_omics_inference" ] }, { diff --git a/scripts/run_benchmark_single_omics.sh b/scripts/run_benchmark_single_omics.sh index 3b8e39d51..b13dbb62c 100644 --- a/scripts/run_benchmark_single_omics.sh +++ b/scripts/run_benchmark_single_omics.sh @@ -3,7 +3,7 @@ # RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" RUN_ID="single_omics_inference" # resources_dir="./resources_test/" -resources_dir="s3://openproblems-data/resources_test/grn" +resources_dir="s3://openproblems-data/resources/grn" publish_dir="${resources_dir}/results/${RUN_ID}" diff --git a/scripts/run_grn_evaluation.sh b/scripts/run_grn_evaluation.sh index 20e1607d6..f975f0922 100644 --- a/scripts/run_grn_evaluation.sh +++ b/scripts/run_grn_evaluation.sh @@ -36,6 +36,7 @@ baseline_models=( baseline_pearson_causal baseline_pearson_causal_celltype baseline_pearson_causal_metacell + baseline_pearson_causal_impute positive_control ) # Start writing to the YAML file diff --git a/src/methods/single_omics/scenic/config.vsh.yaml b/src/methods/single_omics/scenic/config.vsh.yaml new file mode 100644 index 000000000..872df7f81 --- /dev/null +++ b/src/methods/single_omics/scenic/config.vsh.yaml @@ -0,0 +1,27 @@ +__merge__: ../../../api/comp_method.yaml + +functionality: + name: scenic + namespace: "grn_methods" + info: + label: scenic + summary: "GRN inference using scenic" + + + resources: + - type: python_script + path: script.py + +platforms: + - type: docker + image: aertslab/pyscenic:0.12.1 + setup: + # - type: docker + # run: | + # conda install -y -c bioconda arboreto pandas + - type: python + packages: [ anndata ] + - type: native + - type: nextflow + directives: + label: [onedaytime, midmem, 
midcpu]
diff --git a/src/methods/single_omics/scenic/script.py b/src/methods/single_omics/scenic/script.py
new file mode 100644
index 000000000..f44e2e743
--- /dev/null
+++ b/src/methods/single_omics/scenic/script.py
@@ -0,0 +1,71 @@
+import os
+import subprocess
+
+import anndata
+import numpy as np
+import pandas as pd
+from arboreto.algo import grnboost2
+from distributed import Client
+
+# Runs the pySCENIC CLI: `grn` writes TF-target adjacencies, `ctx` prunes them into regulons.
+## VIASH START
+par = {
+  'multiomics_rna': 'resources_test/grn-benchmark/multiomics_rna.h5ad',
+  "tf_all": 'resources/prior/tf_all.csv',
+  'prediction': 'output/grnboost2/prediction.csv',
+  'max_n_links': 50000,
+  'temp_dir': 'output/scenic',
+  'max_workers': 4
+}
+## VIASH END
+os.makedirs(par['temp_dir'], exist_ok=True)
+# subprocess only accepts string arguments, and max_workers may be an int
+num_workers = str(par['max_workers'])
+
+# Load scRNA-seq data
+adata_rna = anndata.read_h5ad(par['multiomics_rna'])
+gene_names = adata_rna.var.gene_ids.index.to_numpy()
+X = adata_rna.X.toarray()
+
+# The TF list is not parsed here; its path (par['tf_all']) goes straight to `pyscenic grn`
+
+# Write the expression matrix as TSV, one column per gene, for the pyscenic CLI
+expression_data = f"{par['temp_dir']}/expression_data.tsv"
+pd.DataFrame(X, columns=gene_names).to_csv(expression_data, sep='\t', index=False)
+
+expr_mat_adjacencies = f"{par['temp_dir']}/expr_mat_adjacencies.tsv"
+command = [
+    "pyscenic", "grn",
+    "--num_workers", num_workers,
+    "-o", expr_mat_adjacencies,
+    expression_data,
+    par['tf_all']
+]
+
+# Run grn
+subprocess.run(command, check=True)
+
+# Run prune
+regulons = f"{par['temp_dir']}/regulons.csv"
+annotations_fname = "/data/motifs-v9-nr.hgnc-m0.001-o0.0.tbl"
+# No trailing whitespace in these paths: list arguments reach pyscenic verbatim
+ranking_1 = "/data/hg19-tss-centered-5kb-7species.mc9nr.genes_vs_motifs.rankings.feather"
+ranking_2 = "/data/hg19-tss-centered-10kb-7species.mc9nr.genes_vs_motifs.rankings.feather"
+command = [
+    "pyscenic", "ctx",
+    expr_mat_adjacencies, ranking_1, ranking_2,
+    "--annotations_fname", annotations_fname,
+    "--expression_mtx_fname", expression_data,
+    "--mode", "custom_multiprocessing",
+    "--output", regulons,
+    "--num_workers", num_workers
+]
+subprocess.run(command, check=True)
+
+# Save inferred GRN
+# The saved prediction is the `grn` adjacency table; the pruned regulons stay in temp_dir
+print(expr_mat_adjacencies)
+network = pd.read_csv(expr_mat_adjacencies, sep='\t')
+network.to_csv(par['prediction'], sep=',')
+
+print('Finished.')
diff --git a/src/methods/single_omics/scenic/test.sh b/src/methods/single_omics/scenic/test.sh
new file mode 100644
index 000000000..f7af173e6
--- /dev/null
+++ b/src/methods/single_omics/scenic/test.sh
@@ -0,0 +1 @@
+viash run src/methods/single_omics/scenic/config.vsh.yaml -- --multiomics_rna resources_test/grn-benchmark/multiomics_rna.h5ad --tf_all resources/prior/tf_all.csv --prediction output/scenic_prediction.csv --temp_dir output/scenic
\ No newline at end of file