diff --git a/notebooks/create_resources.ipynb b/notebooks/create_resources.ipynb index de7ac8f2f..f0f5b314f 100644 --- a/notebooks/create_resources.ipynb +++ b/notebooks/create_resources.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -166,23 +166,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# test rresources" + "# test resources" ] }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import os\n", "test_resource_dir = f'{resource_dir}/../../resources_test/grn-benchmark'\n", - "os.makedirs(test_resource_dir, exist_ok=True)" + "os.makedirs(test_resource_dir, exist_ok=True)\n" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -192,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -201,12 +201,14 @@ "hvgs = ad.read_h5ad(f'{resource_dir}/prior_data.h5ad').uns['hvgs']\n", "genes_multi = ad.read_h5ad(f'{resource_dir}/prior_data.h5ad').uns['gene_names']\n", "tfs = ad.read_h5ad(f'{resource_dir}/prior_data.h5ad').uns['tf_list']\n", - "genes = set(tfs) & set(genes_multi)\n" + "genes = set(tfs) & set(genes_multi)\n", + "\n", + "peaks = np.random.choice(peaks, 1000)\n" ] }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -219,17 +221,17 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "View of AnnData object with n_obs × n_vars = 1000 × 4962\n", + "View of AnnData object with n_obs × n_vars = 1000 × 868\n", " obs: 'cell_type', 'donor_id'" ] }, - "execution_count": 77, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -242,7 +244,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ diff --git a/src/methods/figr/script.R b/src/methods/figr/script.R index 1d5910933..05ac04ba7 100644 --- a/src/methods/figr/script.R +++ b/src/methods/figr/script.R @@ -11,8 +11,8 @@ par <- list( multiomics_rna = "resources_test/grn-benchmark/multiomics_r/rna.rds", multiomics_atac = "resources_test/grn-benchmark/multiomics_r/atac.rds", temp_dir = "output/figr/", - cell_topic = "resources/grn-benchmark/supp/cell_topic.csv", - num_workers = 40, + cell_topic = "resources_test/grn-benchmark/supp/cell_topic.csv", + num_workers = 1, n_topics = 48, peak_gene = "output/figr/peak_gene.csv", prediction= "output/figr/prediction.csv" @@ -21,6 +21,7 @@ par <- list( # functionality_name = "my_method_r" # ) ## VIASH END +dir.create(par$temp_dir, recursive = TRUE, showWarnings = TRUE) cellknn_func <- function(par) { ## load cell topic probabilities and create cell-cluster matrix @@ -30,9 +31,9 @@ cellknn_func <- function(par) { cellkNN <- get.knn(cell_topic, k=par$n_topics)$nn.index rownames(cellkNN) <- rownames(cell_topic) print(dim(cellkNN)) + print(paste0(par$temp_dir, "cellkNN.rds")) saveRDS(cellkNN, paste0(par$temp_dir, "cellkNN.rds")) } -print(par) cellknn_func(par) ## Step1: Peak-gene association testing @@ -48,6 +49,8 @@ peak_gene_func <- function(par){ write.csv(cisCorr, paste0(par$temp_dir, "cisCorr.csv"), row.names = TRUE) } +peak_gene_func(par) + ## Step 2: create DORCs and smooth them dorc_genes_func <- function(par){ cisCorr = read.csv(paste0(par$temp_dir, "cisCorr.csv")) @@ -61,10 +64,12 @@ dorc_genes_func <- function(par){ dorcTab = cisCorr.filt, geneList = allGenes, nCores = par$num_workers) - + print(print(paste0(par$temp_dir, "cellkNN.rds"))) cellkNN = readRDS(paste0(par$temp_dir, "cellkNN.rds")) # Smooth dorc scores using cell KNNs (k=n_topics) n_topics = par$n_topics + common_cells <- intersect(rownames(cellkNN), colnames(rna)) + cellkNN = cellkNN[common_cells,] dorcMat.s <- smoothScoresNN(NNmat = cellkNN[,1:n_topics], mat = dorcMat, nCores = 4) # Smooth RNA using cell KNNs @@ -76,6 +81,7 @@ dorc_genes_func <- function(par){ saveRDS(RNAmat.s, paste0(par$temp_dir, "RNAmat.s.RDS")) saveRDS(dorcMat.s, paste0(par$temp_dir, "dorcMat.s.RDS")) } +dorc_genes_func(par) ## TF-gene associations tf_gene_association_func <- function(par){ @@ -93,7 +99,7 @@ tf_gene_association_func <- function(par){ write.csv(figR.d, paste0(par$temp_dir, "figR.d.csv")) } - +tf_gene_association_func(par) extract_peak_gene_func <- function(par) { # Read the CSV file peak_gene_figr <- read.csv(file.path(par$temp_dir, "cisCorr.filt.csv")) @@ -117,6 +123,7 @@ extract_peak_gene_func <- function(par) { # Write the result to a CSV file write.csv(peak_gene_figr, file = par$peak_gene, row.names = FALSE) } +extract_peak_gene_func(par) filter_figr_grn <- function(par) { # Read the CSV file @@ -145,8 +152,4 @@ filter_figr_grn <- function(par) { } -# peak_gene_func(par) -# dorc_genes_func(par) -# tf_gene_association_func(par) -# extract_peak_gene_func(par) -# filter_figr_grn(par) \ No newline at end of file +filter_figr_grn(par) \ No newline at end of file diff --git a/src/methods/scglue/main.py b/src/methods/scglue/main.py index e0a112f12..23f8aec7f 100644 --- a/src/methods/scglue/main.py +++ b/src/methods/scglue/main.py @@ -71,6 +71,7 @@ def preprocess(rna, atac, par): nx.write_graphml(guidance, f"{par['temp_dir']}/guidance.graphml.gz") def training(par): + os.makedirs(f"{par['temp_dir']}/glue", exist_ok=True) rna = ad.read_h5ad(f"{par['temp_dir']}/rna.h5ad") atac = ad.read_h5ad(f"{par['temp_dir']}/atac.h5ad") guidance = nx.read_graphml(f"{par['temp_dir']}/guidance.graphml.gz") @@ -82,6 +83,7 @@ def training(par): atac, "NB", use_highly_variable=False, use_rep="X_lsi" ) + glue = scglue.models.fit_SCGLUE( {"rna": rna, "atac": atac}, guidance, diff --git a/src/methods/scglue/script.py b/src/methods/scglue/script.py index 7f5970fee..19aff33c8 100644 --- a/src/methods/scglue/script.py +++ b/src/methods/scglue/script.py @@ -1,6 +1,7 @@ import pandas as pd import anndata as ad import sys +import os ## VIASH START par = { @@ -17,8 +18,10 @@ # "resources_dir":'resources' # } + sys.path.append(meta["resources_dir"]) from main import main +os.makedirs(par['temp_dir'], exist_ok=True) prediction = main(par) print('Write output to file', flush=True) diff --git a/src/pre_methods/format_multiomics_R/script.R b/src/pre_methods/format_multiomics_R/script.R deleted file mode 100644 index 86cc557c7..000000000 --- a/src/pre_methods/format_multiomics_R/script.R +++ /dev/null @@ -1,42 +0,0 @@ -library(anndata) - - -## VIASH START -par <- list( - multiomics_rna = "resources_test/grn-benchmark/multiomics_rna.h5ad", - multiomics_atac = "resources_test/grn-benchmark/multiomics_atac.h5ad", - temp_dir = 'output/temp_figr/', - num_workers = 4, - n_topics = 48, - prediction= "output/prediction.csv" -) -# meta <- list( -# functionality_name = "my_method_r" -# ) -## VIASH END - -cat("Reading input files\n") -input_train <- anndata::read_h5ad(par[["input_train"]]) -input_test <- anndata::read_h5ad(par[["input_test"]]) - -cat("Preprocess data\n") -# ... preprocessing ... - -cat("Train model\n") -# ... train model ... - -cat("Generate predictions\n") -# ... generate predictions ... - -cat("Write output AnnData to file\n") -output <- anndata::AnnData( - obs = list( - label_pred = obs_label_pred - ), - uns = list( - dataset_id = input_train$uns[["dataset_id"]], - normalization_id = input_train$uns[["normalization_id"]], - method_id = meta[["functionality_name"]] - ) -) -output$write_h5ad(par[["output"]], compression = "gzip") \ No newline at end of file diff --git a/src/process_data/adata_2_matrix/config.novsh.yaml b/src/process_data/adata_2_matrix/config.novsh.yaml new file mode 100644 index 000000000..6b0c0a9f3 --- /dev/null +++ b/src/process_data/adata_2_matrix/config.novsh.yaml @@ -0,0 +1,29 @@ +functionality: + name: reformat_resources_r + info: + label: reformat_resources_r + summary: "Converts data to format needed for R methods." + + arguments: + - name: --multiomics_rna + __merge__: ../../api/file_multiomics_rna_h5ad.yaml + required: True + direction: input + - name: --multiomics_atac + __merge__: ../../api/file_multiomics_atac_h5ad.yaml + required: false + direction: input + + resources: + - type: python_script + path: script.py + + +platforms: + - type: docker + image: ghcr.io/openproblems-bio/base_python:1.0.4 + + - type: native + - type: nextflow + directives: + label: [midtime,midmem,midcpu] diff --git a/src/pre_methods/format_multiomics_R/format_data.py b/src/process_data/adata_2_matrix/script.py similarity index 87% rename from src/pre_methods/format_multiomics_R/format_data.py rename to src/process_data/adata_2_matrix/script.py index a8e5f00fa..d8125b5c0 100644 --- a/src/pre_methods/format_multiomics_R/format_data.py +++ b/src/process_data/adata_2_matrix/script.py @@ -7,9 +7,9 @@ from scipy.io import mmwrite par = { - "multiomics_rna": "resources/grn-benchmark/multiomics_rna.h5ad", - "multiomics_atac": "resources/grn-benchmark/multiomics_atac.h5ad", - "temp_dir": 'output/figr' + "multiomics_rna": "resources_test/grn-benchmark/multiomics_rna.h5ad", + "multiomics_atac": "resources_test/grn-benchmark/multiomics_atac.h5ad", + "temp_dir": 'resources_test/matrixdata' } @@ -46,4 +46,6 @@ def format_data(par): print('Format data completed', flush=True) -format_data(par) \ No newline at end of file +format_data(par) + + diff --git a/src/pre_methods/cistopic/cistopic.py b/src/process_data/cistopic/cistopic.py similarity index 100% rename from src/pre_methods/cistopic/cistopic.py rename to src/process_data/cistopic/cistopic.py diff --git a/src/pre_methods/cistopic/config.novsh.yaml b/src/process_data/cistopic/config.novsh.yaml similarity index 100% rename from src/pre_methods/cistopic/config.novsh.yaml rename to src/process_data/cistopic/config.novsh.yaml diff --git a/src/pre_methods/format_multiomics_R/config.novsh.yaml b/src/process_data/format_multiomics_R/config.novsh.yaml similarity index 84% rename from src/pre_methods/format_multiomics_R/config.novsh.yaml rename to src/process_data/format_multiomics_R/config.novsh.yaml index 5ca88042d..9bbd8cdc6 100644 --- a/src/pre_methods/format_multiomics_R/config.novsh.yaml +++ b/src/process_data/format_multiomics_R/config.novsh.yaml @@ -13,6 +13,13 @@ functionality: __merge__: ../../api/file_multiomics_atac_h5ad.yaml required: false direction: input + + - name: --rna_rds + required: false + direction: output + - name: --atac_rna + required: false + direction: output resources: - type: r_script diff --git a/src/pre_methods/format_multiomics_R/format_data_r.R b/src/process_data/format_multiomics_R/format_data_r.R similarity index 90% rename from src/pre_methods/format_multiomics_R/format_data_r.R rename to src/process_data/format_multiomics_R/format_data_r.R index ce49adba4..36a44520a 100644 --- a/src/pre_methods/format_multiomics_R/format_data_r.R +++ b/src/process_data/format_multiomics_R/format_data_r.R @@ -3,13 +3,12 @@ library(FNN) library(chromVAR) library(doParallel) library(FigR) -library(optparse) # Example usage par <- list( - temp_dir = 'output/figr', - rna_rds = 'resources/grn-benchmark/multiomics_r/rna.rds', - atac_rds = 'resources/grn-benchmark/multiomics_r/atac.rds' + temp_dir = 'resources_test/matrixdata', + rna_rds = 'resources_test/grn-benchmark/multiomics_r/rna.rds', + atac_rds = 'resources_test/grn-benchmark/multiomics_r/atac.rds' )