Skip to content

Commit

Permalink
test files rds added
Browse files Browse the repository at this point in the history
  • Loading branch information
matin authored and matin committed Jul 23, 2024
1 parent 4b60ce1 commit 75ba304
Show file tree
Hide file tree
Showing 11 changed files with 77 additions and 72 deletions.
26 changes: 14 additions & 12 deletions notebooks/create_resources.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -166,23 +166,23 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# test rresources"
"# test resources"
]
},
{
"cell_type": "code",
"execution_count": 76,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"test_resource_dir = f'{resource_dir}/../../resources_test/grn-benchmark'\n",
"os.makedirs(test_resource_dir, exist_ok=True)"
"os.makedirs(test_resource_dir, exist_ok=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -192,7 +192,7 @@
},
{
"cell_type": "code",
"execution_count": 69,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -201,12 +201,14 @@
"hvgs = ad.read_h5ad(f'{resource_dir}/prior_data.h5ad').uns['hvgs']\n",
"genes_multi = ad.read_h5ad(f'{resource_dir}/prior_data.h5ad').uns['gene_names']\n",
"tfs = ad.read_h5ad(f'{resource_dir}/prior_data.h5ad').uns['tf_list']\n",
"genes = set(tfs) & set(genes_multi)\n"
"genes = set(tfs) & set(genes_multi)\n",
"\n",
"peaks = np.random.choice(peaks, 1000)\n"
]
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -219,17 +221,17 @@
},
{
"cell_type": "code",
"execution_count": 77,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"View of AnnData object with n_obs × n_vars = 1000 × 4962\n",
"View of AnnData object with n_obs × n_vars = 1000 × 868\n",
" obs: 'cell_type', 'donor_id'"
]
},
"execution_count": 77,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -242,7 +244,7 @@
},
{
"cell_type": "code",
"execution_count": 78,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand Down
23 changes: 13 additions & 10 deletions src/methods/figr/script.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ par <- list(
multiomics_rna = "resources_test/grn-benchmark/multiomics_r/rna.rds",
multiomics_atac = "resources_test/grn-benchmark/multiomics_r/atac.rds",
temp_dir = "output/figr/",
cell_topic = "resources/grn-benchmark/supp/cell_topic.csv",
num_workers = 40,
cell_topic = "resources_test/grn-benchmark/supp/cell_topic.csv",
num_workers = 1,
n_topics = 48,
peak_gene = "output/figr/peak_gene.csv",
prediction= "output/figr/prediction.csv"
Expand All @@ -21,6 +21,7 @@ par <- list(
# functionality_name = "my_method_r"
# )
## VIASH END
dir.create(par$temp_dir, recursive = TRUE, showWarnings = TRUE)

cellknn_func <- function(par) {
## load cell topic probabilities and create cell-cluster matrix
Expand All @@ -30,9 +31,9 @@ cellknn_func <- function(par) {
cellkNN <- get.knn(cell_topic, k=par$n_topics)$nn.index
rownames(cellkNN) <- rownames(cell_topic)
print(dim(cellkNN))
print(paste0(par$temp_dir, "cellkNN.rds"))
saveRDS(cellkNN, paste0(par$temp_dir, "cellkNN.rds"))
}
print(par)
cellknn_func(par)

## Step1: Peak-gene association testing
Expand All @@ -48,6 +49,8 @@ peak_gene_func <- function(par){
write.csv(cisCorr, paste0(par$temp_dir, "cisCorr.csv"), row.names = TRUE)
}

peak_gene_func(par)

## Step 2: create DORCs and smooth them
dorc_genes_func <- function(par){
cisCorr = read.csv(paste0(par$temp_dir, "cisCorr.csv"))
Expand All @@ -61,10 +64,12 @@ dorc_genes_func <- function(par){
dorcTab = cisCorr.filt,
geneList = allGenes,
nCores = par$num_workers)

print(print(paste0(par$temp_dir, "cellkNN.rds")))
cellkNN = readRDS(paste0(par$temp_dir, "cellkNN.rds"))
# Smooth dorc scores using cell KNNs (k=n_topics)
n_topics = par$n_topics
common_cells <- intersect(rownames(cellkNN), colnames(rna))
cellkNN = cellkNN[common_cells,]
dorcMat.s <- smoothScoresNN(NNmat = cellkNN[,1:n_topics], mat = dorcMat, nCores = 4)

# Smooth RNA using cell KNNs
Expand All @@ -76,6 +81,7 @@ dorc_genes_func <- function(par){
saveRDS(RNAmat.s, paste0(par$temp_dir, "RNAmat.s.RDS"))
saveRDS(dorcMat.s, paste0(par$temp_dir, "dorcMat.s.RDS"))
}
dorc_genes_func(par)

## TF-gene associations
tf_gene_association_func <- function(par){
Expand All @@ -93,7 +99,7 @@ tf_gene_association_func <- function(par){

write.csv(figR.d, paste0(par$temp_dir, "figR.d.csv"))
}

tf_gene_association_func(par)
extract_peak_gene_func <- function(par) {
# Read the CSV file
peak_gene_figr <- read.csv(file.path(par$temp_dir, "cisCorr.filt.csv"))
Expand All @@ -117,6 +123,7 @@ extract_peak_gene_func <- function(par) {
# Write the result to a CSV file
write.csv(peak_gene_figr, file = par$peak_gene, row.names = FALSE)
}
extract_peak_gene_func(par)

filter_figr_grn <- function(par) {
# Read the CSV file
Expand Down Expand Up @@ -145,8 +152,4 @@ filter_figr_grn <- function(par) {
}


# peak_gene_func(par)
# dorc_genes_func(par)
# tf_gene_association_func(par)
# extract_peak_gene_func(par)
# filter_figr_grn(par)
filter_figr_grn(par)
2 changes: 2 additions & 0 deletions src/methods/scglue/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def preprocess(rna, atac, par):
nx.write_graphml(guidance, f"{par['temp_dir']}/guidance.graphml.gz")

def training(par):
os.makedirs(f"{par['temp_dir']}/glue", exist_ok=True)
rna = ad.read_h5ad(f"{par['temp_dir']}/rna.h5ad")
atac = ad.read_h5ad(f"{par['temp_dir']}/atac.h5ad")
guidance = nx.read_graphml(f"{par['temp_dir']}/guidance.graphml.gz")
Expand All @@ -82,6 +83,7 @@ def training(par):
atac, "NB", use_highly_variable=False,
use_rep="X_lsi"
)


glue = scglue.models.fit_SCGLUE(
{"rna": rna, "atac": atac}, guidance,
Expand Down
3 changes: 3 additions & 0 deletions src/methods/scglue/script.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pandas as pd
import anndata as ad
import sys
import os

## VIASH START
par = {
Expand All @@ -17,8 +18,10 @@
# "resources_dir":'resources'
# }


sys.path.append(meta["resources_dir"])
from main import main
os.makedirs(par['temp_dir'], exist_ok=True)
prediction = main(par)

print('Write output to file', flush=True)
Expand Down
42 changes: 0 additions & 42 deletions src/pre_methods/format_multiomics_R/script.R

This file was deleted.

29 changes: 29 additions & 0 deletions src/process_data/adata_2_matrix/config.novsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Component configuration for `reformat_resources_r`: declares the component's
# metadata, its input arguments, the script it runs, and its target platforms.
# NOTE(review): nesting indentation is not visible in this view; confirm the
# YAML hierarchy against the file on disk.
# NOTE(review): the name ends in `_r` while the resource below is a
# python_script; confirm the name is intended.
functionality:
name: reformat_resources_r
info:
label: reformat_resources_r
summary: "Converts data to format needed for R methods."

# Inputs. Each argument also references a shared API definition via __merge__.
arguments:
# RNA input (required).
- name: --multiomics_rna
__merge__: ../../api/file_multiomics_rna_h5ad.yaml
required: True
direction: input
# ATAC input (optional).
- name: --multiomics_atac
__merge__: ../../api/file_multiomics_atac_h5ad.yaml
required: false
direction: input

# Implementation: a Python script (script.py).
resources:
- type: python_script
path: script.py


# Platforms listed for this component: docker (with a base Python image),
# native, and nextflow (with resource labels).
platforms:
- type: docker
image: ghcr.io/openproblems-bio/base_python:1.0.4

- type: native
- type: nextflow
directives:
label: [midtime,midmem,midcpu]
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
from scipy.io import mmwrite

par = {
"multiomics_rna": "resources/grn-benchmark/multiomics_rna.h5ad",
"multiomics_atac": "resources/grn-benchmark/multiomics_atac.h5ad",
"temp_dir": 'output/figr'
"multiomics_rna": "resources_test/grn-benchmark/multiomics_rna.h5ad",
"multiomics_atac": "resources_test/grn-benchmark/multiomics_atac.h5ad",
"temp_dir": 'resources_test/matrixdata'

}

Expand Down Expand Up @@ -46,4 +46,6 @@ def format_data(par):

print('Format data completed', flush=True)

format_data(par)
format_data(par)


File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ functionality:
__merge__: ../../api/file_multiomics_atac_h5ad.yaml
required: false
direction: input

- name: --rna_rds
required: false
direction: output
# ATAC output, counterpart of --rna_rds. Named `atac_rds` (was `atac_rna`, an
# apparent typo) so it matches the sibling argument's `_rds` suffix.
# NOTE(review): presumably consumed as `par$atac_rds` by the R script; confirm.
- name: --atac_rds
required: false
direction: output

resources:
- type: r_script
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@ library(FNN)
library(chromVAR)
library(doParallel)
library(FigR)
library(optparse)

# Example usage
par <- list(
temp_dir = 'output/figr',
rna_rds = 'resources/grn-benchmark/multiomics_r/rna.rds',
atac_rds = 'resources/grn-benchmark/multiomics_r/atac.rds'
temp_dir = 'resources_test/matrixdata',
rna_rds = 'resources_test/grn-benchmark/multiomics_r/rna.rds',
atac_rds = 'resources_test/grn-benchmark/multiomics_r/atac.rds'
)


Expand Down

0 comments on commit 75ba304

Please sign in to comment.