Skip to content

Commit

Permalink
portia and grnboost edited
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Sep 19, 2024
1 parent f15751f commit ec0eeb6
Show file tree
Hide file tree
Showing 16 changed files with 1,250 additions and 295 deletions.
1,106 changes: 1,106 additions & 0 deletions NN-grn-inference.ipynb

Large diffs are not rendered by default.

292 changes: 71 additions & 221 deletions runs.ipynb

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions scripts/run_benchmark_all.sh
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
#!/bin/bash

# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
RUN_ID="benchmark_donor_0_baselines_nonspecific_notnormalized"
RUN_ID="donor_0_normalized_noncelltype_hvg"
# resources_dir="./resources/"
resources_dir="s3://openproblems-data/resources/grn"
publish_dir="${resources_dir}/results/${RUN_ID}"

reg_type=ridge
subsample=-2
max_workers=10
num_workers=10
layer='scgen_pearson'
metric_ids="[regression_1, regression_2]"
cell_type_specific=false #for controls
normalize=false
only_hvgs=false
normalize=true
only_hvgs=true
# method_ids="[tigress, ennet, scsgl, pidc]"
method_ids="[pearson_corr, pearson_causal, positive_control]"

Expand All @@ -30,7 +30,7 @@ param_list:
multiomics_atac: ${resources_dir}/grn-benchmark/multiomics_atac_0.h5ad
reg_type: $reg_type
subsample: $subsample
max_workers: $max_workers
num_workers: $num_workers
layer: $layer
consensus: ${resources_dir}/prior/consensus-num-regulators.json
tf_all: ${resources_dir}/prior/tf_all.csv
Expand Down
11 changes: 0 additions & 11 deletions scripts/sbatch/batch_portia.sh

This file was deleted.

14 changes: 0 additions & 14 deletions scripts/sbatch/batch_scenic.sh

This file was deleted.

11 changes: 0 additions & 11 deletions scripts/sbatch/batch_scenicplus.sh

This file was deleted.

12 changes: 12 additions & 0 deletions scripts/sbatch/cpu.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Slurm batch wrapper: run one single-omics GRN-inference method inside a
# Singularity container on a CPU node.
#
# Usage: sbatch cpu.sh <image> <method>
#   $1 — container image file name, resolved relative to ../../images/
#   $2 — method directory name under src/methods/single_omics/
#        (its script.py entry point is executed)
#
# NOTE(review): no `set -euo pipefail` and no validation of $1/$2 — a missing
# argument silently produces a malformed path; confirm all callers pass both.
#SBATCH --time=24:00:00
#SBATCH --output=logs/%j.out
#SBATCH --error=logs/%j.err
#SBATCH --mail-type=END
#SBATCH [email protected]
#SBATCH --cpus-per-task=20
#SBATCH --mem=64G

# Echo the exact command first so it is recorded in the job's stdout log.
echo "singularity exec ../../images/${1} python src/methods/single_omics/${2}/script.py "
singularity exec ../../images/${1} python src/methods/single_omics/${2}/script.py

File renamed without changes.
2 changes: 2 additions & 0 deletions src/methods/single_omics/genie3/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ functionality:
resources:
- type: python_script
path: script.py
- path: /src/utils/util.py
dest: util.py

platforms:
- type: docker
Expand Down
2 changes: 2 additions & 0 deletions src/methods/single_omics/grnboost2/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ functionality:
resources:
- type: python_script
path: script.py
- path: /src/utils/util.py
dest: util.py

platforms:
- type: docker
Expand Down
23 changes: 15 additions & 8 deletions src/methods/single_omics/grnboost2/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,26 @@
par = {
'multiomics_rna': 'resources/grn-benchmark/multiomics_rna_0.h5ad',
"tf_all": 'resources/prior/tf_all.csv',
'prediction': 'output/grnboost2_donor_0.csv',
'prediction': 'output/grnboost2_donor_0_hvg.csv',
'max_n_links': 50000,
'cell_type_specific': False}
'cell_type_specific': False,
'normalize': False,
'only_hvgs': True
}
## VIASH END

def process_links(net, par):
net = net[net.source!=net.target]
net_sorted = net.reindex(net['weight'].abs().sort_values(ascending=False).index)
net = net_sorted.head(par['max_n_links']).reset_index(drop=True)
return net
import sys
meta= {
"resources_dir": 'src/utils/'
}
sys.path.append(meta["resources_dir"])
from util import process_data, process_links
par['normalize']=False
# Load scRNA-seq data
print('Reading data')
adata_rna = anndata.read_h5ad(par['multiomics_rna'])
# adata_rna.X = adata_rna.layers['lognorm'] #TODO: fix this
process_data(adata_rna, par)

groups = adata_rna.obs.cell_type
gene_names = adata_rna.var.gene_ids.index.to_numpy()
X = adata_rna.X
Expand Down
2 changes: 2 additions & 0 deletions src/methods/single_omics/portia/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ functionality:
resources:
- type: python_script
path: script.py
- path: /src/utils/util.py
dest: util.py

platforms:
- type: docker
Expand Down
23 changes: 13 additions & 10 deletions src/methods/single_omics/portia/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,28 @@
par = {
'multiomics_rna': 'resources/grn-benchmark/multiomics_rna_0.h5ad',
'tf_all': 'resources/prior/tf_all.csv',
'prediction': 'output/portia_celltype_0.csv',
'prediction': 'output/portia_donor_0_hvgs.csv',
'max_n_links': 50000,
'cell_type_specific': True
'cell_type_specific': False,
'normalize': False,
'only_hvgs': True
}
## VIASH END


import sys
meta= {
"resources_dir": 'src/utils/'
}
sys.path.append(meta["resources_dir"])
from util import process_data, process_links
par['normalize']=False
# Load scRNA-seq data
print('Reading data')
adata_rna = anndata.read_h5ad(par['multiomics_rna'])
process_data(adata_rna, par)

gene_names = adata_rna.var.gene_ids.index.to_numpy()
X = adata_rna.X.toarray() if scipy.sparse.issparse(adata_rna.X) else adata_rna.X

def process_links(net, par):
net = net[net.source!=net.target]
net_sorted = net.reindex(net['weight'].abs().sort_values(ascending=False).index)
net = net_sorted.head(par['max_n_links']).reset_index(drop=True)
return net

# Load list of putative TFs
tfs = np.loadtxt(par['tf_all'], dtype=str)
tf_names = [gene_name for gene_name in gene_names if (gene_name in tfs)]
Expand Down
2 changes: 2 additions & 0 deletions src/methods/single_omics/ppcor/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ functionality:
resources:
- type: r_script
path: script.R
- path: /src/utils/util.py
dest: util.py

platforms:
- type: docker
Expand Down
33 changes: 19 additions & 14 deletions src/methods/single_omics/scenic/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,34 @@

## VIASH START
par = {
'multiomics_rna': 'resources/grn-benchmark/multiomics_rna.h5ad',
'multiomics_rna': 'resources/grn-benchmark/multiomics_rna_0.h5ad',
"tf_all": 'resources/prior/tf_all.csv',
'prediction': 'output/scenic/scenic.csv',
'prediction': 'output/scenic_0_hvgs.csv',
'temp_dir': 'output/scenic',
'num_workers': 20,
'max_n_links': 50000,
'rank_threshold': 10000,
'auc_threshold': 0.05,
'nes_threshold': 3.0,
'seed': "32"
'seed': "32",
'normalize': False,
'only_hvgs': True
}
## VIASH END
os.makedirs(par['temp_dir'], exist_ok=True)

# Load list of putative TFs
# df = pd.read_csv(par["tf_all"], header=None, names=['gene_name'])
# tfs = set(list(df['gene_name']))
# tf_names = [gene_name for gene_name in gene_names if (gene_name in tfs)]
import sys
meta= {
"resources_dir": 'src/utils/'
}
sys.path.append(meta["resources_dir"])
from util import process_data, process_links
par['normalize']=False
# Load scRNA-seq data
print('Reading data')
adata_rna = anndata.read_h5ad(par['multiomics_rna'])
process_data(adata_rna, par)

os.makedirs(par['temp_dir'], exist_ok=True)

databases = f"{par['temp_dir']}/databases/"
os.makedirs(databases, exist_ok=True)
Expand All @@ -54,16 +64,11 @@
response = requests.get("https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc_v10_clust/gene_based/hg38_500bp_up_100bp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather")
with open(par['genes_vs_motifs_500'], "wb") as file:
file.write(response.content)

expr_mat_adjacencies = os.path.join(par['temp_dir'], "expr_mat_adjacencies.tsv")
expression_data = os.path.join(par['temp_dir'], "expression_data.tsv")
regulons = f"{par['temp_dir']}/regulons.csv"

def format_data(par):
print('Read data')
adata_rna = anndata.read_h5ad(par['multiomics_rna'])
gene_names = adata_rna.var_names
pd.DataFrame(adata_rna.X.todense(), columns=gene_names).to_csv(expression_data, sep='\t', index=False)


def run_grn(par):
print('Run grn')
Expand Down
2 changes: 1 addition & 1 deletion src/utils/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import anndata as ad
import numpy as np
from tqdm import tqdm
import scanpy as sc
from sklearn.preprocessing import StandardScaler
import scipy.sparse as sp

Expand Down Expand Up @@ -33,6 +32,7 @@ def corr_net(X, gene_names, par, tf_all, causal=False):

def process_data(adata, par):
if par['normalize']:
import scanpy as sc
print('Noramlize data')
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
Expand Down

0 comments on commit ec0eeb6

Please sign in to comment.