Skip to content

Commit

Permalink
baseline corr updated
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Sep 13, 2024
1 parent 85c6435 commit 4a35f67
Show file tree
Hide file tree
Showing 10 changed files with 184 additions and 137 deletions.
15 changes: 8 additions & 7 deletions scripts/run_grn_evaluation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
# reg_type=${1} #GB, ridge
viash ns build --parallel
reg_type=ridge

RUN_ID="grn_evaluation_all_${reg_type}"
Expand Down Expand Up @@ -80,15 +81,14 @@ HERE
# done

## controls
# append_entry_control "negative_control" "False" ""
# append_entry_control "positive_control" "False" ""
# append_entry_control "baseline_pearson" "False" "pearson"
# append_entry_control "negative_control" "" "" "false" "false" "false"
# append_entry_control "positive_control" "" "" "false" "false" "false"
append_entry_control "baseline_pearson" "False" "pearson" "false" "false" "false"
append_entry_control "baseline_dotproduct" "False" "dotproduct" "false" "false" "false"
# append_entry_control "baseline_pearson_causal" "True" "pearson"
append_entry_control "baseline_dotproduct_causal" "True" "dotproduct" "false" "false" "false"
# append_entry_control "baseline_dotproduct_causal_cell_type" "True" "dotproduct" "true"
# append_entry_control "baseline_dotproduct_causal_metacell" "True" "dotproduct" "false" "true"
# append_entry_control "baseline_dotproduct_causal_impute" "True" "dotproduct" "false" "false" "true"
append_entry_control "baseline_dotproduct_causal_cell_type" "True" "dotproduct" "true" "false" "false"
append_entry_control "baseline_dotproduct_causal_metacell" "True" "dotproduct" "false" "true" "false"
append_entry_control "baseline_dotproduct_causal_impute" "True" "dotproduct" "false" "false" "true"
# append_entry_control "baseline_corr_causal_spearman" "True" "spearman"


Expand All @@ -104,6 +104,7 @@ nextflow run . \
-with-trace \
-c src/common/nextflow_helpers/labels_ci.config \
-params-file ${param_file}
subl resources/results/grn_evaluation_all_ridge/scores.yaml

# ./tw-windows-x86_64.exe launch `
# https://github.com/openproblems-bio/task_grn_inference.git `
Expand Down
38 changes: 24 additions & 14 deletions src/control_methods/baseline_corr/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,45 +9,56 @@
## VIASH START
par = {
}

print(par)

## VIASH END

def select_top_links(net, par):
net_sorted = net.reindex(net['weight'].abs().sort_values(ascending=False).index)
net = net_sorted.head(par['max_n_links']).reset_index(drop=True)
return net

def create_corr_net(X: np.ndarray, groups: np.ndarray, method="pearson"):
grn = pd.DataFrame()
i = 0
for group in tqdm(np.unique(groups), desc="Processing groups"):
X_sub = X[groups == group, :]
if method == "dotproduct":
print('dotproduct')
net = X_sub.T.dot(X_sub)
elif method == "pearson":
print('pearson')
net = np.corrcoef(X_sub.T)
net = np.nan_to_num(net, nan=0.0, posinf=0.0, neginf=0.0)
elif method == "spearman":
net = spearmanr(X_sub).statistic
net = np.nan_to_num(net, nan=0.0, posinf=0.0, neginf=0.0)


net = pd.DataFrame(net, index=gene_names, columns=gene_names)
if par['causal']:
print('causal')
net = net[tf_all]
else:
print('non causal')
net = net.sample(len(tf_all), axis=1, random_state=par['seed'])

# flatten to source-target-weight
net = net.reset_index().melt(id_vars='index', var_name='source', value_name='weight')
net.rename(columns={'index': 'target'}, inplace=True)

net = select_top_links(net, par)
net['cell_type'] = group

grn = pd.concat([grn, net], axis=0).reset_index(drop=True)
if i==0:
grn = net
else:
grn = pd.concat([grn, net], axis=0).reset_index(drop=True)

i += 1

if par['cell_type_specific']==False:
print('Non specific')
grn.drop(columns=['cell_type'], inplace=True)
grn = grn.groupby(['source', 'target']).mean().reset_index()
print(grn)
net = select_top_links(net, par)

return grn
print('Read data')
multiomics_rna = ad.read_h5ad(par["multiomics_rna"])
multiomics_rna = multiomics_rna[:,:2000] #TODO: togo
# multiomics_rna = multiomics_rna[:,:5000] #TODO: togo

if par['metacell']:
print('metacell')
Expand Down Expand Up @@ -108,6 +119,5 @@ def create_meta_cells(df, n_cells=15):




print('Output GRN')
net.to_csv(par['prediction'])
5 changes: 5 additions & 0 deletions src/control_methods/baseline_corr/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
viash run src/control_methods/baseline_corr/config.vsh.yaml -- \
--prediction output/baseline_corr.csv \
--multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \
--tf_all resources/prior/tf_all.csv \
--causal true
7 changes: 4 additions & 3 deletions src/exp_analysis/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,23 @@ functionality:
type: file
required: false
direction: input
default: resources/grn-benchmark/perturbation_data.h5ad
example: resources_test/grn-benchmark/perturbation_data.h5ad
- name: --tf_gene_net
- name: --prediction
type: file
required: true
direction: input
example: resources_test/grn_models/collectri.csv
- name: --peak_gene_net
type: file
required: false
direction: input
example: resources_test/peak_gene_models/collectri.csv
- name: --annot_peak_database
type: file
required: false
direction: input
must_exist: false
example: resources/supplements/annot_peak_database.csv
example: resources_test/supplementary/annot_peak_database.csv
# - name: --annot_gene_database
# type: file
# required: false
Expand Down
1 change: 0 additions & 1 deletion src/exp_analysis/run.sh

This file was deleted.

6 changes: 3 additions & 3 deletions src/exp_analysis/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
## VIASH START
par = {
"perturbation_data": "resources/grn-benchmark/pertubation_data.h5ad",
"tf_gene_net": "resources/grn-benchmark/grn_models/figr.csv",
"prediction": "resources/grn-benchmark/grn_models/figr.csv",
# "peak_gene_net": "resources/grn-benchmark/peak_gene_models/figr.csv",
"annot_peak_database": "resources/grn-benchmark/supp/annot_peak_database.csv",
"annot_gene_database": "resources/grn-benchmark/supp/annot_gene_database.csv",
Expand All @@ -24,13 +24,13 @@
print('Reading input files', flush=True)

perturbation_data = ad.read_h5ad(par["perturbation_data"])
tf_gene_net = pd.read_csv(par["tf_gene_net"])
prediction = pd.read_csv(par["prediction"])
# peak_gene_net = pd.read_csv(par["peak_gene_net"])
# annot_peak_database = pd.read_csv(par["annot_peak_database"])
# hvgs = pd.read_csv(par["hvgs"])

# peak_gene_net['source'] = peak_gene_net['peak']
info_obj = Explanatory_analysis(net=tf_gene_net)
info_obj = Explanatory_analysis(net=prediction)
print("Calculate basic stats")
stats = info_obj.calculate_basic_stats()
print(stats)
Expand Down
4 changes: 4 additions & 0 deletions src/exp_analysis/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
viash run src/exp_analysis/config.vsh.yaml -- \
--perturbation_data resources/grn-benchmark/perturbation_data.h5ad \
--prediction output/baseline_corr.csv \

5 changes: 5 additions & 0 deletions src/metrics/regression_1/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
viash run src/metrics/regression_1/config.vsh.yaml -- \
--perturbation_data resources/grn-benchmark/perturbation_data.h5ad \
--tf_all resources/prior/tf_all.csv \
--prediction output/baseline_corr.csv \
--score output/score.h5ad
Loading

0 comments on commit 4a35f67

Please sign in to comment.