diff --git a/scripts/run_grn_evaluation.sh b/scripts/run_grn_evaluation.sh index 24d71eb88..389f51a39 100644 --- a/scripts/run_grn_evaluation.sh +++ b/scripts/run_grn_evaluation.sh @@ -2,6 +2,7 @@ # RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" # reg_type=${1} #GB, ridge +viash ns build --parallel reg_type=ridge RUN_ID="grn_evaluation_all_${reg_type}" @@ -80,15 +81,14 @@ HERE # done ## controls -# append_entry_control "negative_control" "False" "" -# append_entry_control "positive_control" "False" "" -# append_entry_control "baseline_pearson" "False" "pearson" +# append_entry_control "negative_control" "" "" "false" "false" "false" +# append_entry_control "positive_control" "" "" "false" "false" "false" +append_entry_control "baseline_pearson" "False" "pearson" "false" "false" "false" append_entry_control "baseline_dotproduct" "False" "dotproduct" "false" "false" "false" -# append_entry_control "baseline_pearson_causal" "True" "pearson" append_entry_control "baseline_dotproduct_causal" "True" "dotproduct" "false" "false" "false" -# append_entry_control "baseline_dotproduct_causal_cell_type" "True" "dotproduct" "true" -# append_entry_control "baseline_dotproduct_causal_metacell" "True" "dotproduct" "false" "true" -# append_entry_control "baseline_dotproduct_causal_impute" "True" "dotproduct" "false" "false" "true" +append_entry_control "baseline_dotproduct_causal_cell_type" "True" "dotproduct" "true" "false" "false" +append_entry_control "baseline_dotproduct_causal_metacell" "True" "dotproduct" "false" "true" "false" +append_entry_control "baseline_dotproduct_causal_impute" "True" "dotproduct" "false" "false" "true" # append_entry_control "baseline_corr_causal_spearman" "True" "spearman" @@ -104,6 +104,7 @@ nextflow run . \ -with-trace \ -c src/common/nextflow_helpers/labels_ci.config \ -params-file ${param_file} +subl resources/results/grn_evaluation_all_ridge/scores.yaml # ./tw-windows-x86_64.exe launch ` # https://github.com/openproblems-bio/task_grn_inference.git ` diff --git a/src/control_methods/baseline_corr/script.py b/src/control_methods/baseline_corr/script.py index 760335b11..0b5580cdc 100644 --- a/src/control_methods/baseline_corr/script.py +++ b/src/control_methods/baseline_corr/script.py @@ -9,45 +9,56 @@ ## VIASH START par = { } + +print(par) + ## VIASH END + +def select_top_links(net, par): + net_sorted = net.reindex(net['weight'].abs().sort_values(ascending=False).index) + net = net_sorted.head(par['max_n_links']).reset_index(drop=True) + return net + def create_corr_net(X: np.ndarray, groups: np.ndarray, method="pearson"): - grn = pd.DataFrame() + i = 0 for group in tqdm(np.unique(groups), desc="Processing groups"): X_sub = X[groups == group, :] if method == "dotproduct": - print('dotproduct') net = X_sub.T.dot(X_sub) elif method == "pearson": - print('pearson') net = np.corrcoef(X_sub.T) + net = np.nan_to_num(net, nan=0.0, posinf=0.0, neginf=0.0) elif method == "spearman": net = spearmanr(X_sub).statistic - net = np.nan_to_num(net, nan=0.0, posinf=0.0, neginf=0.0) - + + net = pd.DataFrame(net, index=gene_names, columns=gene_names) if par['causal']: - print('causal') net = net[tf_all] else: - print('non causal') net = net.sample(len(tf_all), axis=1, random_state=par['seed']) - + # flatten to source-target-weight net = net.reset_index().melt(id_vars='index', var_name='source', value_name='weight') net.rename(columns={'index': 'target'}, inplace=True) + net = select_top_links(net, par) net['cell_type'] = group - - grn = pd.concat([grn, net], axis=0).reset_index(drop=True) + if i==0: + grn = net + else: + grn = pd.concat([grn, net], axis=0).reset_index(drop=True) + + i += 1 if par['cell_type_specific']==False: - print('Non specific') grn.drop(columns=['cell_type'], inplace=True) grn = grn.groupby(['source', 'target']).mean().reset_index() - print(grn) + net = select_top_links(net, par) + return grn print('Read data') multiomics_rna = ad.read_h5ad(par["multiomics_rna"]) -multiomics_rna = multiomics_rna[:,:2000] #TODO: togo +# multiomics_rna = multiomics_rna[:,:5000] #TODO: togo if par['metacell']: print('metacell') @@ -108,6 +119,5 @@ def create_meta_cells(df, n_cells=15): - print('Output GRN') net.to_csv(par['prediction']) diff --git a/src/control_methods/baseline_corr/test.sh b/src/control_methods/baseline_corr/test.sh new file mode 100644 index 000000000..884e70266 --- /dev/null +++ b/src/control_methods/baseline_corr/test.sh @@ -0,0 +1,5 @@ +viash run src/control_methods/baseline_corr/config.vsh.yaml -- \ + --prediction output/baseline_corr.csv \ + --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ + --tf_all resources/prior/tf_all.csv \ + --causal true diff --git a/src/exp_analysis/config.vsh.yaml b/src/exp_analysis/config.vsh.yaml index f6969e898..5b3336052 100644 --- a/src/exp_analysis/config.vsh.yaml +++ b/src/exp_analysis/config.vsh.yaml @@ -10,22 +10,23 @@ functionality: type: file required: false direction: input - default: resources/grn-benchmark/perturbation_data.h5ad example: resources_test/grn-benchmark/perturbation_data.h5ad - - name: --tf_gene_net + - name: --prediction type: file required: true direction: input + example: resources_test/grn_models/collectri.csv - name: --peak_gene_net type: file required: false direction: input + example: resources_test/peak_gene_models/collectri.csv - name: --annot_peak_database type: file required: false direction: input must_exist: false - example: resources/supplements/annot_peak_database.csv + example: resources_test/supplementary/annot_peak_database.csv # - name: --annot_gene_database # type: file # required: false diff --git a/src/exp_analysis/run.sh b/src/exp_analysis/run.sh deleted file mode 100644 index 1fd44cce0..000000000 --- a/src/exp_analysis/run.sh +++ /dev/null @@ -1 +0,0 @@ -viash run src/exp_analysis/config.vsh.yaml -- --tf_gene_net resources/grn-benchmark/grn_models/figr.csv --peak_gene_net resources/grn-benchmark/peak_gene_models/figr.csv --annot_peak_database resources/grn-benchmark/supp/annot_peak_database.csv diff --git a/src/exp_analysis/script.py b/src/exp_analysis/script.py index 66e2eedc6..a9b810c4f 100644 --- a/src/exp_analysis/script.py +++ b/src/exp_analysis/script.py @@ -6,7 +6,7 @@ ## VIASH START par = { "perturbation_data": "resources/grn-benchmark/pertubation_data.h5ad", - "tf_gene_net": "resources/grn-benchmark/grn_models/figr.csv", + "prediction": "resources/grn-benchmark/grn_models/figr.csv", # "peak_gene_net": "resources/grn-benchmark/peak_gene_models/figr.csv", "annot_peak_database": "resources/grn-benchmark/supp/annot_peak_database.csv", "annot_gene_database": "resources/grn-benchmark/supp/annot_gene_database.csv", @@ -24,13 +24,13 @@ print('Reading input files', flush=True) perturbation_data = ad.read_h5ad(par["perturbation_data"]) -tf_gene_net = pd.read_csv(par["tf_gene_net"]) +prediction = pd.read_csv(par["prediction"]) # peak_gene_net = pd.read_csv(par["peak_gene_net"]) # annot_peak_database = pd.read_csv(par["annot_peak_database"]) # hvgs = pd.read_csv(par["hvgs"]) # peak_gene_net['source'] = peak_gene_net['peak'] -info_obj = Explanatory_analysis(net=tf_gene_net) +info_obj = Explanatory_analysis(net=prediction) print("Calculate basic stats") stats = info_obj.calculate_basic_stats() print(stats) diff --git a/src/exp_analysis/test.sh b/src/exp_analysis/test.sh new file mode 100644 index 000000000..67cba6c60 --- /dev/null +++ b/src/exp_analysis/test.sh @@ -0,0 +1,4 @@ +viash run src/exp_analysis/config.vsh.yaml -- \ + --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \ + --prediction output/baseline_corr.csv \ + diff --git a/src/metrics/regression_1/test.sh b/src/metrics/regression_1/test.sh new file mode 100644 index 000000000..80550ae03 --- /dev/null +++ b/src/metrics/regression_1/test.sh @@ -0,0 +1,5 @@ +viash run src/metrics/regression_1/config.vsh.yaml -- \ + --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \ + --tf_all resources/prior/tf_all.csv \ + --prediction output/baseline_corr.csv \ + --score output/score.h5ad \ No newline at end of file diff --git a/src/workflows/run_grn_evaluation/main.nf b/src/workflows/run_grn_evaluation/main.nf index f82b13994..359b973a6 100644 --- a/src/workflows/run_grn_evaluation/main.nf +++ b/src/workflows/run_grn_evaluation/main.nf @@ -43,14 +43,18 @@ workflow run_wf { ) | baseline_corr.run( runIf: { id, state -> - ['baseline_pearson', 'baseline_dotproduct_causal'].contains(state.method_id) + state.method_id == 'baseline_pearson' }, fromState: [ multiomics_rna: "multiomics_rna", layer: "layer", tf_all: "tf_all", causal: "causal", - corr_method: "corr_method" + corr_method: "corr_method", + cell_type_specific: "cell_type_specific", + metacell: "metacell", + impute: "impute" + ], toState: {id, output, state -> state + [ @@ -58,113 +62,112 @@ workflow run_wf { ] } ) - // | baseline_corr.run( - // runIf: { id, state -> - // state.method_id == 'baseline_dotproduct' - // }, - // fromState: [ - // multiomics_rna: "multiomics_rna", - // layer: "layer", - // tf_all: "tf_all", - // causal: "causal", - // corr_method: "corr_method" - // ], - // toState: {id, output, state -> - // state + [ - // prediction: output.prediction - // ] - // } - // ) - - // | baseline_corr.run( - // runIf: { id, state -> - // state.method_id == 'baseline_pearson_causal' - // }, - // fromState: [ - // multiomics_rna: "multiomics_rna", - // layer: "layer", - // tf_all: "tf_all", - // causal: "causal", - // corr_method: "corr_method" - // ], - // toState: {id, output, state -> - // state + [ - // prediction: output.prediction - // ] - // } - // ) - // | baseline_corr.run( - // runIf: { id, state -> - // state.method_id == 'baseline_dotproduct_causal' - // }, - // fromState: [ - // multiomics_rna: "multiomics_rna", - // layer: "layer", - // tf_all: "tf_all", - // causal: "causal", - // corr_method: "corr_method" - // ], - // toState: {id, output, state -> - // state + [ - // prediction: output.prediction - // ] - // } - // ) - // | baseline_corr.run( - // runIf: { id, state -> - // state.method_id == 'baseline_dotproduct_causal_cell_type' - // }, - // fromState: [ - // multiomics_rna: "multiomics_rna", - // layer: "layer", - // tf_all: "tf_all", - // causal: "causal", - // corr_method: "corr_method", - // cell_type_specific: "cell_type_specific" - // ], - // toState: {id, output, state -> - // state + [ - // prediction: output.prediction - // ] - // } - // ) - // | baseline_corr.run( - // runIf: { id, state -> - // state.method_id == 'baseline_dotproduct_causal_metacell' - // }, - // fromState: [ - // multiomics_rna: "multiomics_rna", - // layer: "layer", - // tf_all: "tf_all", - // causal: "causal", - // corr_method: "corr_method", - // metacell: "metacell" - // ], - // toState: {id, output, state -> - // state + [ - // prediction: output.prediction - // ] - // } - // ) - // | baseline_corr.run( - // runIf: { id, state -> - // state.method_id == 'baseline_dotproduct_causal_impute' - // }, - // fromState: [ - // multiomics_rna: "multiomics_rna", - // layer: "layer", - // tf_all: "tf_all", - // causal: "causal", - // corr_method: "corr_method", - // metacell: "metacell", - // impute: "impute" - // ], - // toState: {id, output, state -> - // state + [ - // prediction: output.prediction - // ] - // } - // ) + | baseline_corr.run( + runIf: { id, state -> + state.method_id == 'baseline_dotproduct' + }, + fromState: [ + multiomics_rna: "multiomics_rna", + layer: "layer", + tf_all: "tf_all", + causal: "causal", + corr_method: "corr_method", + cell_type_specific: "cell_type_specific", + metacell: "metacell", + impute: "impute" + + ], + toState: {id, output, state -> + state + [ + prediction: output.prediction + ] + } + ) + | baseline_corr.run( + runIf: { id, state -> + state.method_id == 'baseline_dotproduct_causal' + }, + fromState: [ + multiomics_rna: "multiomics_rna", + layer: "layer", + tf_all: "tf_all", + causal: "causal", + corr_method: "corr_method", + cell_type_specific: "cell_type_specific", + metacell: "metacell", + impute: "impute" + + ], + toState: {id, output, state -> + state + [ + prediction: output.prediction + ] + } + ) + | baseline_corr.run( + runIf: { id, state -> + state.method_id == 'baseline_dotproduct_causal_cell_type' + }, + fromState: [ + multiomics_rna: "multiomics_rna", + layer: "layer", + tf_all: "tf_all", + causal: "causal", + corr_method: "corr_method", + cell_type_specific: "cell_type_specific", + metacell: "metacell", + impute: "impute" + + ], + toState: {id, output, state -> + state + [ + prediction: output.prediction + ] + } + ) + | baseline_corr.run( + runIf: { id, state -> + state.method_id == 'baseline_dotproduct_causal_metacell' + }, + fromState: [ + multiomics_rna: "multiomics_rna", + layer: "layer", + tf_all: "tf_all", + causal: "causal", + corr_method: "corr_method", + cell_type_specific: "cell_type_specific", + metacell: "metacell", + impute: "impute" + + ], + toState: {id, output, state -> + state + [ + prediction: output.prediction + ] + } + ) + | baseline_corr.run( + runIf: { id, state -> + state.method_id == 'baseline_dotproduct_causal_impute' + }, + fromState: [ + multiomics_rna: "multiomics_rna", + layer: "layer", + tf_all: "tf_all", + causal: "causal", + corr_method: "corr_method", + cell_type_specific: "cell_type_specific", + metacell: "metacell", + impute: "impute" + + ], + toState: {id, output, state -> + state + [ + prediction: output.prediction + ] + } + ) + | negative_control.run( runIf: { id, state -> diff --git a/test.sh b/test.sh new file mode 100644 index 000000000..b4665d8d6 --- /dev/null +++ b/test.sh @@ -0,0 +1,19 @@ +viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ + --tf_all resources/prior/tf_all.csv \ + --causal true \ + --prediction output/baseline_causal.csv + +viash run src/metrics/regression_1/config.vsh.yaml -- --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \ + --tf_all resources/prior/tf_all.csv \ + --prediction output/baseline_causal.csv \ + --score output/score_causal.h5ad + +viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ + --tf_all resources/prior/tf_all.csv \ + --causal false \ + --prediction output/baseline_noncausal.csv + +viash run src/metrics/regression_1/config.vsh.yaml -- --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \ + --tf_all resources/prior/tf_all.csv \ + --prediction output/baseline_noncausal.csv \ + --score output/score_noncausal.h5ad \ No newline at end of file