diff --git a/webgestalt_lib/src/methods/gsea.rs b/webgestalt_lib/src/methods/gsea.rs index c7965f3..ff36f92 100644 --- a/webgestalt_lib/src/methods/gsea.rs +++ b/webgestalt_lib/src/methods/gsea.rs @@ -36,20 +36,20 @@ pub struct RankListItem { pub rank: f64, } -struct GSEAResult { +struct PartialGSEAResult { // TODO: Look at adding enrichment and normalized enrichment score set: String, p: f64, es: f64, nes: f64, - overlap: i32, leading_edge: i32, running_sum: Vec, + nes_iter: Vec, } -impl GSEAResult { - pub fn add_fdr(&self, fdr: f64) -> FullGSEAResult { - FullGSEAResult { +impl PartialGSEAResult { + pub fn add_fdr(&self, fdr: f64) -> GSEAResult { + GSEAResult { set: self.set.clone(), p: self.p, fdr, @@ -62,7 +62,7 @@ impl GSEAResult { } #[derive(Clone)] -pub struct FullGSEAResult { +pub struct GSEAResult { /// The set name pub set: String, /// The statistical p-value @@ -113,7 +113,7 @@ fn analyte_set_p( p: f64, permutations_vec: &Vec>, config: &GSEAConfig, -) -> (GSEAResult, Vec) { +) -> PartialGSEAResult { let permutations = permutations_vec.len(); let analyte_set = AHashSet::from_iter(item.parts.iter()); let mut n_r: f64 = 0.0; @@ -128,19 +128,16 @@ fn analyte_set_p( } } if overlap < config.min_overlap || overlap > config.max_overlap { - ( - GSEAResult { - // No GSEA needed - set: item.id.clone(), - p: 1.0, - nes: 0.0, - es: 0.0, - overlap, - leading_edge: 0, - running_sum: Vec::new(), - }, - Vec::new(), - ) + PartialGSEAResult { + // No GSEA needed + set: item.id.clone(), + p: 1.0, + nes: 0.0, + es: 0.0, + leading_edge: 0, + running_sum: Vec::new(), + nes_iter: Vec::new(), + } } else { let inverse_nr = 1.0 / n_r; // Invert n_r for the enrichment score let original_order = (0..analyte_count).collect::>(); // get regular order @@ -215,18 +212,15 @@ fn analyte_set_p( } else { -real_es / down_avg }; - ( - GSEAResult { - set: item.id.clone(), - p, - nes: norm_es, - es: real_es, - overlap, - leading_edge: max_hits, - running_sum, - }, - nes_es, - ) + PartialGSEAResult { + set: item.id.clone(), + p, + nes: norm_es, + es: real_es, + leading_edge: max_hits, + running_sum, + nes_iter: nes_es, + } } } @@ -295,29 +289,25 @@ pub fn gsea( gmt: Vec, config: GSEAConfig, provided_permutations: Option>>, -) -> Vec { +) -> Vec { println!("Starting GSEA Calculation."); analyte_list.sort_by(|a, b| b.rank.partial_cmp(&a.rank).unwrap()); // sort list let (analytes, ranks) = RankListItem::to_vecs(analyte_list.clone()); // seperate into vectors let permutations: Vec> = provided_permutations.unwrap_or(make_permutations(config.permutations, analytes.len())); - let all_nes = Arc::new(Mutex::new(Vec::new())); - let set_nes = Arc::new(Mutex::new(Vec::new())); - let all_res = Arc::new(Mutex::new(Vec::new())); - gmt.par_iter().for_each(|analyte_set| { - // parallelized scoring of all sets - let (y, nes_iter) = - analyte_set_p(&analytes, &ranks, analyte_set, 1.0, &permutations, &config); - if y.overlap >= config.min_overlap && y.overlap <= config.max_overlap { - all_nes.lock().unwrap().extend(nes_iter); - set_nes.lock().unwrap().push(y.nes); - all_res.lock().unwrap().push(y); - } - }); - let null_distribution = all_nes.lock().unwrap(); - let observed_distribution = set_nes.lock().unwrap(); - let partial_results: std::sync::MutexGuard<'_, Vec> = all_res.lock().unwrap(); - let mut final_gsea: Vec = Vec::new(); + let partial_results: Vec = gmt + .par_iter() + .map(|analyte_set| { + // parallelized scoring of all sets + analyte_set_p(&analytes, &ranks, analyte_set, 1.0, &permutations, &config) + }) + .collect(); + let null_distribution: Vec = partial_results + .iter() + .flat_map(|x| x.nes_iter.clone()) + .collect(); + let observed_distribution: Vec = partial_results.iter().map(|x| x.nes).collect(); + let mut final_gsea: Vec = Vec::new(); let postive_top_side = null_distribution .par_iter() .filter(|&x| x >= &0_f64) @@ -334,9 +324,9 @@ pub fn gsea( .par_iter() .filter(|&x| x < &0_f64) .collect(); - for i in 0..partial_results.len() { + for item in &partial_results { // get all FDR values - let nes = partial_results[i].nes; + let nes = item.nes; let top_side: &Vec<&f64> = if nes > 0_f64 { // positive null distribution &postive_top_side @@ -368,7 +358,7 @@ pub fn gsea( .filter(|&x| x.abs() >= nes_abs) .count() as f64; let fdr: f64 = (top_val * bottom_len) / (bottom_val * top_len); // get FDR value - final_gsea.push(partial_results[i].add_fdr(fdr)); + final_gsea.push(item.add_fdr(fdr)); } final_gsea } diff --git a/webgestalt_lib/src/methods/multiomics.rs b/webgestalt_lib/src/methods/multiomics.rs index 2fb40c4..bd700e2 100644 --- a/webgestalt_lib/src/methods/multiomics.rs +++ b/webgestalt_lib/src/methods/multiomics.rs @@ -2,7 +2,7 @@ use ahash::{AHashMap, AHashSet}; use statrs::distribution::{Continuous, ContinuousCDF, Normal}; use super::{ - gsea::{FullGSEAResult, GSEAConfig, RankListItem}, + gsea::{GSEAConfig, GSEAResult, RankListItem}, ora::{get_ora, ORAConfig, ORAResult}, }; use crate::{methods::gsea::gsea, readers::utils::Item}; @@ -61,10 +61,10 @@ pub enum NormalizationMethod { /// /// Returns a [`Vec>`] containing the results of each analysis. If the method was not meta-analysis, then the outer vector will only have one element. /// If the method was meta-analysis, then the first element will be the results of the meta-analysis, and the rest of the elements will be the results of each analysis run individually. -pub fn multiomic_gsea(jobs: Vec, method: MultiOmicsMethod) -> Vec> { +pub fn multiomic_gsea(jobs: Vec, method: MultiOmicsMethod) -> Vec> { if let MultiOmicsMethod::Meta(meta_method) = method { let mut phash: AHashMap> = AHashMap::default(); - let mut results: Vec> = Vec::new(); + let mut results: Vec> = Vec::new(); for job in jobs { let res = gsea(job.rank_list, job.gmt, job.config, None); for row in res.iter() { @@ -73,12 +73,12 @@ pub fn multiomic_gsea(jobs: Vec, method: MultiOmicsMethod) -> Vec = Vec::new(); + let mut final_result: Vec = Vec::new(); match meta_method { MetaAnalysisMethod::Stouffer => { let normal = Normal::new(0.0, 1.0).unwrap(); for set in phash.keys() { - final_result.push(FullGSEAResult { + final_result.push(GSEAResult { set: set.clone(), p: stouffer_with_normal(&phash[set], &normal), fdr: 0.0, @@ -91,7 +91,7 @@ pub fn multiomic_gsea(jobs: Vec, method: MultiOmicsMethod) -> Vec { for set in phash.keys() { - final_result.push(FullGSEAResult { + final_result.push(GSEAResult { set: set.clone(), p: fisher(&phash[set]), fdr: 0.0,