Skip to content

Commit

Permalink
Merge pull request #9 from bzhanglab/dev
Browse files Browse the repository at this point in the history
Add multi-omics and better CLI messages
  • Loading branch information
iblacksand authored Oct 31, 2023
2 parents ce38f27 + 3399de1 commit fff939d
Show file tree
Hide file tree
Showing 6 changed files with 310 additions and 65 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ interest.txt
latest.gmt
ref.txt
target_symbols.txt
/*.gmt
/*.rnk
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![Rust](https://github.com/bzhanglab/webgestalt_rust/actions/workflows/rust.yml/badge.svg?branch=master)](https://github.com/bzhanglab/webgestalt_rust/actions/workflows/rust.yml)

Rust implementation of [WebGestaltR](https://github.com/bzhanglab/webgestaltr).
Rust implementation of [WebGestaltR](https://github.com/bzhanglab/webgestaltr).

## Install

Expand Down
119 changes: 89 additions & 30 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ use owo_colors::{OwoColorize, Stream::Stdout, Style};
use std::io::{BufReader, Write};
use std::{fs::File, time::Instant};
use webgestalt_lib::methods::gsea::GSEAConfig;
use webgestalt_lib::methods::multiomics::{combine_gmts, MultiOmicsMethod, NormalizationMethod};
use webgestalt_lib::methods::ora::ORAConfig;
use webgestalt_lib::readers::read_rank_file;
use webgestalt_lib::readers::utils::Item;
use webgestalt_lib::readers::{read_gmt_file, read_rank_file};
use webgestalt_lib::{MalformedError, WebGestaltError};

/// WebGestalt CLI.
/// ORA and GSEA enrichment tool.
Expand Down Expand Up @@ -85,6 +88,7 @@ struct CombineGmtArgs {
/// Paths to the files to combine
files: Vec<String>,
}

#[derive(ValueEnum, Clone)]
enum NormMethods {
MedianRank,
Expand All @@ -93,8 +97,15 @@ enum NormMethods {
None,
}

// Combination method selectable for the list-combine command.
// `main` maps each variant to the `MultiOmicsMethod` variant of the same name.
#[derive(ValueEnum, Clone)]
enum CombinationMethods {
// Mapped to `MultiOmicsMethod::Max`.
Max,
// Mapped to `MultiOmicsMethod::Mean`.
Mean,
}

#[derive(Args)]
struct CombineListArgs {
combination: Option<CombinationMethods>,
normalization: Option<NormMethods>,
out: Option<String>,
files: Vec<String>,
Expand Down Expand Up @@ -211,44 +222,78 @@ fn main() {
res.len()
);
}
Some(Commands::Test) => {
let list1 = read_rank_file("gene.rnk".to_string()).unwrap();
let list2 = read_rank_file("protein.rnk".to_string()).unwrap();
let list3 = read_rank_file("metabolite.rnk".to_string()).unwrap();
let lists = vec![list1, list2, list3];
// let gmt1 = webgestalt_lib::readers::read_gmt_file("gene.gmt".to_string()).unwrap();
// let gmt2 =
// webgestalt_lib::readers::read_gmt_file("metabolite.gmt".to_string()).unwrap();
// let combined_gmt = webgestalt_lib::methods::multiomics::combine_gmts(&vec![gmt1, gmt2]);
// let mut file = File::create("combined.gmt").unwrap();
// for row in combined_gmt {
// writeln!(file, "{}\t{}\t{}", row.id, row.url, row.parts.join("\t")).unwrap();
// }
let mut combined_list = webgestalt_lib::methods::multiomics::combine_lists(
lists,
webgestalt_lib::methods::multiomics::MultiOmicsMethod::Mean,
webgestalt_lib::methods::multiomics::NormalizationMethod::MeanValue,
);
combined_list.sort_by(|a, b| b.rank.partial_cmp(&a.rank).unwrap());
let mut file = File::create("combined.rnk").unwrap();
for row in combined_list {
writeln!(file, "{}\t{}", row.analyte, row.rank).unwrap();
}
}
Some(Commands::Test) => will_err(1).unwrap_or_else(|x| println!("{}", x)),
Some(Commands::Combine(args)) => match &args.combine_type {
Some(CombineType::Gmt(files)) => {}
Some(CombineType::List(files)) => {
Some(CombineType::Gmt(gmt_args)) => {
let style = Style::new().blue().bold();
println!(
"{}: READING GMTS",
"INFO".if_supports_color(Stdout, |text| text.style(style))
);
let mut gmts: Vec<Vec<Item>> = Vec::new();
let mut tot_length: usize = 0;
for path in gmt_args.files.clone() {
let gmt = read_gmt_file(path).unwrap();
tot_length += gmt.len();
gmts.push(gmt);
}
let combined_gmt = combine_gmts(&gmts);
println!(
"Found {} overlapping sets out of {}",
tot_length - combined_gmt.len(),
combined_gmt.len()
);
println!(
"{}: CREATING COMBINED GMT AT {}",
"INFO".if_supports_color(Stdout, |text| text.style(style)),
gmt_args.out.clone().unwrap()
);
let mut file = File::create(gmt_args.out.clone().unwrap()).unwrap();
for row in combined_gmt {
writeln!(file, "{}\t{}\t{}", row.id, row.url, row.parts.join("\t")).unwrap();
}
}
Some(CombineType::List(ora_args)) => {
let style = Style::new().blue().bold();
println!(
"{}: READING LISTS",
"INFO".if_supports_color(Stdout, |text| text.style(style))
);
let mut lists = Vec::new();
for file in files.files.iter() {
for file in ora_args.files.iter() {
lists.push(read_rank_file(file.clone()).unwrap());
}
let norm_method: NormalizationMethod = match ora_args.normalization {
Some(NormMethods::None) => NormalizationMethod::None,
Some(NormMethods::MeanValue) => NormalizationMethod::MeanValue,
Some(NormMethods::MedianRank) => NormalizationMethod::MedianRank,
Some(NormMethods::MedianValue) => NormalizationMethod::MedianValue,
None => panic!("No normalization method chosen."),
};
let method: MultiOmicsMethod = match ora_args.combination {
Some(CombinationMethods::Mean) => MultiOmicsMethod::Mean(norm_method),
Some(CombinationMethods::Max) => MultiOmicsMethod::Max(norm_method),
None => panic!("No combination method chosen."),
};
let mut combined_list =
webgestalt_lib::methods::multiomics::combine_lists(lists, method);
combined_list.sort_by(|a, b| b.rank.partial_cmp(&a.rank).unwrap());
let mut file = File::create(ora_args.out.clone().unwrap()).unwrap();
println!(
"{}: CREATING COMBINED LIST AT {}",
"INFO".if_supports_color(Stdout, |text| text.style(style)),
ora_args.out.clone().unwrap()
);
for row in combined_list {
writeln!(file, "{}\t{}", row.analyte, row.rank).unwrap();
}
}
_ => {
panic!("Please select a valid combine type");
println!("Please select a valid combine type");
}
},
_ => {
todo!("Please select a valid command. Run --help for options.")
println!("Please select a valid command. Run --help for options.")
}
}
}
Expand Down Expand Up @@ -288,3 +333,17 @@ fn benchmark() {
let mut ftsv = File::create("format_benchmarks.tsv").unwrap();
writeln!(ftsv, "{}", whole_file.join("\n")).unwrap();
}

/// Demonstration helper that succeeds only when `x` is zero.
///
/// For any other value it returns a [`WebGestaltError::MalformedFile`] built from fixed
/// example data: path `ExamplePath.txt`, found format `GMT`, expected format `rank`.
fn will_err(x: i32) -> Result<(), WebGestaltError> {
    // Guard clause: zero is the only non-error input.
    if x == 0 {
        return Ok(());
    }
    let sample = MalformedError {
        path: "ExamplePath.txt".to_string(),
        kind: webgestalt_lib::MalformedErrorType::WrongFormat {
            found: "GMT".to_string(),
            expected: "rank".to_string(),
        },
    };
    Err(WebGestaltError::MalformedFile(sample))
}
68 changes: 60 additions & 8 deletions webgestalt_lib/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,75 @@
use std::{error::Error, fmt};

pub mod methods;
pub mod readers;
pub mod stat;
pub enum Error {

/// Private helper trait for error types that can render themselves as a message string.
trait CustomError {
/// Returns the human-readable message for this error.
fn msg(&self) -> String;
}

/// Error type grouping the malformed-file, statistics, and I/O error cases.
#[derive(Debug)]
pub enum WebGestaltError {
/// A file could not be interpreted; carries the file path and the kind of problem.
MalformedFile(MalformedError),
/// A statistics-related failure.
StatisticsError(StatisticsError),
/// A wrapped [`std::io::Error`].
IOError(std::io::Error),
}

pub enum MalformedError {
NoColumnsFound,
WrongFormat,
// Empty impl: only the trait's default methods are used; the message text comes from `Display`.
impl Error for WebGestaltError {}

impl fmt::Display for WebGestaltError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let msg: String = match &self {
WebGestaltError::MalformedFile(x) => x.msg(),
WebGestaltError::StatisticsError(x) => x.msg(),
WebGestaltError::IOError(x) => x.to_string(),
};
write!(f, "{}", msg)
}
}

/// Describes a problem found in a specific file.
#[derive(Debug)]
pub struct MalformedError {
/// Path of the offending file; included in the rendered message.
pub path: String,
/// What was wrong with the file.
pub kind: MalformedErrorType,
}

/// The specific way a file was malformed.
#[derive(Debug)]
pub enum MalformedErrorType {
/// No column was found using the given delimiter.
NoColumnsFound { delimeter: String },
/// The file had a different format (`found`) than the one required (`expected`).
WrongFormat { found: String, expected: String },
/// The cause is not identified.
Unknown,
}

impl CustomError for MalformedError {
    /// Builds the message `Error in <path>: <detail>.` for this malformed-file error.
    ///
    /// The detail text depends on [`MalformedErrorType`]:
    /// - `WrongFormat` reports the found and expected formats.
    /// - `NoColumnsFound` reports the delimiter that produced no columns.
    /// - `Unknown` yields a generic message.
    fn msg(&self) -> String {
        let detail = match &self.kind {
            MalformedErrorType::WrongFormat { found, expected } => format!(
                "Wrong Format Found. Found: {}; Expected: {}",
                found, expected
            ),
            MalformedErrorType::Unknown => String::from("Unknown error type."),
            // The field keeps its original (misspelled) public name `delimeter`;
            // only the user-facing text uses the correct spelling.
            MalformedErrorType::NoColumnsFound { delimeter } => format!(
                "No column found with delimiter {}",
                // Print a tab as the visible escape `\t` instead of raw whitespace.
                if delimeter == "\t" { "\\t" } else { delimeter }
            ),
        };
        format!("Error in {}: {}.", self.path, detail)
    }
}

#[derive(Debug)]
pub enum StatisticsError {
FoundNANValue,
InvalidValue,
InvalidValue { value: f64 },
}

#[cfg(test)]
mod tests {
use super::*;
impl CustomError for StatisticsError {
    /// Builds the message `Statistical Error: <detail>.` for this error.
    ///
    /// `FoundNANValue` yields a fixed text; `InvalidValue` includes the offending value.
    fn msg(&self) -> String {
        let detail = match self {
            StatisticsError::FoundNANValue => String::from("Found a NAN value"),
            StatisticsError::InvalidValue { value } => format!("Found invalid value: {}", value),
        };
        // Prefix spelling corrected from "Statstical".
        format!("Statistical Error: {}.", detail)
    }
}
35 changes: 33 additions & 2 deletions webgestalt_lib/src/methods/gsea.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use rayon::prelude::*;
use std::sync::{Arc, Mutex};

/// Parameters for GSEA
#[derive(Clone)]
pub struct GSEAConfig {
/// Power to raise each rank during the enrichment scoring
pub p: f64,
Expand Down Expand Up @@ -62,12 +63,19 @@ impl GSEAResult {

/// Result record for a single set; `gsea` returns a `Vec` of these.
#[derive(Clone)]
pub struct FullGSEAResult {
/// The set name
pub set: String,
/// The statistical p-value
pub p: f64,
/// The FDR value
pub fdr: f64,
/// The enrichment score
pub es: f64,
/// The normalized enrichment score
pub nes: f64,
/// Leading edge count
pub leading_edge: i32,
/// Running sum vector
pub running_sum: Vec<f64>,
}

Expand Down Expand Up @@ -276,6 +284,10 @@ fn enrichment_score(
///
/// - `analyte_list` - [`Vec<RankListItem>`] of the rank list
/// - `gmt` - [`Vec<Item>`] of gmt file
///
/// # Returns
///
/// Returns a [`Vec<FullGSEAResult>`] of the GSEA results
pub fn gsea(
mut analyte_list: Vec<RankListItem>,
gmt: Vec<Item>,
Expand All @@ -286,7 +298,7 @@ pub fn gsea(
analyte_list.sort_by(|a, b| b.rank.partial_cmp(&a.rank).unwrap()); // sort list
let (analytes, ranks) = RankListItem::to_vecs(analyte_list.clone()); // seperate into vectors
let permutations: Vec<Vec<usize>> =
provided_permutations.unwrap_or(make_permuations(config.permutations, analytes.len()));
provided_permutations.unwrap_or(make_permutations(config.permutations, analytes.len()));
let all_nes = Arc::new(Mutex::new(Vec::new()));
let set_nes = Arc::new(Mutex::new(Vec::new()));
let all_res = Arc::new(Mutex::new(Vec::new()));
Expand Down Expand Up @@ -359,7 +371,26 @@ pub fn gsea(
final_gsea
}

pub fn make_permuations(permutations: i32, max: usize) -> Vec<Vec<usize>> {
/// Create index permutations for GSEA
///
/// # Parameters
///
/// - `permutations` - Number of permutations to create
/// - `max` - Maximum index to permute
///
/// # Returns
///
/// Returns a [`Vec<Vec<usize>>`] of the permutations
///
/// # Examples
///
/// ```
/// use webgestalt_lib::methods::gsea::make_permutations;
/// let permutations = make_permutations(10, 100);
/// assert_eq!(permutations.len(), 10);
/// assert_eq!(permutations[0].len(), 100);
/// ```
pub fn make_permutations(permutations: i32, max: usize) -> Vec<Vec<usize>> {
let mut temp_permutations: Vec<Vec<usize>> = Vec::new();
let mut smallrng = rand::rngs::SmallRng::from_entropy();
(0..permutations).for_each(|_i| {
Expand Down
Loading

0 comments on commit fff939d

Please sign in to comment.