Skip to content

Commit

Permalink
Merge pull request #221 from KristinaGomoryova/alignment
Browse files Browse the repository at this point in the history
feature.alignment function refactored
  • Loading branch information
hechth authored Jul 29, 2024
2 parents 978102d + ef2f003 commit 8261d75
Show file tree
Hide file tree
Showing 25 changed files with 486 additions and 151 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Description: This is a customized fork of the original work from Tianwei Yu.
It takes the adaptive processing of LC/MS metabolomics data further
with focus on high resolution MS for both LC and GC applications.
Depends: R (>= 3.50), MASS, mzR, splines, doParallel, foreach,
snow, dplyr, tidyr, stringr, tibble, tools, arrow
snow, dplyr, tidyr, stringr, tibble, tools, arrow, plyr
biocViews: Technology, MassSpectrometry
License: GPL-2
LazyLoad: yes
Expand Down
14 changes: 11 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
# Generated by roxygen2: do not edit by hand

export(adaptive.bin)
export(add_feature_ids)
export(adjust.time)
export(aggregate_by_rt)
export(as_feature_sample_table)
export(bigauss.esti)
export(bigauss.esti.EM)
export(bigauss.mix)
export(check_files)
export(clean_data_matrix)
export(comb)
export(compute_boundaries)
export(compute_bounds)
Expand All @@ -16,6 +20,7 @@ export(compute_clusters)
export(compute_clusters_simple)
export(compute_comb)
export(compute_corrected_features)
export(compute_corrected_features_v2)
export(compute_curr_rec_with_enough_peaks)
export(compute_delta_rt)
export(compute_densities)
Expand All @@ -40,10 +45,14 @@ export(compute_template)
export(compute_template_adjusted_rt)
export(compute_uniq_grp)
export(correct_time)
export(correct_time_v2)
export(count_peaks)
export(create_aligned_feature_table)
export(create_features_from_cluster)
export(create_intensity_row)
export(create_metadata)
export(create_output)
export(create_rows)
export(create_rt_row)
export(draw_rt_correction_plot)
export(draw_rt_normal_peaks)
export(duplicate.row.remove)
Expand All @@ -59,6 +68,7 @@ export(get_features_in_rt_range)
export(get_mzrange_bound_indices)
export(get_num_workers)
export(get_rt_region_indices)
export(get_sample_name)
export(get_single_occurrence_mask)
export(get_times_to_use)
export(hybrid)
Expand Down Expand Up @@ -90,8 +100,6 @@ export(remove_noise)
export(rev_cum_sum)
export(rm.ridge)
export(run_filter)
export(select_mz)
export(select_rt)
export(semi.sup)
export(solve_a)
export(solve_sigma)
Expand Down
59 changes: 53 additions & 6 deletions R/adjust.time.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
NULL
#> NULL

#' Combine template and sample features
#' @param template_features Tibble Template feature table (mz, rt, cluster, sample_id).
#' @param features Tibble Sample feature table (mz, rt, cluster, sample_id).
#' @return Tibble Combined feature table (rbind).
#' @export
compute_comb <- function(template_features, features) {
combined <- dplyr::bind_rows(
Expand All @@ -11,6 +15,12 @@ compute_comb <- function(template_features, features) {
return(combined)
}

#' Select features to use for retention time alignment
#' @description This function selects features present in both the sample
#' feature table and template feature table given they have the same cluster,
#' are adjacent in the combined table.
#' @param combined Tibble Table with (mz, rt, cluster, sample_id).
#' @return List of bool Returns list of bools with TRUE at each index where this condition is met.
#' @export
compute_sel <- function(combined) {
l <- nrow(combined)
Expand All @@ -19,6 +29,11 @@ compute_sel <- function(combined) {
return(sel)
}

#' Create two column table with paired sample and template retention times.
#' @param combined Tibble Table with features from sample and template.
#' @param sel list of bools List of bools indiciating which features to pair.
#' See 'compute_sel'.
#' @param j string Template sample_id.
#' @export
compute_template_adjusted_rt <- function(combined, sel, j) {
all_features <- cbind(combined$rt[sel], combined$rt[sel + 1])
Expand All @@ -34,6 +49,13 @@ compute_template_adjusted_rt <- function(combined, sel, j) {
return(all_features)
}

#' Correct the rt in feature table based on paired feature rts and differences.
#' @description This is a newer implementation based on dplyr which might be more efficient than the other function.
#' @param features Tibble The feature table for which to correct rts.
#' @param template_rt List of floats Template retention times for the paired features.
#' @param delta_rt List of floats Differences between the paired rts.
#' @return Tibble A table with corrected retention times.
#' @export
compute_corrected_features_v2 <- function(features, template_rt, delta_rt) {
features <- features |> dplyr::arrange_at(c("rt", "mz"))
idx <- dplyr::between(features$rt, min(template_rt), max(template_rt))
Expand All @@ -59,29 +81,34 @@ compute_corrected_features_v2 <- function(features, template_rt, delta_rt) {
return(features |> dplyr::arrange_at(c("mz", "rt")))
}

#' Correct the rt in feature table based on paired feature rts and differences.
#' @param features Tibble The feature table for which to correct rts.
#' @param template_rt List of floats Template retention times for the paired features.
#' @param delta_rt List of floats Differences between the paired rts.
#' @return Tibble A table with corrected retention times.
#' @export
compute_corrected_features <- function(features, delta_rt, avg_time) {
compute_corrected_features <- function(features, template_rt, delta_rt) {
features <- features |> dplyr::arrange_at(c("rt", "mz"))

corrected <- features$rt
original <- features$rt

idx <- dplyr::between(original, min(delta_rt), max(delta_rt))
idx <- dplyr::between(original, min(template_rt), max(template_rt))
to_correct <- original[idx]
this.smooth <- ksmooth(
template_rt,
delta_rt,
avg_time,
kernel = "normal",
bandwidth = (max(delta_rt) - min(delta_rt)) / 5,
bandwidth = (max(template_rt) - min(template_rt)) / 5,
x.points = to_correct
)

corrected[idx] <- this.smooth$y + to_correct
lower_bound_adjustment <- mean(this.smooth$y[this.smooth$x == min(this.smooth$x)])
upper_bound_adjustment <- mean(this.smooth$y[this.smooth$x == max(this.smooth$x)])

idx_lower <- original < min(delta_rt)
idx_upper <- original > max(delta_rt)
idx_lower <- original < min(template_rt)
idx_upper <- original > max(template_rt)

corrected[idx_lower] <- corrected[idx_lower] + lower_bound_adjustment
corrected[idx_upper] <- corrected[idx_upper] + upper_bound_adjustment
Expand All @@ -91,6 +118,10 @@ compute_corrected_features <- function(features, delta_rt, avg_time) {
return(features)
}

#' Fill missing values based on original retention times.
#' @param orig.features Non-corrected feature table.
#' @param this.features Feature table with eventual missing values.
#' @return Tibble Feature table with filles values.
#' @export
fill_missing_values <- function(orig.feature, this.feature) {
missing_values <- which(is.na(this.feature$rt))
Expand All @@ -104,6 +135,10 @@ fill_missing_values <- function(orig.feature, this.feature) {
return(this.feature)
}

#' Function to perform retention time correction
#' @param this.feature Tibble Feature table for which to correct rt.
#' @param template_features Tibble Template feature table to use for correction.
#' @return Tibble this.feature table with corrected rt values.
#' @export
correct_time <- function(this.feature, template_features) {
orig.features <- this.feature
Expand Down Expand Up @@ -137,6 +172,10 @@ correct_time <- function(this.feature, template_features) {
return(tibble::as_tibble(this.feature, column_name = c("mz", "rt", "sd1", "sd2", "area", "sample_id", "cluster")))
}

#' Select the template feature table.
#' @description The current implementation selects the table with the most features as the template.
#' @param extracted_features List of tables Tables from which to select the template.
#' @return Tibble Template feature table.
#' @export
compute_template <- function(extracted_features) {
num.ftrs <- sapply(extracted_features, nrow)
Expand All @@ -149,6 +188,14 @@ compute_template <- function(extracted_features) {
return(tibble::as_tibble(template_features))
}

#' Rewritten version of 'correct_time'
#' @description This function uses dplyr to do the same as
#' 'correct_time', just with less code. Most functions used in the original
#' function are replaced with simple data transformations.
#' @param features Tibble Table with features to correct.
#' @param template Tibble Template feature table to use for correction.
#' @return Tibble Corrected feature table.
#' @export
correct_time_v2 <- function(features, template) {
if (unique(features$sample_id) == unique(template$sample_id))
return(tibble::as_tibble(features))
Expand Down
Loading

0 comments on commit 8261d75

Please sign in to comment.