Skip to content

Commit

Permalink
Merge branch 'main' into cansavvy/gi_calc
Browse files Browse the repository at this point in the history
  • Loading branch information
cansavvy authored Jul 16, 2024
2 parents 8672035 + 73c9acd commit e9a0a20
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 37 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export(gimap_filter)
export(gimap_normalize)
export(run_qc)
export(setup_data)
import(dplyr)
import(ggplot2)
import(kableExtra)
importFrom(dplyr,across)
Expand Down
4 changes: 2 additions & 2 deletions R/01-qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ run_qc <- function(gimap_dataset,
results_file <- gsub("\\.Rmd$", "\\.html", output_file)
message("Results in: ", results_file)

results_file <- normalizePath(list.files(pattern = results_file, full.names = TRUE))

if (results_file != "") browseURL(results_file)

results_file
}
57 changes: 29 additions & 28 deletions R/plots-qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' @importFrom ggplot2 ggplot labs
#' @return counts_cdf a ggplot
#' @examples \dontrun{
#'
#'
#' gimap_dataset <- get_example_data("gimap")
#' qc_cdf(gimap_dataset)
#'
Expand Down Expand Up @@ -76,6 +76,7 @@ qc_sample_hist <- function(gimap_dataset, wide_ar = 0.75) {
#' @importFrom tidyr pivot_longer
#' @importFrom magrittr %>%
#' @import ggplot2
#' @import dplyr
#' @return a ggplot histogram
#' @examples \dontrun{
#' gimap_dataset <- get_example_data("gimap")
Expand All @@ -86,14 +87,14 @@ qc_sample_hist <- function(gimap_dataset, wide_ar = 0.75) {
qc_variance_hist <- function(gimap_dataset, filter_replicates_target_col = NULL, wide_ar = 0.75){

if(is.null(filter_replicates_target_col)){ filter_replicates_target_col <- c((ncol(gimap_dataset$transformed_data$log2_cpm)-2) : ncol(gimap_dataset$transformed_data$log2_cpm))} #last 3 columns of the data

return(
gimap_dataset$transformed_data$log2_cpm[,filter_replicates_target_col] %>%
as.data.frame() %>%
mutate(row = row_number()) %>%
tidyr::pivot_longer(-row) %>%
group_by(row) %>%
summarize(var = var(value)) %>%
dplyr::summarize(var = var(value)) %>%
ggplot(aes(x=var)) +
geom_histogram(binwidth = 0.1) +
theme(panel.background = element_blank(),
Expand All @@ -116,17 +117,18 @@ qc_variance_hist <- function(gimap_dataset, filter_replicates_target_col = NULL,
#' @importFrom tidyr pivot_longer
#' @importFrom magrittr %>%
#' @import ggplot2
#' @import dplyr
#' @return a ggplot barplot
#' @examples \dontrun{
#' gimap_dataset <- get_example_data("gimap")
#' qc_constructs_countzero_bar(gimap_dataset)
#'
#'
#' # or to select specific column(s) (samples) in which to look for zero counts
#' qc_constructs_countzero_bar(gimap_dataset, filter_zerocount_target_col = 3:5)
#'
#'
#' #or if you want to select a specific column(s) for the final day/sample replicates
#' qc_constructs_countzero_bar(gimap_dataset, filter_replicates_target_col = 3:5)
#'
#'
#' #or some combination of those
#' qc_constructs_countzero_bar(gimap_dataset, filter_zerocount_target_col = 3:5, filter_replicates_target_col = 3:5)
#' }
Expand All @@ -135,19 +137,19 @@ qc_variance_hist <- function(gimap_dataset, filter_replicates_target_col = NULL,
qc_constructs_countzero_bar <- function(gimap_dataset, filter_zerocount_target_col = NULL, filter_replicates_target_col = NULL, wide_ar = 0.75){

if(is.null(filter_zerocount_target_col)){filter_zerocount_target_col <- c(1:ncol(gimap_dataset$raw_counts))}

if (!all(filter_zerocount_target_col %in% 1:ncol(gimap_dataset$raw_counts))) {
stop("The columns selected do not exist. `filter_zerocount_target_col` needs to correspond to the index of the columns in `gimap_dataset$raw_counts` that you need to filter by")
stop("The columns selected do not exist. `filter_zerocount_target_col` needs to correspond to the index of the columns in `gimap_dataset$raw_counts` that you need to filter by")
}

qc_filter_output <- qc_filter_zerocounts(gimap_dataset, filter_zerocount_target_col = filter_zerocount_target_col)

if(is.null(filter_replicates_target_col)){ filter_replicates_target_col <- c((ncol(gimap_dataset$transformed_data$log2_cpm)-2) : ncol(gimap_dataset$transformed_data$log2_cpm))} #last 3 columns of the data

if (!all(filter_replicates_target_col %in% 1:ncol(gimap_dataset$transformed_data$log2_cpm))) {
stop("The columns selected do not exist. `filter_replicates_target_col` needs to correspond to the index of the columns in `gimap_dataset$transformed_data$log2_cpm` that you need to filter by")
stop("The columns selected do not exist. `filter_replicates_target_col` needs to correspond to the index of the columns in `gimap_dataset$transformed_data$log2_cpm` that you need to filter by")
}


return(
gimap_dataset$raw_counts[qc_filter_output$filter, filter_replicates_target_col] %>%
Expand Down Expand Up @@ -211,42 +213,41 @@ qc_cor_heatmap <- function(gimap_dataset) {
#' @import ggplot2
#' @return a ggplot histogram
#' @examples \dontrun{
#'
#'
#' gimap_dataset <- get_example_data("gimap")
#'
#'
#' qc_plasmid_histogram(gimap_dataset)
#'
#'
#' # or to specify a "cutoff" value that will be displayed as a dashed vertical line
#' qc_plasmid_histogram(gimap_dataset, cutoff=1.75)
#'
#'
#' # or to specify a different column (or set of columns) to select
#' qc_plasmid_histogram(gimap_dataset, filter_plasmid_target_col=1:2)
#'
#' # or to specify a "cutoff" value that will be displayed as a dashed vertical line as well as to specify a different column (or set of columns) to select
#' qc_plasmid_histogram(gimap_dataset, cutoff=2, filter_plasmid_target_col=1:2)
#' }
#'

qc_plasmid_histogram <- function(gimap_dataset, cutoff = NULL, filter_plasmid_target_col = NULL, wide_ar = 0.75) {

if (is.null(filter_plasmid_target_col)) {filter_plasmid_target_col <- c(1)}

if (!all(filter_plasmid_target_col %in% 1:ncol(gimap_dataset$transformed_data$log2_cpm))) {
stop("The columns selected do not exist. `filter_plasmid_target_col` needs to correspond to the index of the columns in `gimap_dataset$transformed_data$log2_cpm` that you need to filter by")
stop("The columns selected do not exist. `filter_plasmid_target_col` needs to correspond to the index of the columns in `gimap_dataset$transformed_data$log2_cpm` that you need to filter by")
}

to_plot <- data.frame(gimap_dataset$transformed_data$log2_cpm[, filter_plasmid_target_col]) %>% `colnames<-`(rep(c("plasmid_log2_cpm"), length(filter_plasmid_target_col))) %>% clean_names()
if (length(filter_plasmid_target_col >1)){ #if more than one column was selected, collapse all of the columns into the same vector and store in a df to plot

# Collapse only when more than one column was selected: stack every selected
# column into a single vector and store it in a one-column data frame to plot.
# The comparison has to sit outside length(); `length(x > 1)` is merely the
# number of selected columns, which is non-zero for any non-empty selection,
# so the branch used to run even when a single column was chosen.
if (length(filter_plasmid_target_col) > 1) {
  to_plot <- data.frame(
    unlist(to_plot %>% select(starts_with("plasmid_log2_cpm")), use.names = FALSE)
  ) %>%
    `colnames<-`(c("plasmid_log2_cpm"))
}

quantile_info <- quantile(to_plot$plasmid_log2_cpm)

if (is.null(cutoff)) { cutoff <- quantile_info["25%"] - (1.5 * (quantile_info["75%"] - quantile_info["25%"]))}
# if cutoff is null, suggest a cutoff and plot with suggested
return(

return(
ggplot(to_plot, aes(x = plasmid_log2_cpm)) +
geom_histogram(binwidth = 0.2, color = "black", fill = "gray60") +
plot_options() +
Expand Down
34 changes: 31 additions & 3 deletions man/gimap_filter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/qc_filter_plasmid.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/qc_filter_zerocounts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions tests/testthat/test-run_qc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Smoke test for run_qc(): the path it returns must point at a file that
# exists on disk after the QC report has been generated.
test_that("HTML file is created and content is correct", {
  example_dataset <- get_example_data("gimap")
  report_path <- run_qc(example_dataset)

  report_exists <- file.exists(report_path)
  expect_true(report_exists)
})
19 changes: 19 additions & 0 deletions tests/testthat/test-setup_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Fixtures shared by the tests below: a 4 x 5 count matrix (rows correspond to
# the four ids/metadata rows, columns to the five sample metadata rows).
example_counts <- matrix(1:20, nrow = 4, ncol = 5)
example_pg_ids <- data.frame(id = 1:4)
example_pg_metadata <- data.frame(info = c("A", "B", "C", "D"))
example_sample_metadata <- data.frame(
  id = 1:5,
  replicate = factor(c(1, 1, 2, 2, 3)),
  timepoint = factor(c("T0", "T0", "T1", "T1", "T2"))
)

# setup_data() should return a gimap_dataset whose slots carry the inputs
# through unchanged, plus the per-sample (column-wise) count totals.
test_that("setup_data() works correctly", {
  dataset <- setup_data(
    counts = example_counts,
    pg_ids = example_pg_ids,
    pg_metadata = example_pg_metadata,
    sample_metadata = example_sample_metadata
  )

  # Object class
  expect_s3_class(dataset, "gimap_dataset")

  # Inputs stored as given
  expect_equal(dataset$raw_counts, example_counts)
  expect_equal(dataset$metadata$pg_ids, example_pg_ids)
  expect_equal(dataset$metadata$sample_metadata, example_sample_metadata)

  # Derived totals: one sum per column of the count matrix
  expected_totals <- apply(example_counts, 2, sum)
  expect_equal(dataset$counts_per_sample, expected_totals)
})

0 comments on commit e9a0a20

Please sign in to comment.