From a9e019eba7348ec66f22d40feac5617149914d3c Mon Sep 17 00:00:00 2001
From: Howard Baek <50791792+howardbaek@users.noreply.github.com>
Date: Wed, 3 Jul 2024 12:00:48 -0700
Subject: [PATCH 1/2] Fix bugs

---
 NAMESPACE                          |  1 +
 R/01-qc.R                          |  4 +--
 R/plots-qc.R                       | 57 +++++++++++++++---------------
 man/gimap_filter.Rd                | 33 +++++++++++++++--
 man/qc_cdf.Rd                      |  3 ++
 man/qc_constructs_countzero_bar.Rd | 24 +++++++++++--
 man/qc_cor_heatmap.Rd              |  3 +-
 man/qc_filter_plasmid.Rd           | 44 +++++++++++++++++++++++
 man/qc_filter_zerocounts.Rd        | 12 +++++--
 man/qc_plasmid_histogram.Rd        | 31 ++++++++++++++--
 man/qc_sample_hist.Rd              |  3 +-
 man/qc_variance_hist.Rd            |  8 ++++-
 man/run_qc.Rd                      |  9 +++++
 man/setup_data.Rd                  |  2 +-
 14 files changed, 189 insertions(+), 45 deletions(-)
 create mode 100644 man/qc_filter_plasmid.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 1880789..2fdf9dc 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,6 +8,7 @@ export(gimap_annotate)
 export(gimap_filter)
 export(run_qc)
 export(setup_data)
+import(dplyr)
 import(ggplot2)
 import(kableExtra)
 importFrom(dplyr,across)
diff --git a/R/01-qc.R b/R/01-qc.R
index 240968c..b721888 100644
--- a/R/01-qc.R
+++ b/R/01-qc.R
@@ -74,7 +74,7 @@ run_qc <- function(gimap_dataset,
   results_file <- gsub("\\.Rmd$", "\\.html", output_file)
   message("Results in: ", results_file)
 
-  results_file <- normalizePath(list.files(pattern = results_file, full.names = TRUE))
-
   if (results_file != "") browseURL(results_file)
+
+  results_file
 }
diff --git a/R/plots-qc.R b/R/plots-qc.R
index ec056bb..560da09 100644
--- a/R/plots-qc.R
+++ b/R/plots-qc.R
@@ -7,7 +7,7 @@
 #' @importFrom ggplot2 ggplot labs
 #' @return counts_cdf a ggplot
 #' @examples \dontrun{
-#' 
+#'
 #' gimap_dataset <- get_example_data("gimap")
 #' qc_cdf(gimap_dataset)
 #'
@@ -76,6 +76,7 @@ qc_sample_hist <- function(gimap_dataset, wide_ar = 0.75) {
 #' @importFrom tidyr pivot_longer
 #' @importFrom magrittr %>%
 #' @import ggplot2
+#' @import dplyr
 #' @return a ggplot histogram
 #' @examples \dontrun{
 #' gimap_dataset <- get_example_data("gimap")
@@ -86,14 +87,14 @@ qc_sample_hist <- function(gimap_dataset, wide_ar = 0.75) {
 qc_variance_hist <- function(gimap_dataset, filter_replicates_target_col = NULL, wide_ar = 0.75){
 
   if(is.null(filter_replicates_target_col)){ filter_replicates_target_col <- c((ncol(gimap_dataset$transformed_data$log2_cpm)-2) : ncol(gimap_dataset$transformed_data$log2_cpm))} #last 3 columns of the data
-  
+
   return(
     gimap_dataset$transformed_data$log2_cpm[,filter_replicates_target_col] %>%
       as.data.frame() %>%
       mutate(row = row_number()) %>%
       tidyr::pivot_longer(-row) %>%
       group_by(row) %>%
-      summarize(var = var(value)) %>%
+      dplyr::summarize(var = var(value)) %>%
       ggplot(aes(x=var)) +
       geom_histogram(binwidth = 0.1) +
       theme(panel.background = element_blank(),
@@ -116,17 +117,18 @@ qc_variance_hist <- function(gimap_dataset, filter_replicates_target_col = NULL,
 #' @importFrom tidyr pivot_longer
 #' @importFrom magrittr %>%
 #' @import ggplot2
+#' @import dplyr
 #' @return a ggplot barplot
 #' @examples \dontrun{
 #' gimap_dataset <- get_example_data("gimap")
 #' qc_constructs_countzero_bar(gimap_dataset)
-#' 
+#'
 #' #or if you want to select a specific column(s) for looking at where/which samples zero counts are present for
 #' qc_constructs_countzero_bar(gimap_dataset, filter_zerocount_target_col = 3:5)
-#' 
+#'
 #' #or if you want to select a specific column(s) for the final day/sample replicates
 #' qc_constructs_countzero_bar(gimap_dataset, filter_replicates_target_col = 3:5)
-#' 
+#'
 #' #or some combination of those
 #' qc_constructs_countzero_bar(gimap_dataset, filter_zerocount_target_col = 3:5, filter_replicates_target_col = 3:5)
 #' }
@@ -135,19 +137,19 @@ qc_variance_hist <- function(gimap_dataset, filter_replicates_target_col = NULL,
 qc_constructs_countzero_bar <- function(gimap_dataset, filter_zerocount_target_col = NULL, filter_replicates_target_col = NULL, wide_ar = 0.75){
 
   if(is.null(filter_zerocount_target_col)){filter_zerocount_target_col <- c(1:ncol(gimap_dataset$raw_counts))}
-  
+
   if (!all(filter_zerocount_target_col %in% 1:ncol(gimap_dataset$raw_counts))) {
-    stop("The columns selected do not exist. `filter_zerocount_target_col` needs to correspond to the index of the columns in `gimap_dataset$raw_counts` that you need to filter by") 
+    stop("The columns selected do not exist. `filter_zerocount_target_col` needs to correspond to the index of the columns in `gimap_dataset$raw_counts` that you need to filter by")
   }
-  
+
   qc_filter_output <- qc_filter_zerocounts(gimap_dataset, filter_zerocount_target_col = filter_zerocount_target_col)
-  
+
   if(is.null(filter_replicates_target_col)){ filter_replicates_target_col <- c((ncol(gimap_dataset$transformed_data$log2_cpm)-2) : ncol(gimap_dataset$transformed_data$log2_cpm))} #last 3 columns of the data
-  
+
   if (!all(filter_replicates_target_col %in% 1:ncol(gimap_dataset$transformed_data$log2_cpm))) {
-    stop("The columns selected do not exist. `filter_replicates_target_col` needs to correspond to the index of the columns in `gimap_dataset$transformed_data$log2_cpm` that you need to filter by") 
+    stop("The columns selected do not exist. `filter_replicates_target_col` needs to correspond to the index of the columns in `gimap_dataset$transformed_data$log2_cpm` that you need to filter by")
   }
-  
+
 
   return(
     gimap_dataset$raw_counts[qc_filter_output$filter, filter_replicates_target_col] %>%
@@ -214,14 +216,14 @@ qc_cor_heatmap <- function(gimap_dataset) {
 #' @import ggplot2
 #' @return a ggplot histogram
 #' @examples \dontrun{
-#' 
+#'
 #' gimap_dataset <- get_example_data("gimap")
-#' 
+#'
 #' qc_plasmid_histogram(gimap_dataset)
-#' 
+#'
 #' # or to specify a "cutoff" value that will be displayed as a dashed vertical line
 #' qc_plasmid_histogram(gimap_dataset, cutoff=1.75)
-#' 
+#'
 #' # or to specify a different column (or set of columns) to select
 #' qc_plasmid_histogram(gimap_dataset, filter_plasmid_target_col=1:2)
 #'
@@ -229,27 +231,26 @@ qc_cor_heatmap <- function(gimap_dataset) {
 #' qc_plasmid_histogram(gimap_dataset, cutoff=2, filter_plasmid_target_col=1:2)
 #' }
 #'
-
 qc_plasmid_histogram <- function(gimap_dataset, cutoff = NULL, filter_plasmid_target_col = NULL, wide_ar = 0.75) {
-  
+
   if (is.null(filter_plasmid_target_col)) {filter_plasmid_target_col <- c(1)}
-  
+
   if (!all(filter_plasmid_target_col %in% 1:ncol(gimap_dataset$transformed_data$log2_cpm))) {
-    stop("The columns selected do not exist. `filter_plasmid_target_col` needs to correspond to the index of the columns in `gimap_dataset$transformed_data$log2_cpm` that you need to filter by") 
+    stop("The columns selected do not exist. `filter_plasmid_target_col` needs to correspond to the index of the columns in `gimap_dataset$transformed_data$log2_cpm` that you need to filter by")
   }
-  
+
   to_plot <- data.frame(gimap_dataset$transformed_data$log2_cpm[, filter_plasmid_target_col]) %>% `colnames<-`(rep(c("plasmid_log2_cpm"), length(filter_plasmid_target_col))) %>% clean_names()
-  
-  if (length(filter_plasmid_target_col >1)){ #if more than one column was selected, collapse all of the columns into the same vector and store in a df to plot 
+
+  if (length(filter_plasmid_target_col >1)){ #if more than one column was selected, collapse all of the columns into the same vector and store in a df to plot
     to_plot <- data.frame(unlist(to_plot %>% select(starts_with("plasmid_log2_cpm")), use.names = FALSE)) %>% `colnames<-`(c("plasmid_log2_cpm"))
   }
-  
+
     quantile_info <- quantile(to_plot$plasmid_log2_cpm)
-    
+
     if (is.null(cutoff)) { cutoff <- quantile_info["25%"] - (1.5 * (quantile_info["75%"] - quantile_info["25%"]))}
       # if cutoff is null, suggest a cutoff and plot with suggested
-  
-  return(   
+
+  return(
     ggplot(to_plot, aes(x = plasmid_log2_cpm)) +
       geom_histogram(binwidth = 0.2, color = "black", fill = "gray60") +
       plot_options() +
diff --git a/man/gimap_filter.Rd b/man/gimap_filter.Rd
index 8ffea49..7b09cb5 100644
--- a/man/gimap_filter.Rd
+++ b/man/gimap_filter.Rd
@@ -4,21 +4,37 @@
 \alias{gimap_filter}
 \title{A function to run filtering}
 \usage{
-gimap_filter(.data = NULL, gimap_dataset, filter_type = "both")
+gimap_filter(
+  .data = NULL,
+  gimap_dataset,
+  filter_type = "both",
+  cutoff = NULL,
+  filter_zerocount_target_col = NULL,
+  filter_plasmid_target_col = NULL,
+  min_n_filters = 1
+)
 }
 \arguments{
 \item{.data}{Data can be piped in with %>% or |> from function to function. But the data must still be a gimap_dataset}
 
 \item{gimap_dataset}{A special dataset structure that is setup using the `setup_data()` function.}
 
-\item{filter_type}{Can be one of the following: `zero_count_only`, `low_plasmid_cpm_only` or `rep_variation`, `zero_in_last_time_point` or a vector that includes multiple of these filters.
+\item{filter_type}{Can be one of the following: `zero_count_only`, `low_plasmid_cpm_only` or `both`. Potentially in the future also `rep_variation`, `zero_in_last_time_point` or a vector that includes multiple of these filters.}
+
+\item{cutoff}{default is NULL, relates to the low_plasmid_cpm filter; the cutoff for low log2 CPM values for the plasmid time period; if not specified, the lower outlier (defined by taking the difference of the lower quartile and 1.5 * interquartile range) is used}
+
+\item{filter_zerocount_target_col}{default is NULL; Which sample column(s) should be used to check for counts of 0? If NULL and not specified, downstream analysis will select all sample columns}
+
+\item{filter_plasmid_target_col}{default is NULL, and if NULL, will select the first column only; this parameter specifically should be used to specify the plasmid column(s) that will be selected}
+
+\item{min_n_filters}{default is 1; this parameter defines at least how many/the minimum number of independent filters have to flag a pgRNA construct before the construct is filtered when using a combination of filters
 You should decide on the appropriate filter based on the results of your QC report.}
 }
 \value{
 a filtered version of the gimap_dataset returned in the $filtered_data section
 }
 \description{
-This is a function here's where we describe what it does
+This function applies filters to the gimap data. By default it runs both the zero count (across all samples) and the low plasmid cpm filters, but users can select a subset of these filters or even adjust the behavior of each filter
 }
 \examples{
 \dontrun{
@@ -34,6 +50,17 @@ gimap_dataset <- gimap_filter(gimap_dataset)
 # To see filtered data
 gimap_dataset$filtered_data
 
+# If you want to only use a single filter or some subset, specify which using the filter_type parameter
+gimap_dataset <- gimap_filter(gimap_dataset, filter_type = "zero_count_only") 
+#or 
+gimap_dataset <- gimap_filter(gimap_dataset, filter_type = "low_plasmid_cpm_only")
+
+# If you want to use multiple filters and more than one to flag a pgRNA construct before it's filtered out, use the `min_n_filters` argument
+gimap_dataset <- gimap_filter(gimap_dataset, filter_type = "both", min_n_filters = 2)
+
+# You can also specify which columns the filters will be applied to
+gimap_dataset <- gimap_filter(gimap_dataset, filter_type = "zero_count_only", filter_zerocount_target_col = c(1,2))
+
 }
 
 }
diff --git a/man/qc_cdf.Rd b/man/qc_cdf.Rd
index 7fcc1f4..340d996 100644
--- a/man/qc_cdf.Rd
+++ b/man/qc_cdf.Rd
@@ -22,6 +22,9 @@ This function uses pivot_longer to rearrange the data for plotting and then plot
 \examples{
 \dontrun{
 
+gimap_dataset <- get_example_data("gimap")
+qc_cdf(gimap_dataset)
+
 }
 
 }
diff --git a/man/qc_constructs_countzero_bar.Rd b/man/qc_constructs_countzero_bar.Rd
index 968655f..c92e48b 100644
--- a/man/qc_constructs_countzero_bar.Rd
+++ b/man/qc_constructs_countzero_bar.Rd
@@ -4,23 +4,43 @@
 \alias{qc_constructs_countzero_bar}
 \title{Create a bar graph that shows the number of replicates with a zero count for pgRNA constructs flagged by the zero count filter}
 \usage{
-qc_constructs_countzero_bar(gimap_dataset, wide_ar = 0.75)
+qc_constructs_countzero_bar(
+  gimap_dataset,
+  filter_zerocount_target_col = NULL,
+  filter_replicates_target_col = NULL,
+  wide_ar = 0.75
+)
 }
 \arguments{
 \item{gimap_dataset}{The special gimap_dataset from the `setup_data` function which contains the transformed data}
 
+\item{filter_zerocount_target_col}{default is NULL; Which sample column(s) should be used to check for counts of 0? If NULL and not specified, downstream analysis will select all sample columns}
+
+\item{filter_replicates_target_col}{default is NULL; Which sample columns are replicates whose variation you'd like to analyze; If NULL, the last 3 sample columns are used}
+
 \item{wide_ar}{aspect ratio, default is 0.75}
 }
 \value{
 a ggplot barplot
 }
 \description{
-A short description...
+This bar graph first uses the specified `filter_zerocount_target_col` columns to flag pgRNA constructs that have a raw count of 0 in any one of those columns/samples of interest.
+Then, it looks at the specified columns for the final day/sample replicates (`filter_replicates_target_col`) to see for pgRNAs that were flagged by the filter, how many of those replicate samples had raw counts of zeros. And it produces a bar plot reporting on this.
+Note, if you select samples/columns to check with the filter that don't have the replicate samples, this graph won't be informative. So you want there to be overlap between the columns for the two target_col parameters to have an informative graph
 }
 \examples{
 \dontrun{
 gimap_dataset <- get_example_data("gimap")
 qc_constructs_countzero_bar(gimap_dataset)
+
+#or if you want to select a specific column(s) for looking at where/which samples zero counts are present for
+qc_constructs_countzero_bar(gimap_dataset, filter_zerocount_target_col = 3:5)
+
+#or if you want to select a specific column(s) for the final day/sample replicates
+qc_constructs_countzero_bar(gimap_dataset, filter_replicates_target_col = 3:5)
+
+#or some combination of those
+qc_constructs_countzero_bar(gimap_dataset, filter_zerocount_target_col = 3:5, filter_replicates_target_col = 3:5)
 }
 
 }
diff --git a/man/qc_cor_heatmap.Rd b/man/qc_cor_heatmap.Rd
index cd80204..012477b 100644
--- a/man/qc_cor_heatmap.Rd
+++ b/man/qc_cor_heatmap.Rd
@@ -19,7 +19,8 @@ This function uses the `cor` function to find correlations between the sample CP
 }
 \examples{
 \dontrun{
-
+  gimap_dataset <- get_example_data("gimap")
+  qc_cor_heatmap(gimap_dataset)
 }
 
 }
diff --git a/man/qc_filter_plasmid.Rd b/man/qc_filter_plasmid.Rd
new file mode 100644
index 0000000..2d8b18d
--- /dev/null
+++ b/man/qc_filter_plasmid.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/02-filter.R
+\name{qc_filter_plasmid}
+\alias{qc_filter_plasmid}
+\title{Create a filter for pgRNAs which have a low log2 CPM value for the plasmid/Day 0 sample/time point}
+\usage{
+qc_filter_plasmid(
+  gimap_dataset,
+  cutoff = NULL,
+  filter_plasmid_target_col = NULL
+)
+}
+\arguments{
+\item{gimap_dataset}{The special gimap_dataset from the `setup_data` function which contains the log2 CPM transformed data}
+
+\item{cutoff}{default is NULL, the cutoff for low log2 CPM values for the plasmid time period; if not specified, the lower outlier (defined by taking the difference of the lower quartile and 1.5 * interquartile range) is used}
+
+\item{filter_plasmid_target_col}{default is NULL, and if NULL, will select the first column only; this parameter specifically should be used to specify the plasmid column(s) that will be selected}
+}
+\value{
+a named list with the filter `filter` specifying which pgRNAs have low plasmid log2 CPM (column of interest is `plasmid_cpm_filter`) and a report df `reportdf` for the number and percent of pgRNA which have a low plasmid log2 CPM
+}
+\description{
+This function flags and reports which and how many pgRNAs have low log2 CPM values for the plasmid/Day 0 sample/time point. If more than one column is specified as the plasmid sample, 
+we pool all the replicate samples to find the lower outlier and flag constructs for which any plasmid replicate has a log2 CPM value below the cutoff
+}
+\examples{
+\dontrun{
+  gimap_dataset <- get_example_data("gimap")
+
+  qc_filter_plasmid(gimap_dataset)
+  
+  #or to specify a cutoff value to be used in the filter rather than the lower outlier default
+  qc_filter_plasmid(gimap_dataset, cutoff=2)
+  
+  #or to specify a different column (or set of columns to select)
+  qc_filter_plasmid(gimap_dataset, filter_plasmid_target_col = 1:2)
+
+  # or to specify a cutoff value that will be used in the filter rather than the lower outlier default as well as to specify a different column (or set of columns) to select
+  qc_filter_plasmid(gimap_dataset, cutoff=1.75, filter_plasmid_target_col=1:2)
+
+}
+
+}
diff --git a/man/qc_filter_zerocounts.Rd b/man/qc_filter_zerocounts.Rd
index 4b2935c..7c7ad28 100644
--- a/man/qc_filter_zerocounts.Rd
+++ b/man/qc_filter_zerocounts.Rd
@@ -4,10 +4,12 @@
 \alias{qc_filter_zerocounts}
 \title{Create a filter for pgRNAs which have a raw count of 0 for any sample/time point}
 \usage{
-qc_filter_zerocounts(gimap_dataset)
+qc_filter_zerocounts(gimap_dataset, filter_zerocount_target_col = NULL)
 }
 \arguments{
-\item{gimap_dataset}{The special gimap_dataset from the `setup_data` function which contains the transformed data}
+\item{gimap_dataset}{The special gimap_dataset from the `setup_data` function which contains the raw count data}
+
+\item{filter_zerocount_target_col}{default is NULL; Which sample column(s) should be used to check for counts of 0? If NULL and not specified, downstream analysis will select all sample columns}
 }
 \value{
 a named list with the filter `filter` specifying which pgRNA have a count zero for at least one sample/time point and a report df `reportdf` for the number and percent of pgRNA which have a count zero for at least one sample/time point
@@ -17,7 +19,11 @@ This function flags and reports which and how many pgRNAs have a raw count of 0
 }
 \examples{
 \dontrun{
-
+  gimap_dataset <- get_example_data("gimap")
+  qc_filter_zerocounts(gimap_dataset)
+  
+  #or to specify a different column (or set of columns to select)
+  qc_filter_zerocounts(gimap_dataset, filter_zerocount_target_col = 1:2)
 }
 
 }
diff --git a/man/qc_plasmid_histogram.Rd b/man/qc_plasmid_histogram.Rd
index 7fd6133..7667214 100644
--- a/man/qc_plasmid_histogram.Rd
+++ b/man/qc_plasmid_histogram.Rd
@@ -4,20 +4,45 @@
 \alias{qc_plasmid_histogram}
 \title{Create a histogram with plasmid log2 CPM values and ascertain a cutoff for low values}
 \usage{
-qc_plasmid_histogram(gimap_dataset, cutoff = NULL, wide_ar = 0.75)
+qc_plasmid_histogram(
+  gimap_dataset,
+  cutoff = NULL,
+  filter_plasmid_target_col = NULL,
+  wide_ar = 0.75
+)
 }
 \arguments{
 \item{gimap_dataset}{The special gimap_dataset from the `setup_data` function which contains the transformed data}
 
-\item{cutoff}{default is NULL, the cutoff for low log2 CPM values for the plasmid time period}
+\item{cutoff}{default is NULL, the cutoff for low log2 CPM values for the plasmid time period; if not specified, the lower outlier (defined by taking the difference of the lower quartile and 1.5 * interquartile range) is used}
+
+\item{filter_plasmid_target_col}{default is NULL, and if NULL, will select the first column only; this parameter specifically should be used to specify the plasmid column(s) that will be selected}
 
 \item{wide_ar}{aspect ratio, default is 0.75}
 }
 \value{
-a named list
+a ggplot histogram
 }
 \description{
 Find the distribution of plasmid (day0 data) pgRNA log2 CPM values, and ascertain a cutoff or filter for low log2 CPM values.
 Assumes the first column of the dataset is the day0 data; do I need a better
 method to tell, especially if there are reps?
 }
+\examples{
+\dontrun{
+
+gimap_dataset <- get_example_data("gimap")
+
+qc_plasmid_histogram(gimap_dataset)
+
+# or to specify a "cutoff" value that will be displayed as a dashed vertical line
+qc_plasmid_histogram(gimap_dataset, cutoff=1.75)
+
+# or to specify a different column (or set of columns) to select
+qc_plasmid_histogram(gimap_dataset, filter_plasmid_target_col=1:2)
+
+# or to specify a "cutoff" value that will be displayed as a dashed vertical line as well as to specify a different column (or set of columns) to select
+qc_plasmid_histogram(gimap_dataset, cutoff=2, filter_plasmid_target_col=1:2)
+}
+
+}
diff --git a/man/qc_sample_hist.Rd b/man/qc_sample_hist.Rd
index 710e2c9..81d8f50 100644
--- a/man/qc_sample_hist.Rd
+++ b/man/qc_sample_hist.Rd
@@ -19,7 +19,8 @@ This function uses pivot_longer to rearrange the data for plotting and then plot
 }
 \examples{
 \dontrun{
-
+gimap_dataset <- get_example_data("gimap")
+qc_sample_hist(gimap_dataset)
 }
 
 }
diff --git a/man/qc_variance_hist.Rd b/man/qc_variance_hist.Rd
index 97823f2..6cfd269 100644
--- a/man/qc_variance_hist.Rd
+++ b/man/qc_variance_hist.Rd
@@ -4,11 +4,17 @@
 \alias{qc_variance_hist}
 \title{Create a histogram for the variance within replicates for each pgRNA}
 \usage{
-qc_variance_hist(gimap_dataset, wide_ar = 0.75)
+qc_variance_hist(
+  gimap_dataset,
+  filter_replicates_target_col = NULL,
+  wide_ar = 0.75
+)
 }
 \arguments{
 \item{gimap_dataset}{The special gimap_dataset from the `setup_data` function which contains the transformed data}
 
+\item{filter_replicates_target_col}{default is NULL; Which sample columns are replicates whose variation you'd like to analyze; If NULL, the last 3 sample columns are used}
+
 \item{wide_ar}{aspect ratio, default is 0.75}
 }
 \value{
diff --git a/man/run_qc.Rd b/man/run_qc.Rd
index 36ed657..48467f3 100644
--- a/man/run_qc.Rd
+++ b/man/run_qc.Rd
@@ -9,6 +9,9 @@ run_qc(
   output_file = "./gimap_QC_Report.Rmd",
   plots_dir = "./qc_plots",
   overwrite = FALSE,
+  filter_zerocount_target_col = NULL,
+  filter_plasmid_target_col = NULL,
+  filter_replicates_target_col = NULL,
   ...
 )
 }
@@ -21,6 +24,12 @@ run_qc(
 
 \item{overwrite}{default is FALSE; whether to overwrite the QC Report file}
 
+\item{filter_zerocount_target_col}{default is NULL; Which sample column(s) should be used to check for counts of 0? If NULL and not specified, downstream analysis will select all sample columns}
+
+\item{filter_plasmid_target_col}{default is NULL; Which sample column(s) should be used to look at log2 CPM expression for plasmid pgRNA constructs? If NULL and not specified, downstream analysis will select the first sample column only}
+
+\item{filter_replicates_target_col}{default is NULL; Which sample columns are replicates whose variation you'd like to analyze; If NULL, the last 3 sample columns are used}
+
 \item{...}{additional parameters are sent to `rmarkdown::render()`}
 }
 \value{
diff --git a/man/setup_data.Rd b/man/setup_data.Rd
index b2c6319..d941743 100644
--- a/man/setup_data.Rd
+++ b/man/setup_data.Rd
@@ -34,7 +34,7 @@ example_counts <- get_example_data("count") \%>\%
   dplyr::select(c("Day00_RepA", "Day05_RepA", "Day22_RepA", "Day22_RepB", "Day22_RepC")) \%>\%
   as.matrix()
 
-gimap_dataset <- setup_data(counts = example_counts_data)
+gimap_dataset <- setup_data(counts = example_counts)
 
 # You can see what an example dataset looks like by pulling the example gimap_dataset:
 gimap_dataset <- get_example_data("gimap")

From 33280039c15c4120af56fca820e5708d1c9c2b3c Mon Sep 17 00:00:00 2001
From: Howard Baek <50791792+howardbaek@users.noreply.github.com>
Date: Wed, 3 Jul 2024 12:03:55 -0700
Subject: [PATCH 2/2] Test `run_qc()`

---
 tests/testthat/test-run_qc.R | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 tests/testthat/test-run_qc.R

diff --git a/tests/testthat/test-run_qc.R b/tests/testthat/test-run_qc.R
new file mode 100644
index 0000000..cce21f8
--- /dev/null
+++ b/tests/testthat/test-run_qc.R
@@ -0,0 +1,6 @@
+test_that("HTML file is created and content is correct", {
+  gimap_dataset <- get_example_data("gimap")
+  html_file <- run_qc(gimap_dataset)
+
+  expect_true(file.exists(html_file))
+})