Adding ard_pairwise() (#360)

**What changes are proposed in this pull request?** * Added function `ard_pairwise()` to ease the calculations of pairwise analyses. (#359) **Reference GitHub issue associated with pull request.** _e.g., 'closes #<issue number>'_ closes #359 -------------------------------------------------------------------------------- Pre-review Checklist (if item does not apply, mark is as complete) - [ ] **All** GitHub Action workflows pass with a ✅ - [ ] PR branch has pulled the most recent updates from master branch: `usethis::pr_merge_main()` - [ ] If a bug was fixed, a unit test was added. - [ ] Code coverage is suitable for any new functions/features (generally, 100% coverage for new code): `devtools::test_coverage()` - [ ] Request a reviewer Reviewer Checklist (if item does not apply, mark is as complete) - [ ] If a bug was fixed, a unit test was added. - [ ] Run `pkgdown::build_site()`. Check the R console for errors, and review the rendered website. - [ ] Code coverage is suitable for any new functions/features: `devtools::test_coverage()` When the branch is ready to be merged: - [ ] Update `NEWS.md` with the changes from this pull request under the heading "`# cards (development version)`". If there is an issue associated with the pull request, reference it in parentheses at the end update (see `NEWS.md` for examples). - [ ] **All** GitHub Action workflows pass with a ✅ - [ ] Approve Pull Request - [ ] Merge the PR. Please use "Squash and merge" or "Rebase and merge". --------- Signed-off-by: Daniel Sjoberg <[email protected]> Co-authored-by: Emily de la Rua <[email protected]>
insightsengineering · Nov 26, 2024 · efb3322 · efb3322
1 parent a937810
commit efb3322
Show file tree

Hide file tree

Showing 8 changed files with 311 additions and 3 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -32,6 +32,7 @@ export(ard_dichotomous)
 export(ard_hierarchical)
 export(ard_hierarchical_count)
 export(ard_missing)
+export(ard_pairwise)
 export(ard_stack)
 export(ard_stack_hierarchical)
 export(ard_stack_hierarchical_count)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,7 @@
 # cards 0.3.0.9004
 
+* Added function `ard_pairwise()` to ease the calculations of pairwise analyses. (#359)
+
 * The `tidy_ard_column_order()` now correctly orders grouping columns when there are 10+ groups. This also corrects an issue in the hierarchical functions where the ordering of the variables matters. (#352)
 
 * No longer exporting functions `check_pkg_installed()`, `is_pkg_installed()`, `get_min_version_required()`, `get_pkg_dependencies()`. These functions are now internal-only. (#330)

diff --git a/R/ard_pairwise.R b/R/ard_pairwise.R
@@ -0,0 +1,95 @@
+#' Pairwise ARD
+#'
+#' Utility to perform pairwise comparisons.
+#'
+#' @param data (`data.frame`)\cr
+#'   a data frame
+#' @param variable ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   Column to perform pairwise analyses for.
+#' @param .f (`function`)\cr
+#'   a function that creates ARDs. The function accepts a single argument and
+#'   a subset of `data` will be passed including the two levels of `variable`
+#'   for the pairwise analysis.
+#' @param include (`vector`)\cr
+#'   a vector of levels of the `variable` column to include in comparisons.
+#'   Pairwise comparisons will only be performed for pairs that have a level
+#'   specified here. Default is `NULL` and all pairwise computations are included.
+#'
+#' @return list of ARDs
+#' @export
+#'
+#' @examples
+#' ard_pairwise(
+#'   ADSL,
+#'   variable = ARM,
+#'   .f = \(df) {
+#'     ard_complex(
+#'       df,
+#'       variables = AGE,
+#'       statistic = ~ list(ttest = \(x, data, ...) t.test(x ~ data$ARM)[c("statistic", "p.value")])
+#'     )
+#'   },
+#'   include = "Placebo" # only include comparisons to the "Placebo" group
+#' )
+ard_pairwise <- function(data, variable, .f, include = NULL) {
+  set_cli_abort_call()
+
+  # check inputs ---------------------------------------------------------------
+  check_data_frame(data)
+  process_selectors(data, variable = {{ variable }})
+  check_scalar(variable)
+  if (!is_empty(include) && (!is_vector(include) || is.list(include))) {
+    cli::cli_abort(
+      "The {.arg include} argument must be a simple vector, not {.obj_type_friendly {include}}.",
+      call = get_cli_abort_call()
+    )
+  }
+  .f <- as_function(.f, call = get_cli_abort_call())
+  variable_levels <- .unique_and_sorted(data[[variable]])
+  if (!is_empty(include)) {
+    if (!all(include %in% variable_levels)) {
+      cli::cli_abort(
+        "The {.arg include} argument must be NULL or one or more of {.val {variable_levels}}.",
+        call = get_cli_abort_call()
+      )
+    }
+  }
+  include <- include %||% variable_levels # if include not specified, default to all levels
+
+  # identify all pairwise values in `variable` ---------------------------------
+  mtx_pairs <- variable_levels |> utils::combn(m = 2)
+  lst_pairs <- seq_len(ncol(mtx_pairs)) |> lapply(FUN = \(x) mtx_pairs[, x])
+  lst_pairs <- lst_pairs[map_lgl(lst_pairs, ~ any(.x %in% include))] # exclude pairs that were not requested
+
+  # create data subsets including the pairs ------------------------------------
+  lst_df_subsets <-
+    lapply(
+      lst_pairs,
+      FUN = \(x) {
+        df_subset <- data |> dplyr::filter(.data[[variable]] %in% .env$x)
+        if (is.factor(data[[variable]])) {
+          data[[variable]] <- factor(data[[variable]], ordered = is.ordered(data[[variable]]))
+        }
+        df_subset
+      }
+    ) |>
+    # set names for returned list including the pair levels
+    stats::setNames(map_chr(lst_pairs, ~ as.character(.x) |>
+      shQuote(type = "csh") |>
+      paste(collapse = " vs. ")))
+
+  # perform analysis -----------------------------------------------------------
+  lst_ard <-
+    imap(
+      lst_df_subsets,
+      \(df, pairs) {
+        eval_capture_conditions(.f(df)) |>
+          captured_condition_as_error(
+            message = c(glue::glue("The following {{type}} occurred for {pairs}. See message below."), x = "{condition}")
+          )
+      }
+    )
+
+  # return result --------------------------------------------------------------
+  lst_ard
+}
diff --git a/R/ard_strata.R b/R/ard_strata.R
@@ -52,7 +52,7 @@ ard_strata <- function(.data, .by = NULL, .strata = NULL, .f, ...) {
 
   # run fn on nested data frames -----------------------------------------------
   df_nested_data <- df_nested_data |>
-    dplyr::mutate(ard = lapply(.data$data, .f, ...)) |>
+    dplyr::mutate(ard = map(.data$data, .f, ...)) |>
     dplyr::select(-"data")
 
   # rename grouping variables --------------------------------------------------

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -33,13 +33,13 @@ reference:
       - ard_complex
       - ard_stack
       - ard_stack_hierarchical
-      - ard_strata
 
   - subtitle: "Misc."
   -  contents:
+      - ard_strata
+      - ard_pairwise
       - ard_attributes
 
-
   - title: "ARD Utilities"
   - subtitle: "Construct ARDs"
     contents:

diff --git a/man/ard_pairwise.Rd b/man/ard_pairwise.Rd
diff --git a/tests/testthat/_snaps/ard_pairwise.md b/tests/testthat/_snaps/ard_pairwise.md
@@ -0,0 +1,47 @@
+# ard_pairwise(variable) messaging
+
+    Code
+      ard_pairwise(ADSL, variable = c(ARM, AGEGR1), .f = function(df) ard_complex(df,
+        variables = AGE, statistic = ~ list(ttest = ttest_fn)))
+    Condition
+      Error in `ard_pairwise()`:
+      ! The `variable` argument must be length 1.
+
+---
+
+    Code
+      ard_pairwise(ADSL, variable = NOT_A_COLUMN, .f = function(df) ard_complex(df,
+        variables = AGE, statistic = ~ list(ttest = ttest_fn)))
+    Condition
+      Error in `ard_pairwise()`:
+      ! Error processing `variable` argument.
+      ! Can't select columns that don't exist. x Column `NOT_A_COLUMN` doesn't exist.
+      i Select among columns "STUDYID", "USUBJID", "SUBJID", "SITEID", "SITEGR1", "ARM", "TRT01P", "TRT01PN", "TRT01A", "TRT01AN", "TRTSDT", "TRTEDT", "TRTDUR", "AVGDD", "CUMDOSE", "AGE", "AGEGR1", "AGEGR1N", ..., "DCREASCD", and "MMSETOT"
+
+# ard_pairwise(.f) messaging
+
+    Code
+      ard_pairwise(ADSL, variable = ARM, .f = function(df) stop("I MADE THIS ERROR"))
+    Condition
+      Error in `ard_pairwise()`:
+      ! The following error occurred for 'Placebo' vs. 'Xanomeline High Dose'. See message below.
+      x I MADE THIS ERROR
+
+# ard_pairwise(include) messaging
+
+    Code
+      ard_pairwise(ADSL, variable = ARM, .f = function(df) ard_complex(df, variables = AGE,
+        statistic = ~ list(ttest = ttest_fn)), include = "NOT_A_LEVEL")
+    Condition
+      Error in `ard_pairwise()`:
+      ! The `include` argument must be NULL or one or more of "Placebo", "Xanomeline High Dose", and "Xanomeline Low Dose".
+
+---
+
+    Code
+      ard_pairwise(ADSL, variable = ARM, .f = function(df) ard_complex(df, variables = AGE,
+        statistic = ~ list(ttest = ttest_fn)), include = mtcars)
+    Condition
+      Error in `ard_pairwise()`:
+      ! The `include` argument must be a simple vector, not a data frame.
+
diff --git a/tests/testthat/test-ard_pairwise.R b/tests/testthat/test-ard_pairwise.R
@@ -0,0 +1,118 @@
+ttest_fn <- \(x, data, ...) t.test(x ~ data$ARM)[c("statistic", "p.value")]
+
+test_that("ard_pairwise() works", {
+  expect_silent(
+    lst_ard <-
+      ard_pairwise(
+        ADSL,
+        variable = ARM,
+        .f = \(df) ard_complex(df, variables = AGE, statistic = ~ list(ttest = ttest_fn)),
+        include = "Placebo" # only include comparisons to the "Placebo" group
+      )
+  )
+  expect_length(lst_ard, 2L)
+
+  expect_equal(
+    lst_ard[["'Placebo' vs. 'Xanomeline High Dose'"]],
+    ard_complex(
+      ADSL |> dplyr::filter(ARM %in% c("Placebo", "Xanomeline High Dose")),
+      variables = AGE,
+      statistic = ~ list(ttest = ttest_fn)
+    )
+  )
+
+  expect_equal(
+    lst_ard[["'Placebo' vs. 'Xanomeline Low Dose'"]],
+    ard_complex(
+      ADSL |> dplyr::filter(ARM %in% c("Placebo", "Xanomeline Low Dose")),
+      variables = AGE,
+      statistic = ~ list(ttest = ttest_fn)
+    )
+  )
+})
+
+test_that("ard_pairwise(variable)", {
+  # we get expected results with unobserved factor levels
+  expect_silent(
+    lst_ard <-
+      data.frame(
+        ARM = rep_len("Placebo", 20L) |> factor(levels = c("Placebo", "Unobserved Level")),
+        AGE = 1:20
+      ) |>
+      ard_pairwise(
+        variable = ARM,
+        .f = \(df) ard_complex(df, variables = AGE, statistic = ~ list(ttest = ttest_fn))
+      )
+  )
+  expect_equal(names(lst_ard), "'Placebo' vs. 'Unobserved Level'")
+  expect_s3_class(lst_ard[[1]], "card")
+  expect_equal(nrow(lst_ard[[1]]), 1L)
+})
+
+test_that("ard_pairwise(variable) messaging", {
+  # only works with a single variable
+  expect_snapshot(
+    error = TRUE,
+    ard_pairwise(
+      ADSL,
+      variable = c(ARM, AGEGR1),
+      .f = \(df) ard_complex(df, variables = AGE, statistic = ~ list(ttest = ttest_fn))
+    )
+  )
+
+  expect_snapshot(
+    error = TRUE,
+    ard_pairwise(
+      ADSL,
+      variable = NOT_A_COLUMN,
+      .f = \(df) ard_complex(df, variables = AGE, statistic = ~ list(ttest = ttest_fn))
+    )
+  )
+})
+
+test_that("ard_pairwise(include)", {
+  expect_silent(
+    lst_ard <-
+      ard_pairwise(
+        ADSL,
+        variable = ARM,
+        .f = \(df) ard_complex(df, variables = AGE, statistic = ~ list(ttest = ttest_fn)),
+        include = "Placebo" # only include comparisons to the "Placebo" group
+      )
+  )
+  expect_equal(
+    names(lst_ard),
+    c("'Placebo' vs. 'Xanomeline High Dose'", "'Placebo' vs. 'Xanomeline Low Dose'")
+  )
+})
+
+test_that("ard_pairwise(.f) messaging", {
+  expect_snapshot(
+    error = TRUE,
+    ard_pairwise(ADSL, variable = ARM, .f = \(df) stop("I MADE THIS ERROR"))
+  )
+})
+
+test_that("ard_pairwise(include) messaging", {
+  # include is not a level of the variable
+  expect_snapshot(
+    error = TRUE,
+    ard_pairwise(
+      ADSL,
+      variable = ARM,
+      .f = \(df) ard_complex(df, variables = AGE, statistic = ~ list(ttest = ttest_fn)),
+      include = "NOT_A_LEVEL"
+    )
+  )
+
+  # include input is not a vector
+  expect_snapshot(
+    error = TRUE,
+    ard_pairwise(
+      ADSL,
+      variable = ARM,
+      .f = \(df) ard_complex(df, variables = AGE, statistic = ~ list(ttest = ttest_fn)),
+      include = mtcars
+    )
+  )
+})