Adding ard_strata() (#303)

**What changes are proposed in this pull request?** * Style this entry in a way that can be copied directly into `NEWS.md`. (#<issue number>, @<username>) Provide more detail here as needed. **Reference GitHub issue associated with pull request.** _e.g., 'closes #<issue number>'_ closes #273 -------------------------------------------------------------------------------- Pre-review Checklist (if item does not apply, mark is as complete) - [ ] **All** GitHub Action workflows pass with a ✅ - [ ] PR branch has pulled the most recent updates from master branch: `usethis::pr_merge_main()` - [ ] If a bug was fixed, a unit test was added. - [ ] Code coverage is suitable for any new functions/features (generally, 100% coverage for new code): `devtools::test_coverage()` - [ ] Request a reviewer Reviewer Checklist (if item does not apply, mark is as complete) - [ ] If a bug was fixed, a unit test was added. - [ ] Run `pkgdown::build_site()`. Check the R console for errors, and review the rendered website. - [ ] Code coverage is suitable for any new functions/features: `devtools::test_coverage()` When the branch is ready to be merged: - [ ] Update `NEWS.md` with the changes from this pull request under the heading "`# cards (development version)`". If there is an issue associated with the pull request, reference it in parentheses at the end update (see `NEWS.md` for examples). - [ ] **All** GitHub Action workflows pass with a ✅ - [ ] Approve Pull Request - [ ] Merge the PR. Please use "Squash and merge" or "Rebase and merge". --------- Signed-off-by: Daniel Sjoberg <[email protected]>
insightsengineering · Oct 2, 2024 · 2e60670 · 2e60670
1 parent d7ea866
commit 2e60670
Show file tree

Hide file tree

Showing 9 changed files with 333 additions and 16 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -35,6 +35,7 @@ export(ard_missing)
 export(ard_stack)
 export(ard_stack_hierarchical)
 export(ard_stack_hierarchical_count)
+export(ard_strata)
 export(ard_total_n)
 export(as_card)
 export(as_nested_list)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,7 @@
 # cards 0.2.2.9016
 
+* Added `ard_strata()` function to ease the task of calculating ARDs stratified by one or more other categorical variables. (#273)
+
 * Add columns `'fmt_fn'`, `'warning'`, and `'errors'` to `ard_attributes()` output. (#327)
 
 * Add checks for factors with no levels, or any levels that are `NA` into `ard_*` functions (#255) 

diff --git a/R/ard_stack_hierarchical.R b/R/ard_stack_hierarchical.R
@@ -413,8 +413,8 @@ internal_stack_hierarchical <- function(data,
           total_n = FALSE,
           shuffle = FALSE,
           include_uni_by_tab = FALSE
-        ) |>
-          eval_tidy() |>
+        ) %>%
+          {suppressMessages(eval_tidy(.))} |> # styler: off
           list()
       )
   }

diff --git a/R/ard_strata.R b/R/ard_strata.R
@@ -0,0 +1,88 @@
+#' Stratified ARD
+#'
+#' @description
+#' `r lifecycle::badge('experimental')`\cr
+#' General function for calculating ARD results within subgroups.
+#'
+#' While the examples below show use with other functions from the cards package,
+#' this function would primarily be used with the statistical functions in the
+#' cardx functions.
+#'
+#' @param .data (`data.frame`)\cr
+#'   a data frame
+#' @param .by,.strata ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   columns to tabulate by/stratify by for calculation.
+#'   Arguments are similar, but with an important distinction:
+#'
+#'   `.by`: results are tabulated by **all combinations** of the columns specified,
+#'       including unobserved combinations and unobserved factor levels.
+#'
+#'   `.strata`: results are tabulated by **all _observed_ combinations** of the
+#'      columns specified.
+#'
+#'  These argument *should not* include any columns that appear in the `.f` argument.
+#' @param .f (`function`, `formula`)\cr
+#'   a function or a formula that can be coerced to a function with
+#'   `rlang::as_function()` (similar to `purrr::map(.f)`)
+#' @param ... Additional arguments passed on to the `.f` function.
+#'
+#' @return an ARD data frame of class 'card'
+#' @export
+#'
+#' @examples
+#' ard_strata(
+#'   ADSL,
+#'   by = ARM,
+#'   .f = ~ ard_continuous(.x, variables = AGE)
+#' )
+ard_strata <- function(.data, .by = NULL, .strata = NULL, .f, ...) {
+  set_cli_abort_call()
+
+  # check inputs ---------------------------------------------------------------
+  check_not_missing(.data)
+  check_not_missing(.f)
+  check_data_frame(.data)
+
+  # process inputs -------------------------------------------------------------
+  .f <- rlang::as_function(x = .f, call = get_cli_abort_call())
+  process_selectors(.data, .by = {{ .by }}, .strata = {{ .strata }})
+
+  # nest the data frame --------------------------------------------------------
+  df_nested_data <- nest_for_ard(.data, by = .by, strata = .strata)
+
+  # run fn on nested data frames -----------------------------------------------
+  df_nested_data <- df_nested_data |>
+    dplyr::mutate(ard = lapply(.data$data, .f, ...)) |>
+    dplyr::select(-"data")
+
+  # rename grouping variables --------------------------------------------------
+  # get the number grouping columns in the calculations
+  max_group_n <-
+    map(
+      df_nested_data$ard,
+      ~ dplyr::select(.x, all_ard_groups("names")) |> names()
+    ) |>
+    unlist() |>
+    unique() |>
+    sort() |>
+    str_remove(pattern = "^group") |>
+    as.integer() %>%
+    # if no grouping variables are present, this will return `-Inf`
+    {suppressWarnings(max(..1 = .))} # styler: off
+
+  if (!is.infinite(max_group_n) && !is_empty(c(.by, .strata))) {
+    new_group_colnames <-
+      c(
+        paste0("group", seq_along(c(.by, .strata)) + max_group_n),
+        paste0("group", seq_along(c(.by, .strata)) + max_group_n, "_level")
+      ) |>
+      sort()
+    names(df_nested_data)[seq_along(new_group_colnames)] <- new_group_colnames
+  }
+
+  # unnest ard data frame and return final table -------------------------------
+  df_nested_data |>
+    tidyr::unnest(cols = all_of("ard")) |>
+    as_card() |>
+    tidy_ard_column_order()
+}
diff --git a/R/import-standalone-stringr.R b/R/import-standalone-stringr.R
@@ -4,7 +4,7 @@
 #
 # ---
 # file: standalone-stringr.R
-# last-updated: 2024-01-24
+# last-updated: 2024-06-05
 # license: https://unlicense.org
 # imports: rlang
 # ---
@@ -23,25 +23,119 @@ str_trim <- function(string, side = c("both", "left", "right")) {
   trimws(x = string, which = side, whitespace = "[ \t\r\n]")
 }
 
-str_squish <- function(string) {
-  gsub(x = string, pattern = "\\s+", replacement = " ") |>
-    str_trim(side = "both")
+str_squish <- function(string, fixed = FALSE, perl = !fixed) {
+  string <- gsub("\\s+", " ", string, perl = perl)  # Replace multiple white spaces with a single white space
+  string <- gsub("^\\s+|\\s+$", "", string, perl = perl)  # Trim leading and trailing white spaces
+  return(string)
 }
 
-str_remove_all <- function(string, pattern) {
-  gsub(x = string, pattern = pattern, replacement = "")
+str_remove <- function (string, pattern, fixed = FALSE, perl = !fixed) {
+  sub (x = string, pattern = pattern, replacement = "", fixed = fixed, perl = perl)
 }
 
-str_extract <- function(string, pattern) {
-  ifelse(
-    str_detect(string, pattern),
-    regmatches(x = string, m = regexpr(pattern = pattern, text = string)),
-    NA_character_
-  )
+str_remove_all <- function(string, pattern, fixed = FALSE, perl = !fixed) {
+  gsub(x = string, pattern = pattern, replacement = "", fixed = fixed, perl = perl)
 }
 
-str_detect <- function(string, pattern) {
-  grepl(pattern = pattern, x = string)
+str_extract <- function(string, pattern, fixed = FALSE, perl = !fixed) {
+  res <- rep(NA_character_, length.out = length(string))
+  res[str_detect(string, pattern, fixed = fixed)] <-
+    regmatches(x = string, m = regexpr(pattern = pattern, text = string, fixed = fixed, perl = perl))
+
+  res
+}
+
+str_extract_all <- function(string, pattern, fixed = FALSE, perl = !fixed) {
+  regmatches(x = string, m = gregexpr(pattern = pattern, text = string, fixed = fixed, perl = perl))
+}
+
+str_detect <- function(string, pattern, fixed = FALSE, perl = !fixed) {
+  grepl(pattern = pattern, x = string, fixed = fixed, perl = perl)
+}
+
+str_replace <- function(string, pattern, replacement, fixed = FALSE, perl = !fixed) {
+  sub(x = string, pattern = pattern, replacement = replacement, fixed = fixed, perl = perl)
+}
+
+str_replace_all <- function (string, pattern, replacement, fixed = FALSE, perl = !fixed){
+  gsub(x = string, pattern = pattern, replacement = replacement, fixed = fixed, perl = perl)
+}
+
+word <- function(string, start, end = start, sep = " ", fixed = TRUE, perl = !fixed) {
+  # Handle vectorized string input
+  if (length(string) > 1) {
+    return(sapply(string, word, start, end, sep, fixed, USE.NAMES = FALSE))
+  }
+
+  words <- unlist(strsplit(string, split = sep, fixed = fixed, perl = perl))
+  words <- words[words != ""]  # Remove empty strings
+
+  # Adjust negative indices
+  n <- length(words)
+  if (start < 0) {
+    start <- n + start + 1
+  }
+  if (end < 0) {
+    end <- n + end + 1
+  }
+
+  # Validate indices
+  if (start < 1 || end > n || start > end) {
+    return(NA)
+  } else {
+    extracted_words <- words[start:end]
+    return(paste(extracted_words, collapse = sep))
+  }
+}
+
+str_sub <- function(string, start = 1L, end = -1L){
+  str_length <- nchar(string)
+
+  # Adjust start and end indices for negative values
+  if (start < 0) {
+    start <- str_length + start + 1
+  }
+  if (end < 0) {
+    end <- str_length + end + 1
+  }
+
+  substr(x = string, start = start, stop = end)
+}
+
+str_sub_all <- function(string, start = 1L, end = -1L){
+  lapply(string, function(x) substr(x, start = start, stop = end))
+}
+
+str_pad <- function(string, width, side = c("left", "right", "both"), pad = " ", use_width = TRUE){
+  side <- match.arg(side, c("left", "right", "both"))
+
+  if (side == "both") {
+    pad_left <- (width - nchar(string)) %/% 2
+    pad_right <- width - nchar(string) - pad_left
+    padded_string <- paste0(strrep(pad, pad_left), string, strrep(pad, pad_right))
+  } else {
+    format_string <- ifelse(side == "right", paste0("%-", width, "s"),
+                            ifelse(side == "left", paste0("%", width, "s"),
+                                   paste0("%", width, "s")))
+
+    padded_string <- sprintf(format_string, string)
+  }
+
+  return(padded_string)
+}
+
+str_split <- function(string, pattern, n = Inf, fixed = FALSE, perl = !fixed) {
+  if (n == Inf) {
+    return(strsplit(string, split = pattern, fixed = fixed, perl = perl))
+  } else {
+    parts <- strsplit(string, split = pattern, fixed = fixed, perl = perl)
+    lapply(parts, function(x) {
+      if (length(x) > n) {
+        x <- c(x[1:(n-1)], paste(x[n:length(x)], collapse = pattern))
+      }
+      return(x)
+    })
+  }
 }
 
 # nocov end

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -33,6 +33,7 @@ reference:
       - ard_complex
       - ard_stack
       - ard_stack_hierarchical
+      - ard_strata
 
   - subtitle: "Misc."
   -  contents:

diff --git a/man/ard_strata.Rd b/man/ard_strata.Rd
diff --git a/tests/testthat/_snaps/ard_strata.md b/tests/testthat/_snaps/ard_strata.md
@@ -0,0 +1,46 @@
+# ard_strata() works
+
+    Code
+      ard_strata(ADSL, .by = ARM, .f = ~ ard_continuous(.x, variables = AGE))
+    Message
+      {cards} data frame: 24 x 10
+    Output
+         group1 group1_level variable stat_name stat_label   stat
+      1     ARM      Placebo      AGE         N          N     86
+      2     ARM      Placebo      AGE      mean       Mean 75.209
+      3     ARM      Placebo      AGE        sd         SD   8.59
+      4     ARM      Placebo      AGE    median     Median     76
+      5     ARM      Placebo      AGE       p25         Q1     69
+      6     ARM      Placebo      AGE       p75         Q3     82
+      7     ARM      Placebo      AGE       min        Min     52
+      8     ARM      Placebo      AGE       max        Max     89
+      9     ARM    Xanomeli…      AGE         N          N     84
+      10    ARM    Xanomeli…      AGE      mean       Mean 74.381
+    Message
+      i 14 more rows
+      i Use `print(n = ...)` to see more rows
+      i 4 more variables: context, fmt_fn, warning, error
+
+---
+
+    Code
+      ard_strata(ADSL, .strata = ARM, .f = ~ ard_continuous(.x, variables = AGE, by = AGEGR1))
+    Message
+      {cards} data frame: 72 x 12
+    Output
+         group1 group1_level group2 group2_level variable stat_name stat_label   stat
+      1  AGEGR1        65-80    ARM      Placebo      AGE         N          N     42
+      2  AGEGR1        65-80    ARM      Placebo      AGE      mean       Mean 73.595
+      3  AGEGR1        65-80    ARM      Placebo      AGE        sd         SD  4.173
+      4  AGEGR1        65-80    ARM      Placebo      AGE    median     Median     74
+      5  AGEGR1        65-80    ARM      Placebo      AGE       p25         Q1     70
+      6  AGEGR1        65-80    ARM      Placebo      AGE       p75         Q3     77
+      7  AGEGR1        65-80    ARM      Placebo      AGE       min        Min     65
+      8  AGEGR1        65-80    ARM      Placebo      AGE       max        Max     80
+      9  AGEGR1          <65    ARM      Placebo      AGE         N          N     14
+      10 AGEGR1          <65    ARM      Placebo      AGE      mean       Mean 61.143
+    Message
+      i 62 more rows
+      i Use `print(n = ...)` to see more rows
+      i 4 more variables: context, fmt_fn, warning, error
+
diff --git a/tests/testthat/test-ard_strata.R b/tests/testthat/test-ard_strata.R
@@ -0,0 +1,37 @@
+test_that("ard_strata() works", {
+  expect_snapshot(
+    ard_strata(
+      ADSL,
+      .by = ARM,
+      .f = ~ ard_continuous(.x, variables = AGE)
+    )
+  )
+
+  expect_snapshot(
+    ard_strata(
+      ADSL,
+      .strata = ARM,
+      .f = ~ ard_continuous(.x, variables = AGE, by = AGEGR1)
+    )
+  )
+
+  expect_equal(
+    ard_strata(ADSL, .by = ARM, .f = ~ ard_continuous(.x, by = c(SEX, AGEGR1), variables = AGE)) |>
+      tidy_ard_column_order() |>
+      tidy_ard_row_order(),
+    ard_continuous(ADSL, by = c(SEX, AGEGR1, ARM), variables = AGE) |>
+      tidy_ard_row_order()
+  )
+})
+
+test_that("ard_strata(by,strata) when both empty", {
+  expect_equal(
+    ard_strata(ADSL, .f = ~ ard_continuous(.x, variables = AGE)),
+    ard_continuous(ADSL, variables = AGE)
+  )
+
+  expect_equal(
+    ard_strata(ADSL, .f = ~ ard_continuous(.x, by = ARM, variables = AGE)),
+    ard_continuous(ADSL, by = ARM, variables = AGE)
+  )
+})