diff --git a/NAMESPACE b/NAMESPACE index 2acc92db..ab4bbbc0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -35,6 +35,7 @@ export(ard_missing) export(ard_stack) export(ard_stack_hierarchical) export(ard_stack_hierarchical_count) +export(ard_strata) export(ard_total_n) export(as_card) export(as_nested_list) diff --git a/NEWS.md b/NEWS.md index d20304ec..b9451962 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # cards 0.2.2.9016 +* Added `ard_strata()` function to ease the task of calculating ARDs stratified by one or more other categorical variables. (#273) + * Add columns `'fmt_fn'`, `'warning'`, and `'errors'` to `ard_attributes()` output. (#327) * Add checks for factors with no levels, or any levels that are `NA` into `ard_*` functions (#255) diff --git a/R/ard_stack_hierarchical.R b/R/ard_stack_hierarchical.R index ab485497..fe82adee 100644 --- a/R/ard_stack_hierarchical.R +++ b/R/ard_stack_hierarchical.R @@ -413,8 +413,8 @@ internal_stack_hierarchical <- function(data, total_n = FALSE, shuffle = FALSE, include_uni_by_tab = FALSE - ) |> - eval_tidy() |> + ) %>% + {suppressMessages(eval_tidy(.))} |> # styler: off list() ) } diff --git a/R/ard_strata.R b/R/ard_strata.R new file mode 100644 index 00000000..a12d8a85 --- /dev/null +++ b/R/ard_strata.R @@ -0,0 +1,88 @@ +#' Stratified ARD +#' +#' @description +#' `r lifecycle::badge('experimental')`\cr +#' General function for calculating ARD results within subgroups. +#' +#' While the examples below show use with other functions from the cards package, +#' this function would primarily be used with the statistical functions in the +#' cardx functions. +#' +#' @param .data (`data.frame`)\cr +#' a data frame +#' @param .by,.strata ([`tidy-select`][dplyr::dplyr_tidy_select])\cr +#' columns to tabulate by/stratify by for calculation. +#' Arguments are similar, but with an important distinction: +#' +#' `.by`: results are tabulated by **all combinations** of the columns specified, +#' including unobserved combinations and unobserved factor levels. +#' +#' `.strata`: results are tabulated by **all _observed_ combinations** of the +#' columns specified. +#' +#' These argument *should not* include any columns that appear in the `.f` argument. +#' @param .f (`function`, `formula`)\cr +#' a function or a formula that can be coerced to a function with +#' `rlang::as_function()` (similar to `purrr::map(.f)`) +#' @param ... Additional arguments passed on to the `.f` function. +#' +#' @return an ARD data frame of class 'card' +#' @export +#' +#' @examples +#' ard_strata( +#' ADSL, +#' by = ARM, +#' .f = ~ ard_continuous(.x, variables = AGE) +#' ) +ard_strata <- function(.data, .by = NULL, .strata = NULL, .f, ...) { + set_cli_abort_call() + + # check inputs --------------------------------------------------------------- + check_not_missing(.data) + check_not_missing(.f) + check_data_frame(.data) + + # process inputs ------------------------------------------------------------- + .f <- rlang::as_function(x = .f, call = get_cli_abort_call()) + process_selectors(.data, .by = {{ .by }}, .strata = {{ .strata }}) + + # nest the data frame -------------------------------------------------------- + df_nested_data <- nest_for_ard(.data, by = .by, strata = .strata) + + # run fn on nested data frames ----------------------------------------------- + df_nested_data <- df_nested_data |> + dplyr::mutate(ard = lapply(.data$data, .f, ...)) |> + dplyr::select(-"data") + + # rename grouping variables -------------------------------------------------- + # get the number grouping columns in the calculations + max_group_n <- + map( + df_nested_data$ard, + ~ dplyr::select(.x, all_ard_groups("names")) |> names() + ) |> + unlist() |> + unique() |> + sort() |> + str_remove(pattern = "^group") |> + as.integer() %>% + # if no grouping variables are present, this will return `-Inf` + {suppressWarnings(max(..1 = .))} # styler: off + + if (!is.infinite(max_group_n) && !is_empty(c(.by, .strata))) { + new_group_colnames <- + c( + paste0("group", seq_along(c(.by, .strata)) + max_group_n), + paste0("group", seq_along(c(.by, .strata)) + max_group_n, "_level") + ) |> + sort() + names(df_nested_data)[seq_along(new_group_colnames)] <- new_group_colnames + } + + # unnest ard data frame and return final table ------------------------------- + df_nested_data |> + tidyr::unnest(cols = all_of("ard")) |> + as_card() |> + tidy_ard_column_order() +} diff --git a/R/import-standalone-stringr.R b/R/import-standalone-stringr.R index 263bde5b..9c243995 100644 --- a/R/import-standalone-stringr.R +++ b/R/import-standalone-stringr.R @@ -4,7 +4,7 @@ # # --- # file: standalone-stringr.R -# last-updated: 2024-01-24 +# last-updated: 2024-06-05 # license: https://unlicense.org # imports: rlang # --- @@ -23,25 +23,119 @@ str_trim <- function(string, side = c("both", "left", "right")) { trimws(x = string, which = side, whitespace = "[ \t\r\n]") } -str_squish <- function(string) { - gsub(x = string, pattern = "\\s+", replacement = " ") |> - str_trim(side = "both") +str_squish <- function(string, fixed = FALSE, perl = !fixed) { + string <- gsub("\\s+", " ", string, perl = perl) # Replace multiple white spaces with a single white space + string <- gsub("^\\s+|\\s+$", "", string, perl = perl) # Trim leading and trailing white spaces + return(string) } -str_remove_all <- function(string, pattern) { - gsub(x = string, pattern = pattern, replacement = "") +str_remove <- function (string, pattern, fixed = FALSE, perl = !fixed) { + sub (x = string, pattern = pattern, replacement = "", fixed = fixed, perl = perl) } -str_extract <- function(string, pattern) { - ifelse( - str_detect(string, pattern), - regmatches(x = string, m = regexpr(pattern = pattern, text = string)), - NA_character_ - ) +str_remove_all <- function(string, pattern, fixed = FALSE, perl = !fixed) { + gsub(x = string, pattern = pattern, replacement = "", fixed = fixed, perl = perl) } -str_detect <- function(string, pattern) { - grepl(pattern = pattern, x = string) +str_extract <- function(string, pattern, fixed = FALSE, perl = !fixed) { + res <- rep(NA_character_, length.out = length(string)) + res[str_detect(string, pattern, fixed = fixed)] <- + regmatches(x = string, m = regexpr(pattern = pattern, text = string, fixed = fixed, perl = perl)) + + res +} + +str_extract_all <- function(string, pattern, fixed = FALSE, perl = !fixed) { + regmatches(x = string, m = gregexpr(pattern = pattern, text = string, fixed = fixed, perl = perl)) +} + +str_detect <- function(string, pattern, fixed = FALSE, perl = !fixed) { + grepl(pattern = pattern, x = string, fixed = fixed, perl = perl) +} + +str_replace <- function(string, pattern, replacement, fixed = FALSE, perl = !fixed) { + sub(x = string, pattern = pattern, replacement = replacement, fixed = fixed, perl = perl) +} + +str_replace_all <- function (string, pattern, replacement, fixed = FALSE, perl = !fixed){ + gsub(x = string, pattern = pattern, replacement = replacement, fixed = fixed, perl = perl) +} + +word <- function(string, start, end = start, sep = " ", fixed = TRUE, perl = !fixed) { + # Handle vectorized string input + if (length(string) > 1) { + return(sapply(string, word, start, end, sep, fixed, USE.NAMES = FALSE)) + } + + words <- unlist(strsplit(string, split = sep, fixed = fixed, perl = perl)) + words <- words[words != ""] # Remove empty strings + + # Adjust negative indices + n <- length(words) + if (start < 0) { + start <- n + start + 1 + } + if (end < 0) { + end <- n + end + 1 + } + + # Validate indices + if (start < 1 || end > n || start > end) { + return(NA) + } else { + extracted_words <- words[start:end] + return(paste(extracted_words, collapse = sep)) + } +} + +str_sub <- function(string, start = 1L, end = -1L){ + str_length <- nchar(string) + + # Adjust start and end indices for negative values + if (start < 0) { + start <- str_length + start + 1 + } + if (end < 0) { + end <- str_length + end + 1 + } + + substr(x = string, start = start, stop = end) +} + +str_sub_all <- function(string, start = 1L, end = -1L){ + lapply(string, function(x) substr(x, start = start, stop = end)) +} + +str_pad <- function(string, width, side = c("left", "right", "both"), pad = " ", use_width = TRUE){ + side <- match.arg(side, c("left", "right", "both")) + + if (side == "both") { + pad_left <- (width - nchar(string)) %/% 2 + pad_right <- width - nchar(string) - pad_left + padded_string <- paste0(strrep(pad, pad_left), string, strrep(pad, pad_right)) + } else { + format_string <- ifelse(side == "right", paste0("%-", width, "s"), + ifelse(side == "left", paste0("%", width, "s"), + paste0("%", width, "s"))) + + padded_string <- sprintf(format_string, string) + } + + return(padded_string) +} + +str_split <- function(string, pattern, n = Inf, fixed = FALSE, perl = !fixed) { + if (n == Inf) { + return(strsplit(string, split = pattern, fixed = fixed, perl = perl)) + } else { + parts <- strsplit(string, split = pattern, fixed = fixed, perl = perl) + lapply(parts, function(x) { + if (length(x) > n) { + x <- c(x[1:(n-1)], paste(x[n:length(x)], collapse = pattern)) + } + return(x) + }) + } } # nocov end diff --git a/_pkgdown.yml b/_pkgdown.yml index 77ffe371..3d309d37 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -33,6 +33,7 @@ reference: - ard_complex - ard_stack - ard_stack_hierarchical + - ard_strata - subtitle: "Misc." - contents: diff --git a/man/ard_strata.Rd b/man/ard_strata.Rd new file mode 100644 index 00000000..bd1f8c40 --- /dev/null +++ b/man/ard_strata.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ard_strata.R +\name{ard_strata} +\alias{ard_strata} +\title{Stratified ARD} +\usage{ +ard_strata(.data, .by = NULL, .strata = NULL, .f, ...) +} +\arguments{ +\item{.data}{(\code{data.frame})\cr +a data frame} + +\item{.by, .strata}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +columns to tabulate by/stratify by for calculation. +Arguments are similar, but with an important distinction: + +\code{.by}: results are tabulated by \strong{all combinations} of the columns specified, +including unobserved combinations and unobserved factor levels. + +\code{.strata}: results are tabulated by \strong{all \emph{observed} combinations} of the +columns specified. + +These argument \emph{should not} include any columns that appear in the \code{.f} argument.} + +\item{.f}{(\code{function}, \code{formula})\cr +a function or a formula that can be coerced to a function with +\code{rlang::as_function()} (similar to \code{purrr::map(.f)})} + +\item{...}{Additional arguments passed on to the \code{.f} function.} +} +\value{ +an ARD data frame of class 'card' +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr +General function for calculating ARD results within subgroups. + +While the examples below show use with other functions from the cards package, +this function would primarily be used with the statistical functions in the +cardx functions. +} +\examples{ +ard_strata( + ADSL, + by = ARM, + .f = ~ ard_continuous(.x, variables = AGE) +) +} diff --git a/tests/testthat/_snaps/ard_strata.md b/tests/testthat/_snaps/ard_strata.md new file mode 100644 index 00000000..ee5ee873 --- /dev/null +++ b/tests/testthat/_snaps/ard_strata.md @@ -0,0 +1,46 @@ +# ard_strata() works + + Code + ard_strata(ADSL, .by = ARM, .f = ~ ard_continuous(.x, variables = AGE)) + Message + {cards} data frame: 24 x 10 + Output + group1 group1_level variable stat_name stat_label stat + 1 ARM Placebo AGE N N 86 + 2 ARM Placebo AGE mean Mean 75.209 + 3 ARM Placebo AGE sd SD 8.59 + 4 ARM Placebo AGE median Median 76 + 5 ARM Placebo AGE p25 Q1 69 + 6 ARM Placebo AGE p75 Q3 82 + 7 ARM Placebo AGE min Min 52 + 8 ARM Placebo AGE max Max 89 + 9 ARM Xanomeli… AGE N N 84 + 10 ARM Xanomeli… AGE mean Mean 74.381 + Message + i 14 more rows + i Use `print(n = ...)` to see more rows + i 4 more variables: context, fmt_fn, warning, error + +--- + + Code + ard_strata(ADSL, .strata = ARM, .f = ~ ard_continuous(.x, variables = AGE, by = AGEGR1)) + Message + {cards} data frame: 72 x 12 + Output + group1 group1_level group2 group2_level variable stat_name stat_label stat + 1 AGEGR1 65-80 ARM Placebo AGE N N 42 + 2 AGEGR1 65-80 ARM Placebo AGE mean Mean 73.595 + 3 AGEGR1 65-80 ARM Placebo AGE sd SD 4.173 + 4 AGEGR1 65-80 ARM Placebo AGE median Median 74 + 5 AGEGR1 65-80 ARM Placebo AGE p25 Q1 70 + 6 AGEGR1 65-80 ARM Placebo AGE p75 Q3 77 + 7 AGEGR1 65-80 ARM Placebo AGE min Min 65 + 8 AGEGR1 65-80 ARM Placebo AGE max Max 80 + 9 AGEGR1 <65 ARM Placebo AGE N N 14 + 10 AGEGR1 <65 ARM Placebo AGE mean Mean 61.143 + Message + i 62 more rows + i Use `print(n = ...)` to see more rows + i 4 more variables: context, fmt_fn, warning, error + diff --git a/tests/testthat/test-ard_strata.R b/tests/testthat/test-ard_strata.R new file mode 100644 index 00000000..159ac6ad --- /dev/null +++ b/tests/testthat/test-ard_strata.R @@ -0,0 +1,37 @@ +test_that("ard_strata() works", { + expect_snapshot( + ard_strata( + ADSL, + .by = ARM, + .f = ~ ard_continuous(.x, variables = AGE) + ) + ) + + expect_snapshot( + ard_strata( + ADSL, + .strata = ARM, + .f = ~ ard_continuous(.x, variables = AGE, by = AGEGR1) + ) + ) + + expect_equal( + ard_strata(ADSL, .by = ARM, .f = ~ ard_continuous(.x, by = c(SEX, AGEGR1), variables = AGE)) |> + tidy_ard_column_order() |> + tidy_ard_row_order(), + ard_continuous(ADSL, by = c(SEX, AGEGR1, ARM), variables = AGE) |> + tidy_ard_row_order() + ) +}) + +test_that("ard_strata(by,strata) when both empty", { + expect_equal( + ard_strata(ADSL, .f = ~ ard_continuous(.x, variables = AGE)), + ard_continuous(ADSL, variables = AGE) + ) + + expect_equal( + ard_strata(ADSL, .f = ~ ard_continuous(.x, by = ARM, variables = AGE)), + ard_continuous(ADSL, by = ARM, variables = AGE) + ) +})