From e027ad79b560c58b982338c475db68c974df6d7f Mon Sep 17 00:00:00 2001
From: Daniel Sjoberg <danield.sjoberg@gmail.com>
Date: Thu, 16 Jan 2025 10:19:58 -0800
Subject: [PATCH] Adding cumulative counts and percents to `ard_categorical()`
 (#373)

**What changes are proposed in this pull request?**
* The `ard_categorical()` function can now return cumulative counts and
percentages with `ard_categorical(statistic = varname ~ c('n_cum',
'p_cum'))`. (#145)

**Reference GitHub issue associated with pull request.** _e.g., 'closes
#<issue number>'_
closes #145


--------------------------------------------------------------------------------

Pre-review Checklist (if item does not apply, mark it as complete)
- [x] **All** GitHub Action workflows pass with a :white_check_mark:
- [x] PR branch has pulled the most recent updates from master branch:
`usethis::pr_merge_main()`
- [x] If a bug was fixed, a unit test was added.
- [x] Code coverage is suitable for any new functions/features
(generally, 100% coverage for new code): `devtools::test_coverage()`
- [x] Request a reviewer

Reviewer Checklist (if item does not apply, mark it as complete)

- [ ] If a bug was fixed, a unit test was added.
- [ ] Run `pkgdown::build_site()`. Check the R console for errors, and
review the rendered website.
- [ ] Code coverage is suitable for any new functions/features:
`devtools::test_coverage()`

When the branch is ready to be merged:
- [ ] Update `NEWS.md` with the changes from this pull request under the
heading "`# cards (development version)`". If there is an issue
associated with the pull request, reference it in parentheses at the end
of the update (see `NEWS.md` for examples).
- [ ] **All** GitHub Action workflows pass with a :white_check_mark:
- [ ] Approve Pull Request
- [ ] Merge the PR. Please use "Squash and merge" or "Rebase and merge".

_Optional Reverse Dependency Checks_:

- Install `checked` with `pak::pak("Genentech/checked")` or
`pak::pak("checked")`
- Check dev versions of `cardx`, `gtsummary`, and `tfrmt` which are in
the `ddsjoberg` R Universe

  ```shell
Rscript -e "options(checked.check_envvars = c(NOT_CRAN = TRUE));
checked::check_rev_deps(path = '.', repos =
c('https://ddsjoberg.r-universe.dev', 'https://cloud.r-project.org'))"
  ```

- Check CRAN reverse dependencies but run tests skipped on CRAN

  ```shell
Rscript -e "options(checked.check_envvars = c(NOT_CRAN = TRUE));
checked::check_rev_deps(path = '.', repos =
'https://cloud.r-project.org')"
  ```

- Check CRAN reverse dependencies in a CRAN-like environment

  ```shell
Rscript -e "options(checked.check_envvars = c(NOT_CRAN = FALSE),
checked.check_build_args = '--as-cran'); checked::check_rev_deps(path =
'.', repos = 'https://cloud.r-project.org')"
  ```

---------

Co-authored-by: Becca Krouse <14199771+bzkrouse@users.noreply.github.com>
---
 .github/PULL_REQUEST_TEMPLATE.md           |   6 +-
 .github/workflows/test-coverage.yaml       |  29 ++-
 DESCRIPTION                                |   2 +-
 R/ard_categorical.R                        |  94 +++++++--
 R/default_stat_labels.R                    |   2 +
 README.Rmd                                 |   2 +-
 README.md                                  |   2 +-
 man/ard_categorical.Rd                     |  28 ++-
 man/ard_dichotomous.Rd                     |  26 ++-
 man/ard_hierarchical.Rd                    |  26 ++-
 man/ard_stack_hierarchical.Rd              |   4 +-
 man/dot-calculate_tabulation_statistics.Rd |  10 +-
 man/dot-process_denominator.Rd             |   6 +-
 tests/testthat/_snaps/ard_categorical.md   |   9 +
 tests/testthat/test-ard_categorical.R      | 223 +++++++++++++++++++++
 15 files changed, 396 insertions(+), 73 deletions(-)
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 39ea650d4..4d84b3a4f 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -33,17 +33,17 @@ _Optional Reverse Dependency Checks_:
 - Check dev versions of `cardx`, `gtsummary`, and `tfrmt` which are in the `ddsjoberg` R Universe
 
   ```shell
-  Rscript -e "options(checked.check_envvars = c(NOT_CRAN = TRUE)); checked::check_rev_deps(path = '.', repos = c('https://ddsjoberg.r-universe.dev', 'https://cloud.r-project.org'))"
+  Rscript -e "options(checked.check_envvars = c(NOT_CRAN = TRUE)); checked::check_rev_deps(path = '.', n = parallel::detectCores() - 2L, repos = c('https://ddsjoberg.r-universe.dev', 'https://cloud.r-project.org'))"
   ```
 
 - Check CRAN reverse dependencies but run tests skipped on CRAN
 
   ```shell
-  Rscript -e "options(checked.check_envvars = c(NOT_CRAN = TRUE)); checked::check_rev_deps(path = '.', repos = 'https://cloud.r-project.org')"
+  Rscript -e "options(checked.check_envvars = c(NOT_CRAN = TRUE)); checked::check_rev_deps(path = '.', n = parallel::detectCores() - 2, repos = 'https://cloud.r-project.org')"
   ```
 
 - Check CRAN reverse dependencies in a CRAN-like environment
 
   ```shell
-  Rscript -e "options(checked.check_envvars = c(NOT_CRAN = FALSE), checked.check_build_args = '--as-cran'); checked::check_rev_deps(path = '.', repos = 'https://cloud.r-project.org')"
+  Rscript -e "options(checked.check_envvars = c(NOT_CRAN = FALSE), checked.check_build_args = '--as-cran'); checked::check_rev_deps(path = '.', n = parallel::detectCores() - 2, repos = 'https://cloud.r-project.org')"
   ```
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
index 1bfd07001..e050312ff 100644
--- a/.github/workflows/test-coverage.yaml
+++ b/.github/workflows/test-coverage.yaml
@@ -4,9 +4,10 @@ on:
   push:
     branches: [main, master]
   pull_request:
-    branches: [main, master]
 
-name: test-coverage
+name: test-coverage.yaml
+
+permissions: read-all
 
 jobs:
   test-coverage:
@@ -15,7 +16,7 @@ jobs:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-r@v2
         with:
@@ -23,28 +24,38 @@ jobs:
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: any::covr
-          needs: coverage, check
+          extra-packages: any::covr, any::xml2
+          needs: coverage
 
       - name: Test coverage
         run: |
-          covr::codecov(
+          cov <- covr::package_coverage(
             quiet = FALSE,
             clean = FALSE,
-            install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
+            install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
           )
+          covr::to_cobertura(cov)
         shell: Rscript {0}
 
+      - uses: codecov/codecov-action@v4
+        with:
+          # Fail CI on upload errors when not on a PR, or when on a PR and a token is given
+          fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}
+          file: ./cobertura.xml
+          plugin: noop
+          disable_search: true
+          token: ${{ secrets.CODECOV_TOKEN }}
+
       - name: Show testthat output
         if: always()
         run: |
           ## --------------------------------------------------------------------
-          find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
+          find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
         shell: bash
 
       - name: Upload test results
         if: failure()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: coverage-test-failures
           path: ${{ runner.temp }}/package
diff --git a/DESCRIPTION b/DESCRIPTION
index dd075745c..e396c71a3 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -31,7 +31,7 @@ Suggests:
     spelling (>= 2.2.0),
     testthat (>= 3.2.0),
     withr (>= 3.0.0)
-Config/Needs/check: hms
+Config/Needs/coverage: hms
 Config/Needs/website: rmarkdown, jsonlite, yaml, gtsummary, tfrmt,
     insightsengineering/nesttemplate
 Config/testthat/edition: 3
diff --git a/R/ard_categorical.R b/R/ard_categorical.R
index 9a0f672ad..f2ad03740 100644
--- a/R/ard_categorical.R
+++ b/R/ard_categorical.R
@@ -17,13 +17,13 @@
 #'   Arguments may be used in conjunction with one another.
 #' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
 #'   columns to include in summaries. Default is `everything()`.
-#' @param denominator (`data.frame`, `integer`)\cr
-#'   Specify this *optional* argument to change the denominator,
-#'   e.g. the `"N"` statistic. Default is `NULL`. See below for details.
+#' @param denominator (`string`, `data.frame`, `integer`)\cr
+#'   Specify this argument to change the denominator,
+#'   e.g. the `"N"` statistic. Default is `'column'`. See below for details.
 #' @param statistic ([`formula-list-selector`][syntax])\cr
 #'   a named list, a list of formulas,
-#'   or a single formula where the list element one or more of  `c("n", "N", "p")`
-#'   (or the RHS of a formula).
+#'   or a single formula where the list element is one or more of `c("n", "N", "p", "n_cum", "p_cum")`
+#'   (on the RHS of a formula).
 #' @param stat_label ([`formula-list-selector`][syntax])\cr
 #'   a named list, a list of formulas, or a single formula where
 #'   the list element is either a named list or a list of formulas defining the
@@ -45,14 +45,18 @@
 #' In such cases, use the `denominator` argument to specify a new definition
 #' of `"N"`, and subsequently `"p"`.
 #' The argument expects one of the following inputs:
+#' - a string: one of `"column"`, `"row"`, or `"cell"`.
+#'     - `"column"`, the default, returns percentages where the sum is equal to
+#'        one within the variable after the data frame has been subset with `by`/`strata`.
+#'     - `"row"` gives 'row' percentages where `by`/`strata` columns are the 'top'
+#'        of a cross table, and the variables are the rows. This is well-defined
+#'        for a single `by` or `strata` variable, and care must be taken when there
+#'        are more to ensure the results are as you expect.
+#'     - `"cell"` gives percentages where the denominator is the number of non-missing
+#'        rows in the source data frame.
 #' - a data frame. Any columns in the data frame that overlap with the `by`/`strata`
 #'   columns will be used to calculate the new `"N"`.
 #' - an integer. This single integer will be used as the new `"N"`
-#' - a string: one of `"column"`, `"row"`, or `"cell"`. `"column"` is equivalent
-#'   to `denominator=NULL`. `"row"` gives 'row' percentages where `by`/`strata`
-#'   columns are the 'top' of a cross table, and the variables are the rows.
-#'   `"cell"` gives percentages where the denominator is the number of non-missing
-#'   rows in the source data frame.
 #' - a structured data frame. The data frame will include columns from `by`/`strata`.
 #'   The last column must be named `"...ard_N..."`. The integers in this column will
 #'   be used as the updated `"N"` in the calculations.
@@ -104,7 +108,7 @@ ard_categorical.data.frame <- function(data,
                                        by = dplyr::group_vars(data),
                                        strata = NULL,
                                        statistic = everything() ~ c("n", "p", "N"),
-                                       denominator = NULL,
+                                       denominator = "column",
                                        fmt_fn = NULL,
                                        stat_label = everything() ~ default_stat_labels(),
                                        ...) {
@@ -137,8 +141,8 @@ ard_categorical.data.frame <- function(data,
   )
   check_list_elements(
     x = statistic,
-    predicate = \(x) is.character(x) && all(x %in% c("n", "p", "N")),
-    error_msg = "Elements passed in the {.arg statistic} argument must be one or more of {.val {c('n', 'p', 'N')}}"
+    predicate = \(x) is.character(x) && all(x %in% c("n", "p", "N", "n_cum", "p_cum")),
+    error_msg = "Elements passed in the {.arg statistic} argument must be one or more of {.val {c('n', 'p', 'N', 'n_cum', 'p_cum')}}"
   )
 
   # return empty ARD if no variables selected ----------------------------------
@@ -247,7 +251,7 @@ ard_categorical.data.frame <- function(data,
         imap(
           statistics_tabulation,
           function(x, variable) {
-            if (any(c("N", "p") %in% x[["tabulation"]])) {
+            if (any(c("N", "p", "p_cum") %in% x[["tabulation"]])) {
               TRUE
             } else {
               NULL
@@ -282,7 +286,7 @@ ard_categorical.data.frame <- function(data,
               ))
             }
         }
-        if ("p" %in% tab_stats[["tabulation"]]) {
+        if (any(c("p", "p_cum") %in% tab_stats[["tabulation"]])) {
           df_result_tabulation <-
             df_result_tabulation |>
             dplyr::mutate(
@@ -290,14 +294,24 @@ ard_categorical.data.frame <- function(data,
             )
         }
 
+        df_result_tabulation <-
+          .add_cum_count_stats(
+            df_result_tabulation,
+            variable = variable,
+            by = by,
+            strata = strata,
+            denominator = denominator,
+            tab_stats = tab_stats
+          )
+
         df_result_tabulation |>
           .nesting_rename_ard_columns(variable = variable, by = by, strata = strata) |>
           dplyr::mutate(
-            across(any_of(c("...ard_n...", "...ard_N...", "...ard_p...")), as.list),
+            across(any_of(c("...ard_n...", "...ard_N...", "...ard_p...", "...ard_n_cum...", "...ard_p_cum...")), as.list),
             across(c(matches("^group[0-9]+_level$"), any_of("variable_level")), as.list)
           ) |>
           tidyr::pivot_longer(
-            cols = any_of(c("...ard_n...", "...ard_N...", "...ard_p...")),
+            cols = any_of(c("...ard_n...", "...ard_N...", "...ard_p...", "...ard_n_cum...", "...ard_p_cum...")),
             names_to = "stat_name",
             values_to = "stat"
           ) |>
@@ -334,6 +348,52 @@ ard_categorical.data.frame <- function(data,
   )
 }
 
+# Internal helper: adds the cumulative columns `...ard_n_cum...` / `...ard_p_cum...` to the
+# tabulation data frame `x` when "n_cum" / "p_cum" appear in `tab_stats[["tabulation"]]`.
+.add_cum_count_stats <- function(x, variable, by, strata, denominator, tab_stats) {
+  # nothing to add unless a cumulative statistic ("n_cum" or "p_cum") was requested
+  if (!any(c("p_cum", "n_cum") %in% tab_stats[["tabulation"]])) {
+    return(x)
+  }
+
+  # cumulative stats need `denominator` to be the string "column" or "row"; any other value (e.g. NULL or "cell") aborts
+  if (!is_string(denominator) || !denominator %in% c("column", "row")) {
+    cli::cli_abort(
+      "The {.arg denominator} argument must be one of {.val {c(\"column\", \"row\")}}
+       when cumulative statistics {.val n_cum} or {.val p_cum} are specified, which
+       were requested for variable {.var {variable}}.",
+      call = get_cli_abort_call()
+    )
+  }
+
+  # running totals via cumsum(); switch(TRUE, val) yields val and switch(FALSE, val) yields NULL, so only requested columns are created
+  if (denominator %in% "column") { # cumsum() runs in row order within each `by`/`strata` group
+    x <- x |>
+      dplyr::mutate(
+        .by = any_of(c(by, strata)),
+        ...ard_n_cum... = switch("n_cum" %in% tab_stats[["tabulation"]],
+          cumsum(.data$...ard_n...)
+        ),
+        ...ard_p_cum... = switch("p_cum" %in% tab_stats[["tabulation"]],
+          cumsum(.data$...ard_p...)
+        )
+      )
+  } else if (denominator %in% "row") { # cumsum() runs in row order within each group defined by the `variable` column(s)
+    x <- x |>
+      dplyr::mutate(
+        .by = any_of(variable),
+        ...ard_n_cum... = switch("n_cum" %in% tab_stats[["tabulation"]],
+          cumsum(.data$...ard_n...)
+        ),
+        ...ard_p_cum... = switch("p_cum" %in% tab_stats[["tabulation"]],
+          cumsum(.data$...ard_p...)
+        )
+      )
+  }
+
+  x # input plus any cumulative columns added above
+}
+
 #' Results from `table()` as Data Frame
 #'
 #' Takes the results from [table()] and returns them as a data frame.
diff --git a/R/default_stat_labels.R b/R/default_stat_labels.R
index 082e3d28d..1bffdb780 100644
--- a/R/default_stat_labels.R
+++ b/R/default_stat_labels.R
@@ -21,6 +21,8 @@ default_stat_labels <- function() {
     n = "n",
     N = "N",
     p = "%",
+    n_cum = "Cumulative n",
+    p_cum = "Cumulative %",
     N_obs = "Vector Length",
     N_miss = "N Missing",
     N_nonmiss = "N Non-missing",
diff --git a/README.Rmd b/README.Rmd
index d1e593fbc..b75eed65c 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -17,7 +17,7 @@ knitr::opts_chunk$set(
 
 <!-- badges: start -->
 [![CRAN status](https://www.r-pkg.org/badges/version/cards)](https://CRAN.R-project.org/package=cards)
-[![Codecov test coverage](https://codecov.io/gh/insightsengineering/cards/branch/main/graph/badge.svg)](https://app.codecov.io/gh/insightsengineering/cards?branch=main)
+[![Codecov test coverage](https://codecov.io/gh/insightsengineering/cards/graph/badge.svg)](https://app.codecov.io/gh/insightsengineering/cards)
 [![Downloads](https://cranlogs.r-pkg.org/badges/cards)](https://cran.r-project.org/package=cards)
 [![R-CMD-check](https://github.com/insightsengineering/cards/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/insightsengineering/cards/actions/workflows/R-CMD-check.yaml)
 [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
diff --git a/README.md b/README.md
index c5dcaa031..0e8ca51c8 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 [![CRAN
 status](https://www.r-pkg.org/badges/version/cards)](https://CRAN.R-project.org/package=cards)
 [![Codecov test
-coverage](https://codecov.io/gh/insightsengineering/cards/branch/main/graph/badge.svg)](https://app.codecov.io/gh/insightsengineering/cards?branch=main)
+coverage](https://codecov.io/gh/insightsengineering/cards/graph/badge.svg)](https://app.codecov.io/gh/insightsengineering/cards)
 [![Downloads](https://cranlogs.r-pkg.org/badges/cards)](https://cran.r-project.org/package=cards)
 [![R-CMD-check](https://github.com/insightsengineering/cards/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/insightsengineering/cards/actions/workflows/R-CMD-check.yaml)
 [![Lifecycle:
diff --git a/man/ard_categorical.Rd b/man/ard_categorical.Rd
index 597162d51..df1513c93 100644
--- a/man/ard_categorical.Rd
+++ b/man/ard_categorical.Rd
@@ -13,7 +13,7 @@ ard_categorical(data, ...)
   by = dplyr::group_vars(data),
   strata = NULL,
   statistic = everything() ~ c("n", "p", "N"),
-  denominator = NULL,
+  denominator = "column",
   fmt_fn = NULL,
   stat_label = everything() ~ default_stat_labels(),
   ...
@@ -42,12 +42,12 @@ Arguments may be used in conjunction with one another.}
 
 \item{statistic}{(\code{\link[=syntax]{formula-list-selector}})\cr
 a named list, a list of formulas,
-or a single formula where the list element one or more of  \code{c("n", "N", "p")}
-(or the RHS of a formula).}
+or a single formula where the list element is one or more of \code{c("n", "N", "p", "n_cum", "p_cum")}
+(on the RHS of a formula).}
 
-\item{denominator}{(\code{data.frame}, \code{integer})\cr
-Specify this \emph{optional} argument to change the denominator,
-e.g. the \code{"N"} statistic. Default is \code{NULL}. See below for details.}
+\item{denominator}{(\code{string}, \code{data.frame}, \code{integer})\cr
+Specify this argument to change the denominator,
+e.g. the \code{"N"} statistic. Default is \code{'column'}. See below for details.}
 
 \item{fmt_fn}{(\code{\link[=syntax]{formula-list-selector}})\cr
 a named list, a list of formulas,
@@ -83,14 +83,20 @@ In such cases, use the \code{denominator} argument to specify a new definition
 of \code{"N"}, and subsequently \code{"p"}.
 The argument expects one of the following inputs:
 \itemize{
+\item a string: one of \code{"column"}, \code{"row"}, or \code{"cell"}.
+\itemize{
+\item \code{"column"}, the default, returns percentages where the sum is equal to
+one within the variable after the data frame has been subset with \code{by}/\code{strata}.
+\item \code{"row"} gives 'row' percentages where \code{by}/\code{strata} columns are the 'top'
+of a cross table, and the variables are the rows. This is well-defined
+for a single \code{by} or \code{strata} variable, and care must be taken when there
+are more to ensure the results are as you expect.
+\item \code{"cell"} gives percentages where the denominator is the number of non-missing
+rows in the source data frame.
+}
 \item a data frame. Any columns in the data frame that overlap with the \code{by}/\code{strata}
 columns will be used to calculate the new \code{"N"}.
 \item an integer. This single integer will be used as the new \code{"N"}
-\item a string: one of \code{"column"}, \code{"row"}, or \code{"cell"}. \code{"column"} is equivalent
-to \code{denominator=NULL}. \code{"row"} gives 'row' percentages where \code{by}/\code{strata}
-columns are the 'top' of a cross table, and the variables are the rows.
-\code{"cell"} gives percentages where the denominator is the number of non-missing
-rows in the source data frame.
 \item a structured data frame. The data frame will include columns from \code{by}/\code{strata}.
 The last column must be named \code{"...ard_N..."}. The integers in this column will
 be used as the updated \code{"N"} in the calculations.
diff --git a/man/ard_dichotomous.Rd b/man/ard_dichotomous.Rd
index 39e320cbe..1ca5e8f67 100644
--- a/man/ard_dichotomous.Rd
+++ b/man/ard_dichotomous.Rd
@@ -47,12 +47,12 @@ which returns the largest/last value after a sort.}
 
 \item{statistic}{(\code{\link[=syntax]{formula-list-selector}})\cr
 a named list, a list of formulas,
-or a single formula where the list element one or more of  \code{c("n", "N", "p")}
-(or the RHS of a formula).}
+or a single formula where the list element is one or more of \code{c("n", "N", "p", "n_cum", "p_cum")}
+(on the RHS of a formula).}
 
-\item{denominator}{(\code{data.frame}, \code{integer})\cr
-Specify this \emph{optional} argument to change the denominator,
-e.g. the \code{"N"} statistic. Default is \code{NULL}. See below for details.}
+\item{denominator}{(\code{string}, \code{data.frame}, \code{integer})\cr
+Specify this argument to change the denominator,
+e.g. the \code{"N"} statistic. Default is \code{'column'}. See below for details.}
 
 \item{fmt_fn}{(\code{\link[=syntax]{formula-list-selector}})\cr
 a named list, a list of formulas,
@@ -88,14 +88,20 @@ In such cases, use the \code{denominator} argument to specify a new definition
 of \code{"N"}, and subsequently \code{"p"}.
 The argument expects one of the following inputs:
 \itemize{
+\item a string: one of \code{"column"}, \code{"row"}, or \code{"cell"}.
+\itemize{
+\item \code{"column"}, the default, returns percentages where the sum is equal to
+one within the variable after the data frame has been subset with \code{by}/\code{strata}.
+\item \code{"row"} gives 'row' percentages where \code{by}/\code{strata} columns are the 'top'
+of a cross table, and the variables are the rows. This is well-defined
+for a single \code{by} or \code{strata} variable, and care must be taken when there
+are more to ensure the results are as you expect.
+\item \code{"cell"} gives percentages where the denominator is the number of non-missing
+rows in the source data frame.
+}
 \item a data frame. Any columns in the data frame that overlap with the \code{by}/\code{strata}
 columns will be used to calculate the new \code{"N"}.
 \item an integer. This single integer will be used as the new \code{"N"}
-\item a string: one of \code{"column"}, \code{"row"}, or \code{"cell"}. \code{"column"} is equivalent
-to \code{denominator=NULL}. \code{"row"} gives 'row' percentages where \code{by}/\code{strata}
-columns are the 'top' of a cross table, and the variables are the rows.
-\code{"cell"} gives percentages where the denominator is the number of non-missing
-rows in the source data frame.
 \item a structured data frame. The data frame will include columns from \code{by}/\code{strata}.
 The last column must be named \code{"...ard_N..."}. The integers in this column will
 be used as the updated \code{"N"} in the calculations.
diff --git a/man/ard_hierarchical.Rd b/man/ard_hierarchical.Rd
index 89c1927b9..9c93b9687 100644
--- a/man/ard_hierarchical.Rd
+++ b/man/ard_hierarchical.Rd
@@ -47,12 +47,12 @@ specified here appear in results. Default is \code{dplyr::group_vars(data)}.}
 
 \item{statistic}{(\code{\link[=syntax]{formula-list-selector}})\cr
 a named list, a list of formulas,
-or a single formula where the list element one or more of  \code{c("n", "N", "p")}
-(or the RHS of a formula).}
+or a single formula where the list element is one or more of \code{c("n", "N", "p", "n_cum", "p_cum")}
+(on the RHS of a formula).}
 
-\item{denominator}{(\code{data.frame}, \code{integer})\cr
-Specify this \emph{optional} argument to change the denominator,
-e.g. the \code{"N"} statistic. Default is \code{NULL}. See below for details.}
+\item{denominator}{(\code{string}, \code{data.frame}, \code{integer})\cr
+Specify this argument to change the denominator,
+e.g. the \code{"N"} statistic. Default is \code{'column'}. See below for details.}
 
 \item{fmt_fn}{(\code{\link[=syntax]{formula-list-selector}})\cr
 a named list, a list of formulas,
@@ -102,14 +102,20 @@ In such cases, use the \code{denominator} argument to specify a new definition
 of \code{"N"}, and subsequently \code{"p"}.
 The argument expects one of the following inputs:
 \itemize{
+\item a string: one of \code{"column"}, \code{"row"}, or \code{"cell"}.
+\itemize{
+\item \code{"column"}, the default, returns percentages where the sum is equal to
+one within the variable after the data frame has been subset with \code{by}/\code{strata}.
+\item \code{"row"} gives 'row' percentages where \code{by}/\code{strata} columns are the 'top'
+of a cross table, and the variables are the rows. This is well-defined
+for a single \code{by} or \code{strata} variable, and care must be taken when there
+are more to ensure the results are as you expect.
+\item \code{"cell"} gives percentages where the denominator is the number of non-missing
+rows in the source data frame.
+}
 \item a data frame. Any columns in the data frame that overlap with the \code{by}/\code{strata}
 columns will be used to calculate the new \code{"N"}.
 \item an integer. This single integer will be used as the new \code{"N"}
-\item a string: one of \code{"column"}, \code{"row"}, or \code{"cell"}. \code{"column"} is equivalent
-to \code{denominator=NULL}. \code{"row"} gives 'row' percentages where \code{by}/\code{strata}
-columns are the 'top' of a cross table, and the variables are the rows.
-\code{"cell"} gives percentages where the denominator is the number of non-missing
-rows in the source data frame.
 \item a structured data frame. The data frame will include columns from \code{by}/\code{strata}.
 The last column must be named \code{"...ard_N..."}. The integers in this column will
 be used as the updated \code{"N"} in the calculations.
diff --git a/man/ard_stack_hierarchical.Rd b/man/ard_stack_hierarchical.Rd
index 006b94570..4dd66c630 100644
--- a/man/ard_stack_hierarchical.Rd
+++ b/man/ard_stack_hierarchical.Rd
@@ -69,8 +69,8 @@ summary statistics will be returned. Default is \code{everything()}.}
 
 \item{statistic}{(\code{\link[=syntax]{formula-list-selector}})\cr
 a named list, a list of formulas,
-or a single formula where the list element one or more of  \code{c("n", "N", "p")}
-(or the RHS of a formula).}
+or a single formula where the list element is one or more of \code{c("n", "N", "p", "n_cum", "p_cum")}
+(on the RHS of a formula).}
 
 \item{overall}{(scalar \code{logical})\cr logical indicating whether overall statistics
 should be calculated (i.e. repeat the operations with \code{by=NULL} in \emph{most cases}, see below for details).
diff --git a/man/dot-calculate_tabulation_statistics.Rd b/man/dot-calculate_tabulation_statistics.Rd
index deb3f2cbd..d452180cd 100644
--- a/man/dot-calculate_tabulation_statistics.Rd
+++ b/man/dot-calculate_tabulation_statistics.Rd
@@ -32,14 +32,14 @@ columns specified.
 
 Arguments may be used in conjunction with one another.}
 
-\item{denominator}{(\code{data.frame}, \code{integer})\cr
-Specify this \emph{optional} argument to change the denominator,
-e.g. the \code{"N"} statistic. Default is \code{NULL}. See below for details.}
+\item{denominator}{(\code{string}, \code{data.frame}, \code{integer})\cr
+Specify this argument to change the denominator,
+e.g. the \code{"N"} statistic. Default is \code{'column'}. See below for details.}
 
 \item{statistic}{(\code{\link[=syntax]{formula-list-selector}})\cr
 a named list, a list of formulas,
-or a single formula where the list element one or more of  \code{c("n", "N", "p")}
-(or the RHS of a formula).}
+or a single formula where the list element is one or more of \code{c("n", "N", "p", "n_cum", "p_cum")}
+(on the RHS of a formula).}
 }
 \value{
 an ARD data frame of class 'card'
diff --git a/man/dot-process_denominator.Rd b/man/dot-process_denominator.Rd
index 82f9b2997..444fa5795 100644
--- a/man/dot-process_denominator.Rd
+++ b/man/dot-process_denominator.Rd
@@ -13,9 +13,9 @@ a data frame}
 \item{variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr
 columns to include in summaries. Default is \code{everything()}.}
 
-\item{denominator}{(\code{data.frame}, \code{integer})\cr
-Specify this \emph{optional} argument to change the denominator,
-e.g. the \code{"N"} statistic. Default is \code{NULL}. See below for details.}
+\item{denominator}{(\code{string}, \code{data.frame}, \code{integer})\cr
+Specify this argument to change the denominator,
+e.g. the \code{"N"} statistic. Default is \code{'column'}. See below for details.}
 
 \item{by, strata}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr
 columns to use for grouping or stratifying the table output.
diff --git a/tests/testthat/_snaps/ard_categorical.md b/tests/testthat/_snaps/ard_categorical.md
index 01ee08197..1b9abe7ff 100644
--- a/tests/testthat/_snaps/ard_categorical.md
+++ b/tests/testthat/_snaps/ard_categorical.md
@@ -184,3 +184,12 @@
       Error in `ard_categorical()`:
       ! Factors with NA levels are not allowed, which are present in column "am".
 
+# ard_categorical() with cumulative counts messaging
+
+    Code
+      ard_categorical(ADSL, variables = "AGEGR1", by = SEX, statistic = everything() ~
+        c("n", "p", "n_cum", "p_cum"), denominator = NULL)
+    Condition
+      Error in `ard_categorical()`:
+      ! The `denominator` argument must be one of "column" and "row" when cumulative statistics "n_cum" or "p_cum" are specified, which were requested for variable `AGEGR1`.
+
diff --git a/tests/testthat/test-ard_categorical.R b/tests/testthat/test-ard_categorical.R
index 6c30dbdd2..690107c31 100644
--- a/tests/testthat/test-ard_categorical.R
+++ b/tests/testthat/test-ard_categorical.R
@@ -888,6 +888,229 @@ test_that("ard_categorical() errors with incomplete factor columns", {
   )
 })
 
+test_that("ard_categorical(denominator='column') with cumulative counts", {
+  # `denominator` is left at its default here: cumulative stats must run silently without `by`/`strata`
+  expect_silent(
+    ard <-
+      ard_categorical(
+        ADSL,
+        variables = "AGEGR1",
+        statistic = everything() ~ c("n", "p", "n_cum", "p_cum")
+      )
+  )
+  # the `n_cum` at the last level returned by `.unique_and_sorted()` must equal nrow(ADSL)
+  expect_equal(
+    ard |>
+      dplyr::filter(stat_name == "n_cum", variable_level %in% dplyr::last(.unique_and_sorted(ADSL$AGEGR1))) |>
+      dplyr::pull(stat) |>
+      unlist(),
+    nrow(ADSL)
+  )
+  # the `p_cum` at the last level returned by `.unique_and_sorted()` must equal 1
+  expect_equal(
+    ard |>
+      dplyr::filter(stat_name == "p_cum", variable_level %in% dplyr::last(.unique_and_sorted(ADSL$AGEGR1))) |>
+      dplyr::pull(stat) |>
+      unlist(),
+    1
+  )
+  # `n_cum` per level must equal the running sum (cumsum) of the table() counts of AGEGR1
+  expect_equal(
+    ard |>
+      dplyr::filter(stat_name %in% "n_cum") |>
+      dplyr::select(variable_level, stat) |>
+      deframe(),
+    table(ADSL$AGEGR1) |>
+      cumsum() |>
+      as.list()
+  )
+  # `p_cum` per level must equal the running sum (cumsum) of the prop.table() proportions of AGEGR1
+  expect_equal(
+    ard |>
+      dplyr::filter(stat_name %in% "p_cum") |>
+      dplyr::select(variable_level, stat) |>
+      deframe(),
+    table(ADSL$AGEGR1) |>
+      prop.table() |>
+      cumsum() |>
+      as.list()
+  )
+
+  # cumulative stats must also run silently when grouping with `by = ARM`
+  expect_silent(
+    ard <-
+      ard_categorical(
+        ADSL,
+        variables = "AGEGR1",
+        by = ARM,
+        statistic = everything() ~ c("n", "p", "n_cum", "p_cum")
+      )
+  )
+  # within the Placebo arm, `n_cum` must equal the cumsum of the AGEGR1 counts for that arm only
+  expect_equal(
+    ard |>
+      dplyr::filter(stat_name %in% "n_cum", group1_level == "Placebo") |>
+      dplyr::select(variable_level, stat) |>
+      deframe(),
+    table(ADSL$AGEGR1[ADSL$ARM == "Placebo"]) |>
+      cumsum() |>
+      as.list()
+  )
+  # within the Placebo arm, `p_cum` must equal the cumsum of the AGEGR1 proportions for that arm only
+  expect_equal(
+    ard |>
+      dplyr::filter(stat_name %in% "p_cum", group1_level == "Placebo") |>
+      dplyr::select(variable_level, stat) |>
+      deframe(),
+    table(ADSL$AGEGR1[ADSL$ARM == "Placebo"]) |>
+      prop.table() |>
+      cumsum() |>
+      as.list()
+  )
+
+  # cumulative stats must also run silently with both `by = ARM` and `strata = SEX`
+  expect_silent(
+    ard <-
+      ard_categorical(
+        ADSL,
+        variables = "AGEGR1",
+        by = ARM,
+        strata = SEX,
+        statistic = everything() ~ c("n", "p", "n_cum", "p_cum")
+      )
+  )
+  # within the Placebo / F subgroup, `n_cum` must equal the cumsum of that subgroup's AGEGR1 counts
+  expect_equal(
+    ard |>
+      dplyr::filter(stat_name %in% "n_cum", group1_level == "Placebo", group2_level == "F") |>
+      dplyr::select(variable_level, stat) |>
+      deframe(),
+    table(ADSL$AGEGR1[ADSL$ARM == "Placebo" & ADSL$SEX == "F"]) |>
+      cumsum() |>
+      as.list()
+  )
+  # within the Placebo / F subgroup, `p_cum` must equal the cumsum of that subgroup's AGEGR1 proportions
+  expect_equal(
+    ard |>
+      dplyr::filter(stat_name %in% "p_cum", group1_level == "Placebo", group2_level == "F") |>
+      dplyr::select(variable_level, stat) |>
+      deframe(),
+    table(ADSL$AGEGR1[ADSL$ARM == "Placebo" & ADSL$SEX == "F"]) |>
+      prop.table() |>
+      cumsum() |>
+      as.list()
+  )
+
+  # requesting only `n_cum` must give the same result as filtering the full set of stats to `n_cum`
+  expect_equal(
+    ard_categorical(
+      ADSL,
+      variables = "AGEGR1",
+      statistic = everything() ~ "n_cum"
+    ),
+    ard_categorical(
+      ADSL,
+      variables = "AGEGR1",
+      statistic = everything() ~ c("n", "p", "n_cum", "p_cum")
+    ) |>
+      dplyr::filter(stat_name == "n_cum")
+  )
+  # requesting only `p_cum` must give the same result as filtering the full set of stats to `p_cum`
+  expect_equal(
+    ard_categorical(
+      ADSL,
+      variables = "AGEGR1",
+      statistic = everything() ~ "p_cum"
+    ),
+    ard_categorical(
+      ADSL,
+      variables = "AGEGR1",
+      statistic = everything() ~ c("n", "p", "n_cum", "p_cum")
+    ) |>
+      dplyr::filter(stat_name == "p_cum")
+  )
+})
+
+test_that("ard_categorical(denominator='row') with cumulative counts", {
+  # with `denominator = 'row'`, cumulative stats must run silently without `by`/`strata`
+  expect_silent(
+    ard <-
+      ard_categorical(
+        ADSL,
+        variables = "AGEGR1",
+        statistic = everything() ~ c("n", "p", "n_cum", "p_cum"),
+        denominator = "row"
+      )
+  )
+  # without `by`, every stat in a `.by = all_ard_variables()` group must equal the first one, i.e. `n` == `n_cum`
+  expect_true(
+    ard |>
+      dplyr::filter(stat_name %in% c("n", "n_cum")) |>
+      dplyr::mutate(
+        .by = all_ard_variables(),
+        check_equal = unlist(stat) == unlist(stat)[1]
+      ) |>
+      dplyr::pull(check_equal) |>
+      unique()
+  )
+  # without `by`, every `p` and `p_cum` value must be 1, so the unique values collapse to a single 1
+  expect_equal(
+    ard |>
+      dplyr::filter(stat_name %in% c("p", "p_cum")) |>
+      dplyr::pull(stat) |>
+      unlist() |>
+      unique(),
+    1
+  )
+
+  # with `denominator = 'row'`, cumulative stats must also run silently when grouping with `by = SEX`
+  expect_silent(
+    ard <-
+      ard_categorical(
+        ADSL,
+        variables = "AGEGR1",
+        by = SEX,
+        statistic = everything() ~ c("n", "p", "n_cum", "p_cum"),
+        denominator = "row"
+      )
+  )
+  # for the `<65` level, `n_cum` accumulates across the SEX groups: compare with the cumsum of SEX counts
+  expect_equal(
+    ard |>
+      dplyr::filter(variable_level %in% "<65", stat_name == "n_cum") |>
+      dplyr::select(group1_level, stat) |>
+      deframe(),
+    table(ADSL$SEX[ADSL$AGEGR1 == "<65"]) |>
+      cumsum() |>
+      as.list()
+  )
+  # for the `<65` level, `p_cum` accumulates across the SEX groups: compare with the cumsum of SEX proportions
+  expect_equal(
+    ard |>
+      dplyr::filter(variable_level %in% "<65", stat_name == "p_cum") |>
+      dplyr::select(group1_level, stat) |>
+      deframe(),
+    table(ADSL$SEX[ADSL$AGEGR1 == "<65"]) |>
+      prop.table() |>
+      cumsum() |>
+      as.list()
+  )
+})
+
+test_that("ard_categorical() with cumulative counts messaging", {
+  # `n_cum`/`p_cum` require `denominator` to be 'column' or 'row'; `denominator = NULL` must error (message is snapshotted)
+  expect_snapshot(
+    error = TRUE,
+    ard_categorical(
+      ADSL,
+      variables = "AGEGR1",
+      by = SEX,
+      statistic = everything() ~ c("n", "p", "n_cum", "p_cum"),
+      denominator = NULL
+    )
+  )
+})
+
 test_that("ard_categorical() ordering for multiple strata", {
   adae_mini <- ADAE |>
     dplyr::select(USUBJID, TRTA, AESOC, AEDECOD) |>