insightsengineering · edelarua · Jan 22, 2025 · Jan 22, 2025 · Jan 22, 2025 · Jan 22, 2025
diff --git a/R/analyze_variables.R b/R/analyze_variables.R
@@ -602,10 +602,13 @@ a_summary <- function(x,
   )
 
   x_stats <- x_stats[.stats]
+
   if (is.character(x) || is.factor(x)) {
     levels_per_stats <- lapply(x_stats, names) # if there is a count is table() with levels
+    rep_lbl <- sapply(names(levels_per_stats), function(x) levels_per_stats[x] == x)
+    levels_per_stats[rep_lbl] <- list(NULL)
   } else {
-    levels_per_stats <- NULL
+    levels_per_stats <- names(x_stats)
   }
 
   # Formats checks

diff --git a/R/utils_default_stats_formats_labels.R b/R/utils_default_stats_formats_labels.R
@@ -221,7 +221,7 @@ get_stat_names <- function(stat_results, stat_names_in = NULL) {
 #'   character vector from [formatters::list_valid_format_labels()] or a custom format function.
 #'
 #' @return
-#' * `get_formats_from_stats()` returns a named vector of formats (if present in either
+#' * `get_formats_from_stats()` returns a named list of formats (if present in either
 #'   `tern_default_formats` or `formats_in`, otherwise `NULL`). Values can be taken from
 #'   [formatters::list_valid_format_labels()] or a custom function (e.g. [formatting_functions]).
 #'
@@ -252,21 +252,15 @@ get_formats_from_stats <- function(stats, formats_in = NULL) {
     checkmate::assert_character(formats_in, null.ok = TRUE)
   }
 
-  # Extract global defaults
-  which_fmt <- match(stats, names(tern_default_formats))
+  # Add dummy formats for each row
+  formats_stats <- rep(NA_character_, length(stats)) %>% setNames(stats)
 
-  # Select only needed formats from stats
-  ret <- vector("list", length = length(stats)) # Returning a list is simpler
-  ret[!is.na(which_fmt)] <- tern_default_formats[which_fmt[!is.na(which_fmt)]]
+  # Apply custom formats
+  out <- .adjust_stats_desc_by_in_def(formats_stats, formats_in, tern_default_formats)
 
-  out <- setNames(ret, stats)
-
-  # Modify some with custom formats
-  if (!is.null(formats_in)) {
-    # Stats is the main
-    common_names <- intersect(names(out), names(formats_in))
-    out[common_names] <- formats_in[common_names]
-  }
+  # Set missing formats to NULL
+  null_stats <- sapply(out, is.na) %>% suppressWarnings()
+  out[null_stats] <- list(NULL)
 
   out
 }
@@ -309,30 +303,23 @@ get_labels_from_stats <- function(stats, labels_in = NULL, levels_per_stats = NU
     checkmate::assert_character(labels_in, null.ok = TRUE)
   }
 
-  # Default for stats with sublevels (for factors or chrs) are the labels
+  # Apply default labels for each row
   if (!is.null(levels_per_stats)) {
     if (is.null(names(levels_per_stats))) {
-      names(levels_per_stats) <- levels_per_stats
+      levels_per_stats <- rep(NA_character_, length(levels_per_stats)) %>% setNames(levels_per_stats)
     }
-    out <- .adjust_stats_desc_by_in_def(levels_per_stats, labels_in, tern_default_labels)
-    # numeric case, where there are not other levels (list of stats)
   } else {
-    which_lbl <- match(stats, names(tern_default_labels))
-
-    ret <- stats # The default
-    ret[!is.na(which_lbl)] <- tern_default_labels[which_lbl[!is.na(which_lbl)]]
+    levels_per_stats <- rep(NA_character_, length(stats)) %>% setNames(stats)
+  }
 
-    out <- setNames(ret, stats)
+  # Apply custom labels
+  out <- .adjust_stats_desc_by_in_def(levels_per_stats, labels_in, tern_default_labels)
 
-    # Modify some with custom labels
-    if (!is.null(labels_in)) {
-      # Stats is the main
-      common_names <- intersect(names(out), names(labels_in))
-      out[common_names] <- unlist(labels_in[common_names], recursive = FALSE)
-    }
-  }
+  # Set missing labels to stat name
+  no_label <- sapply(out, is.na) %>% suppressWarnings()
+  out[no_label] <- names(out[no_label])
 
-  out
+  out %>% unlist()
 }
 
 #' @describeIn default_stats_formats_labels Format indent modifiers for a given vector/list of statistics.
@@ -370,99 +357,66 @@ get_indents_from_stats <- function(stats, indents_in = NULL, row_nms = NULL) {
     return(out)
   }
 
-  # Apply default indentation
-  if (is.null(row_nms)) {
-    out <- setNames(rep(0L, length(stats)), stats)
+  # Get default indentation
+  all_nms <- if (is.null(row_nms)) {
+    stats
   } else {
-    all_nms <- paste(rep(stats, each = length(row_nms)), rep(row_nms, length(stats)), sep = ".")
-    out <- setNames(rep(0L, length(stats) * length(row_nms)), stats)
+    paste(rep(stats, each = length(row_nms)), rep(row_nms, length(stats)), sep = ".")
   }
+  def_indent <- rep(0L, length(all_nms)) %>% setNames(all_nms)
 
-  # Modify with custom indentation
-  if (!is.null(indents_in)) {
-    if (is.null(row_nms)) { # One row per statistic
-      common_names <- intersect(names(out), names(indents_in))
-      out[common_names] <- indents_in[common_names]
-    } else if (!is.null(row_nms)) { # One row per combination of variable level and statistic
-      out <- sapply(
-        all_nms,
-        function(x) {
-          if (x %in% names(indents_in)) {
-            indents_in[[x]]
-          } else {
-            stat_lvl <- regmatches(x, regexpr("[.]", x), invert = TRUE)[[1]]
-            stat <- stat_lvl[1]
-            lvl <- stat_lvl[2]
-            if (lvl %in% names(indents_in)) {
-              indents_in[[lvl]]
-            } else if (stat %in% names(indents_in)) {
-              indents_in[[stat]]
-            } else {
-              0
-            }
-          }
-        }
-      )
-    }
-  }
-
-  out
+  # Apply custom indentation
+  out <- .adjust_stats_desc_by_in_def(def_indent, indents_in, NULL)
+  out %>% unlist()
 }
 
 # Function to loop over each stat and levels to set correct values
+# levels_per_stats - every combo of statistic & level must be represented
+# tern_defaults - one per statistic (names are statistic names)
+# Order of precedence by info present in name: level and stat > level > stat > other defaults
 .adjust_stats_desc_by_in_def <- function(levels_per_stats, user_in, tern_defaults) {
+  single_stats <- any(names(levels_per_stats) %in% names(tern_defaults))
+  if (is.list(levels_per_stats)) {
+    if (any(names(levels_per_stats) %in% names(tern_defaults))) single_stats <- FALSE
+    null_stats <- sapply(levels_per_stats, is.null)
+    levels_per_stats[null_stats] <- ""
+    levels_per_stats <- lapply(levels_per_stats, function(x) x %>% setNames(x)) %>% unlist()
+    levels_per_stats[names(which(null_stats))] <- NA_character_
+  }
   out <- levels_per_stats
 
-  # Seq over the stats levels (can be also flat (stat$NULL))
-  for (stat_i in seq_along(levels_per_stats)) {
-    # If you want to change all factor levels at once by statistic
-    common_stat_names <- intersect(names(levels_per_stats), names(user_in))
-
-    # Levels for each statistic
-    nm_of_levs <- levels_per_stats[[stat_i]]
-    # Special case in which only stat$NULL
-    if (is.null(nm_of_levs)) {
-      nm_of_levs <- "a single NULL level"
-    }
-
-    # Loop over levels for each statistic
-    for (lev_i in seq_along(nm_of_levs)) {
-      # If there are no further names (stat$NULL) push label (stat) down to lowest level
-      if (is.null(levels_per_stats[[stat_i]])) {
-        lev_val <- names(levels_per_stats[stat_i])
-        out[[stat_i]] <- lev_val
-      } else {
-        lev_val <- levels_per_stats[[stat_i]][[lev_i]]
-      }
-
-      # Add default if it is a stat at last level
-      if (lev_val %in% names(tern_defaults)) {
-        out[[stat_i]][[lev_i]] <- tern_defaults[[lev_val]]
-      }
-
-      # If a general stat was added to the custom labels
-      if (names(levels_per_stats[stat_i]) %in% names(user_in)) {
-        out[[stat_i]][[lev_i]] <- user_in[[names(levels_per_stats[stat_i])]]
-      }
-
-      # If a stat level (e.g. if it is counts levels from table) was added to the custom labels
-      if (lev_val %in% names(user_in)) {
-        out[[stat_i]][[lev_i]] <- user_in[[lev_val]]
-      }
-
-      # If stat_i.lev_val is added to labels_in
-      composite_stat_lev_nm <- paste(
-        names(levels_per_stats[stat_i]),
-        lev_val,
-        sep = "."
-      )
-      if (composite_stat_lev_nm %in% names(user_in)) {
-        out[[stat_i]][[lev_i]] <- user_in[[composite_stat_lev_nm]]
+  if (!single_stats) { # One row per combination of variable level and statistic
+    out <- sapply(
+      names(levels_per_stats),
+      function(x) {
+        if (x %in% names(user_in)) {
+          user_in[[x]]
+        } else {
+          stat_lvl <- regmatches(x, regexpr("[.]", x), invert = TRUE)[[1]]
+          stat <- stat_lvl[1]
+          lvl <- stat_lvl[2]
+          if (lvl %in% names(user_in)) {
+            user_in[[lvl]]
+          } else if (stat %in% names(user_in)) {
+            user_in[[stat]]
+          } else { # fill in gaps with tern defaults
+            if ((is.null(out[[x]]) | is.na(out[[x]]))) {
+              if (stat %in% names(tern_defaults)) tern_defaults[[stat]] else x
+            } else {
+              out[[x]]
+            }
+          }
+        }
       }
-
-      # Used by the unlist (to avoid count_fraction1, count_fraction2, etc.)
-      names(out[[stat_i]])[lev_i] <- lev_val
+    )
+  } else { # One row per statistic
+    if (!is.null(user_in)) {
+      common_stats <- intersect(names(out), names(user_in))
+      out[common_stats] <- user_in[common_stats]
     }
+    # fill in gaps with tern defaults
+    common_stats <- intersect(names(out[is.null(out) | is.na(out)]), names(tern_defaults))
+    out[common_stats] <- tern_defaults[common_stats]
   }
 
   out

diff --git a/tests/testthat/_snaps/analyze_variables.md b/tests/testthat/_snaps/analyze_variables.md
@@ -1402,7 +1402,7 @@
       RowsVerticalSection (in_rows) object print method:
       ----------------------------
                           row_name formatted_cell indent_mod row_label
-      1                        n.n              5          0         n
+      1                          n              5          0         n
       2                    count.a              3          0         a
       3                    count.b              1          0         b
       4                    count.c              1          0         c
@@ -1415,7 +1415,7 @@
       11                fraction.a    3/5 (60.0%)          0         a
       12                fraction.b    1/5 (20.0%)          0         b
       13                fraction.c    1/5 (20.0%)          0         c
-      14               n_blq.n_blq              0          0     n_blq
+      14                     n_blq              0          0     n_blq
 
 ---
 
@@ -1425,7 +1425,7 @@
       RowsVerticalSection (in_rows) object print method:
       ----------------------------
                           row_name formatted_cell indent_mod row_label
-      1                        n.n              4          0         n
+      1                          n              4          0         n
       2                    count.A              2          0         A
       3                    count.B              1          0         B
       4                    count.C              1          0         C
@@ -1438,7 +1438,7 @@
       11                fraction.A    2/4 (50.0%)          0         A
       12                fraction.B    1/4 (25.0%)          0         B
       13                fraction.C    1/4 (25.0%)          0         C
-      14               n_blq.n_blq              0          0     n_blq
+      14                     n_blq              0          0     n_blq
 
 ---
 
@@ -1474,7 +1474,7 @@
       RowsVerticalSection (in_rows) object print method:
       ----------------------------
                            row_name formatted_cell indent_mod         row_label
-      1                         n.n           5.00         -1 number of records
+      1                           n           5.00         -1 number of records
       2                     count.a              2          5                 a
       3                     count.b              1          5                 b
       4                     count.c              1          5                 c
@@ -1491,7 +1491,7 @@
       15                 fraction.b    1/5 (20.0%)          0                 b
       16                 fraction.c    1/5 (20.0%)          0                 c
       17                fraction.NA    1/5 (20.0%)          0                NA
-      18                n_blq.n_blq              0          0             n_blq
+      18                      n_blq              0          0             n_blq
 
 # a_summary works with healthy input when compare_with_ref_group = TRUE.
 
@@ -1538,7 +1538,7 @@
       RowsVerticalSection (in_rows) object print method:
       ----------------------------
                           row_name formatted_cell indent_mod                  row_label
-      1                        n.n              5          0                          n
+      1                          n              5          0                          n
       2                    count.a              3          0                          a
       3                    count.b              1          0                          b
       4                    count.c              1          0                          c
@@ -1551,8 +1551,8 @@
       11                fraction.a    3/5 (60.0%)          0                          a
       12                fraction.b    1/5 (20.0%)          0                          b
       13                fraction.c    1/5 (20.0%)          0                          c
-      14               n_blq.n_blq              0          0                      n_blq
-      15   pval_counts.pval_counts         0.9560          0 p-value (chi-squared test)
+      14                     n_blq              0          0                      n_blq
+      15               pval_counts         0.9560          0 p-value (chi-squared test)
 
 ---
 
@@ -1562,7 +1562,7 @@
       RowsVerticalSection (in_rows) object print method:
       ----------------------------
                           row_name formatted_cell indent_mod                  row_label
-      1                        n.n              4          0                          n
+      1                          n              4          0                          n
       2                    count.A              2          0                          A
       3                    count.B              1          0                          B
       4                    count.C              1          0                          C
@@ -1575,8 +1575,8 @@
       11                fraction.A    2/4 (50.0%)          0                          A
       12                fraction.B    1/4 (25.0%)          0                          B
       13                fraction.C    1/4 (25.0%)          0                          C
-      14               n_blq.n_blq              0          0                      n_blq
-      15   pval_counts.pval_counts         0.9074          0 p-value (chi-squared test)
+      14                     n_blq              0          0                      n_blq
+      15               pval_counts         0.9074          0 p-value (chi-squared test)
 
 ---
 
@@ -1613,7 +1613,7 @@
       RowsVerticalSection (in_rows) object print method:
       ----------------------------
                            row_name formatted_cell indent_mod                  row_label
-      1                         n.n           5.00         -1          number of records
+      1                           n           5.00         -1          number of records
       2                     count.a              2          5                          a
       3                     count.b              1          5                          b
       4                     count.c              1          5                          c
@@ -1630,8 +1630,8 @@
       15                 fraction.b    1/5 (20.0%)          0                          b
       16                 fraction.c    1/5 (20.0%)          0                          c
       17                fraction.NA    1/5 (20.0%)          0                         NA
-      18                n_blq.n_blq              0          0                      n_blq
-      19    pval_counts.pval_counts         0.8254          0 p-value (chi-squared test)
+      18                      n_blq              0          0                      n_blq
+      19                pval_counts         0.8254          0 p-value (chi-squared test)
 
 # `analyze_vars` works with healthy input, default `na_rm = TRUE`.
 

diff --git a/tests/testthat/test-analyze_variables.R b/tests/testthat/test-analyze_variables.R
@@ -617,6 +617,6 @@ testthat::test_that("analyze_vars works well with additional stat names (.stat_n
   cols_int <- names(res2) %in% c("variable", "variable_level", "variable_label", "stat_name", "stat")
   testthat::expect_equal(
     unlist(res2[nrow(res2), cols_int, drop = TRUE], use.names = FALSE),
-    c("VAR2", "a_zero.a_zero", "A_ZERO", NA, 0)
+    c("VAR2", "a_zero", "A_ZERO", NA, 0)
   )
 })