Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix label and indentation processors #1379

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion R/analyze_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -602,10 +602,13 @@ a_summary <- function(x,
)

x_stats <- x_stats[.stats]

if (is.character(x) || is.factor(x)) {
levels_per_stats <- lapply(x_stats, names) # if there is a count is table() with levels
rep_lbl <- sapply(names(levels_per_stats), function(x) levels_per_stats[x] == x)
levels_per_stats[rep_lbl] <- list(NULL)
} else {
levels_per_stats <- NULL
levels_per_stats <- names(x_stats)
}

# Formats checks
Expand Down
180 changes: 67 additions & 113 deletions R/utils_default_stats_formats_labels.R
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ get_stat_names <- function(stat_results, stat_names_in = NULL) {
#' character vector from [formatters::list_valid_format_labels()] or a custom format function.
#'
#' @return
#' * `get_formats_from_stats()` returns a named vector of formats (if present in either
#' * `get_formats_from_stats()` returns a named list of formats (if present in either
#' `tern_default_formats` or `formats_in`, otherwise `NULL`). Values can be taken from
#' [formatters::list_valid_format_labels()] or a custom function (e.g. [formatting_functions]).
#'
Expand Down Expand Up @@ -252,21 +252,15 @@ get_formats_from_stats <- function(stats, formats_in = NULL) {
checkmate::assert_character(formats_in, null.ok = TRUE)
}

# Extract global defaults
which_fmt <- match(stats, names(tern_default_formats))
# Add dummy formats for each row
formats_stats <- rep(NA_character_, length(stats)) %>% setNames(stats)

# Select only needed formats from stats
ret <- vector("list", length = length(stats)) # Returning a list is simpler
ret[!is.na(which_fmt)] <- tern_default_formats[which_fmt[!is.na(which_fmt)]]
# Apply custom formats
out <- .adjust_stats_desc_by_in_def(formats_stats, formats_in, tern_default_formats)

out <- setNames(ret, stats)

# Modify some with custom formats
if (!is.null(formats_in)) {
# Stats is the main
common_names <- intersect(names(out), names(formats_in))
out[common_names] <- formats_in[common_names]
}
# Set missing formats to NULL
null_stats <- sapply(out, is.na) %>% suppressWarnings()
out[null_stats] <- list(NULL)

out
}
Expand Down Expand Up @@ -309,30 +303,23 @@ get_labels_from_stats <- function(stats, labels_in = NULL, levels_per_stats = NU
checkmate::assert_character(labels_in, null.ok = TRUE)
}

# Default for stats with sublevels (for factors or chrs) are the labels
# Apply default labels for each row
if (!is.null(levels_per_stats)) {
if (is.null(names(levels_per_stats))) {
names(levels_per_stats) <- levels_per_stats
levels_per_stats <- rep(NA_character_, length(levels_per_stats)) %>% setNames(levels_per_stats)
}
out <- .adjust_stats_desc_by_in_def(levels_per_stats, labels_in, tern_default_labels)
# numeric case, where there are not other levels (list of stats)
} else {
which_lbl <- match(stats, names(tern_default_labels))

ret <- stats # The default
ret[!is.na(which_lbl)] <- tern_default_labels[which_lbl[!is.na(which_lbl)]]
levels_per_stats <- rep(NA_character_, length(stats)) %>% setNames(stats)
}

out <- setNames(ret, stats)
# Apply custom labels
out <- .adjust_stats_desc_by_in_def(levels_per_stats, labels_in, tern_default_labels)

# Modify some with custom labels
if (!is.null(labels_in)) {
# Stats is the main
common_names <- intersect(names(out), names(labels_in))
out[common_names] <- unlist(labels_in[common_names], recursive = FALSE)
}
}
# Set missing labels to stat name
no_label <- sapply(out, is.na) %>% suppressWarnings()
out[no_label] <- names(out[no_label])

out
out %>% unlist()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe we want the other .unlist_keep*?

}

#' @describeIn default_stats_formats_labels Format indent modifiers for a given vector/list of statistics.
Expand Down Expand Up @@ -370,99 +357,66 @@ get_indents_from_stats <- function(stats, indents_in = NULL, row_nms = NULL) {
return(out)
}

# Apply default indentation
if (is.null(row_nms)) {
out <- setNames(rep(0L, length(stats)), stats)
# Get default indentation
all_nms <- if (is.null(row_nms)) {
stats
} else {
all_nms <- paste(rep(stats, each = length(row_nms)), rep(row_nms, length(stats)), sep = ".")
out <- setNames(rep(0L, length(stats) * length(row_nms)), stats)
paste(rep(stats, each = length(row_nms)), rep(row_nms, length(stats)), sep = ".")
}
def_indent <- rep(0L, length(all_nms)) %>% setNames(all_nms)

# Modify with custom indentation
if (!is.null(indents_in)) {
if (is.null(row_nms)) { # One row per statistic
common_names <- intersect(names(out), names(indents_in))
out[common_names] <- indents_in[common_names]
} else if (!is.null(row_nms)) { # One row per combination of variable level and statistic
out <- sapply(
all_nms,
function(x) {
if (x %in% names(indents_in)) {
indents_in[[x]]
} else {
stat_lvl <- regmatches(x, regexpr("[.]", x), invert = TRUE)[[1]]
stat <- stat_lvl[1]
lvl <- stat_lvl[2]
if (lvl %in% names(indents_in)) {
indents_in[[lvl]]
} else if (stat %in% names(indents_in)) {
indents_in[[stat]]
} else {
0
}
}
}
)
}
}

out
# Apply custom indentation
out <- .adjust_stats_desc_by_in_def(def_indent, indents_in, NULL)
out %>% unlist()
}

# Function to loop over each stat and levels to set correct values
# levels_per_stats - every combo of statistic & level must be represented
# tern_defaults - one per statistic (names are statistic names)
# Order of precedence by info present in name: level and stat > level > stat > other defaults
.adjust_stats_desc_by_in_def <- function(levels_per_stats, user_in, tern_defaults) {
single_stats <- any(names(levels_per_stats) %in% names(tern_defaults))
if (is.list(levels_per_stats)) {
if (any(names(levels_per_stats) %in% names(tern_defaults))) single_stats <- FALSE
null_stats <- sapply(levels_per_stats, is.null)
levels_per_stats[null_stats] <- ""
levels_per_stats <- lapply(levels_per_stats, function(x) x %>% setNames(x)) %>% unlist()
levels_per_stats[names(which(null_stats))] <- NA_character_
}
out <- levels_per_stats

# Seq over the stats levels (can be also flat (stat$NULL))
for (stat_i in seq_along(levels_per_stats)) {
# If you want to change all factor levels at once by statistic
common_stat_names <- intersect(names(levels_per_stats), names(user_in))

# Levels for each statistic
nm_of_levs <- levels_per_stats[[stat_i]]
# Special case in which only stat$NULL
if (is.null(nm_of_levs)) {
nm_of_levs <- "a single NULL level"
}

# Loop over levels for each statistic
for (lev_i in seq_along(nm_of_levs)) {
# If there are no further names (stat$NULL) push label (stat) down to lowest level
if (is.null(levels_per_stats[[stat_i]])) {
lev_val <- names(levels_per_stats[stat_i])
out[[stat_i]] <- lev_val
} else {
lev_val <- levels_per_stats[[stat_i]][[lev_i]]
}

# Add default if it is a stat at last level
if (lev_val %in% names(tern_defaults)) {
out[[stat_i]][[lev_i]] <- tern_defaults[[lev_val]]
}

# If a general stat was added to the custom labels
if (names(levels_per_stats[stat_i]) %in% names(user_in)) {
out[[stat_i]][[lev_i]] <- user_in[[names(levels_per_stats[stat_i])]]
}

# If a stat level (e.g. if it is counts levels from table) was added to the custom labels
if (lev_val %in% names(user_in)) {
out[[stat_i]][[lev_i]] <- user_in[[lev_val]]
}

# If stat_i.lev_val is added to labels_in
composite_stat_lev_nm <- paste(
names(levels_per_stats[stat_i]),
lev_val,
sep = "."
)
if (composite_stat_lev_nm %in% names(user_in)) {
out[[stat_i]][[lev_i]] <- user_in[[composite_stat_lev_nm]]
if (!single_stats) { # One row per combination of variable level and statistic
out <- sapply(
names(levels_per_stats),
function(x) {
if (x %in% names(user_in)) {
user_in[[x]]
} else {
stat_lvl <- regmatches(x, regexpr("[.]", x), invert = TRUE)[[1]]
stat <- stat_lvl[1]
lvl <- stat_lvl[2]
if (lvl %in% names(user_in)) {
user_in[[lvl]]
} else if (stat %in% names(user_in)) {
user_in[[stat]]
} else { # fill in gaps with tern defaults
if ((is.null(out[[x]]) | is.na(out[[x]]))) {
if (stat %in% names(tern_defaults)) tern_defaults[[stat]] else x
} else {
out[[x]]
}
}
}
}

# Used by the unlist (to avoid count_fraction1, count_fraction2, etc.)
names(out[[stat_i]])[lev_i] <- lev_val
)
} else { # One row per statistic
if (!is.null(user_in)) {
common_stats <- intersect(names(out), names(user_in))
out[common_stats] <- user_in[common_stats]
}
# fill in gaps with tern defaults
common_stats <- intersect(names(out[is.null(out) | is.na(out)]), names(tern_defaults))
out[common_stats] <- tern_defaults[common_stats]
}

out
Expand Down
30 changes: 15 additions & 15 deletions tests/testthat/_snaps/analyze_variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -1402,7 +1402,7 @@
RowsVerticalSection (in_rows) object print method:
----------------------------
row_name formatted_cell indent_mod row_label
1 n.n 5 0 n
1 n 5 0 n
2 count.a 3 0 a
3 count.b 1 0 b
4 count.c 1 0 c
Expand All @@ -1415,7 +1415,7 @@
11 fraction.a 3/5 (60.0%) 0 a
12 fraction.b 1/5 (20.0%) 0 b
13 fraction.c 1/5 (20.0%) 0 c
14 n_blq.n_blq 0 0 n_blq
14 n_blq 0 0 n_blq

---

Expand All @@ -1425,7 +1425,7 @@
RowsVerticalSection (in_rows) object print method:
----------------------------
row_name formatted_cell indent_mod row_label
1 n.n 4 0 n
1 n 4 0 n
2 count.A 2 0 A
3 count.B 1 0 B
4 count.C 1 0 C
Expand All @@ -1438,7 +1438,7 @@
11 fraction.A 2/4 (50.0%) 0 A
12 fraction.B 1/4 (25.0%) 0 B
13 fraction.C 1/4 (25.0%) 0 C
14 n_blq.n_blq 0 0 n_blq
14 n_blq 0 0 n_blq

---

Expand Down Expand Up @@ -1474,7 +1474,7 @@
RowsVerticalSection (in_rows) object print method:
----------------------------
row_name formatted_cell indent_mod row_label
1 n.n 5.00 -1 number of records
1 n 5.00 -1 number of records
2 count.a 2 5 a
3 count.b 1 5 b
4 count.c 1 5 c
Expand All @@ -1491,7 +1491,7 @@
15 fraction.b 1/5 (20.0%) 0 b
16 fraction.c 1/5 (20.0%) 0 c
17 fraction.NA 1/5 (20.0%) 0 NA
18 n_blq.n_blq 0 0 n_blq
18 n_blq 0 0 n_blq

# a_summary works with healthy input when compare_with_ref_group = TRUE.

Expand Down Expand Up @@ -1538,7 +1538,7 @@
RowsVerticalSection (in_rows) object print method:
----------------------------
row_name formatted_cell indent_mod row_label
1 n.n 5 0 n
1 n 5 0 n
2 count.a 3 0 a
3 count.b 1 0 b
4 count.c 1 0 c
Expand All @@ -1551,8 +1551,8 @@
11 fraction.a 3/5 (60.0%) 0 a
12 fraction.b 1/5 (20.0%) 0 b
13 fraction.c 1/5 (20.0%) 0 c
14 n_blq.n_blq 0 0 n_blq
15 pval_counts.pval_counts 0.9560 0 p-value (chi-squared test)
14 n_blq 0 0 n_blq
15 pval_counts 0.9560 0 p-value (chi-squared test)

---

Expand All @@ -1562,7 +1562,7 @@
RowsVerticalSection (in_rows) object print method:
----------------------------
row_name formatted_cell indent_mod row_label
1 n.n 4 0 n
1 n 4 0 n
2 count.A 2 0 A
3 count.B 1 0 B
4 count.C 1 0 C
Expand All @@ -1575,8 +1575,8 @@
11 fraction.A 2/4 (50.0%) 0 A
12 fraction.B 1/4 (25.0%) 0 B
13 fraction.C 1/4 (25.0%) 0 C
14 n_blq.n_blq 0 0 n_blq
15 pval_counts.pval_counts 0.9074 0 p-value (chi-squared test)
14 n_blq 0 0 n_blq
15 pval_counts 0.9074 0 p-value (chi-squared test)

---

Expand Down Expand Up @@ -1613,7 +1613,7 @@
RowsVerticalSection (in_rows) object print method:
----------------------------
row_name formatted_cell indent_mod row_label
1 n.n 5.00 -1 number of records
1 n 5.00 -1 number of records
2 count.a 2 5 a
3 count.b 1 5 b
4 count.c 1 5 c
Expand All @@ -1630,8 +1630,8 @@
15 fraction.b 1/5 (20.0%) 0 b
16 fraction.c 1/5 (20.0%) 0 c
17 fraction.NA 1/5 (20.0%) 0 NA
18 n_blq.n_blq 0 0 n_blq
19 pval_counts.pval_counts 0.8254 0 p-value (chi-squared test)
18 n_blq 0 0 n_blq
19 pval_counts 0.8254 0 p-value (chi-squared test)

# `analyze_vars` works with healthy input, default `na_rm = TRUE`.

Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-analyze_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,6 @@ testthat::test_that("analyze_vars works well with additional stat names (.stat_n
cols_int <- names(res2) %in% c("variable", "variable_level", "variable_label", "stat_name", "stat")
testthat::expect_equal(
unlist(res2[nrow(res2), cols_int, drop = TRUE], use.names = FALSE),
c("VAR2", "a_zero.a_zero", "A_ZERO", NA, 0)
c("VAR2", "a_zero", "A_ZERO", NA, 0)
)
})
Loading
Loading