From bb6ecaba6b9eed93c583aedea047a4f93be28c0a Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 15 Jan 2025 14:56:58 -0800 Subject: [PATCH 01/13] add `toggle_sparsity()` --- R/fit.R | 2 ++ R/sparsevctrs.R | 76 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/R/fit.R b/R/fit.R index 13d1315..8c42940 100644 --- a/R/fit.R +++ b/R/fit.R @@ -71,6 +71,8 @@ fit.workflow <- function(object, data, ..., calibration = NULL, control = contro ) } + object <- toggle_sparsity(object, data) + workflow <- object workflow <- .fit_pre(workflow, data) workflow <- .fit_model(workflow, control) diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R index bbb865a..b272d42 100644 --- a/R/sparsevctrs.R +++ b/R/sparsevctrs.R @@ -1,3 +1,79 @@ is_sparse_matrix <- function(x) { methods::is(x, "sparseMatrix") } + +toggle_sparsity <- function(object, data) { + toggle_sparse <- "no" + + if (allow_sparse(object$fit$actions$model$spec)) { + if ("recipe" %in% names(object$pre$actions)) { + est_sparsity <- recipes::.recipes_estimate_sparsity( + object$pre$actions$recipe$recipe + ) + } else { + est_sparsity <- sparsevctrs::sparsity(data, 1000) + } + + pred_log_fold <- pred_log_fold( + est_sparsity, + object$fit$actions$model$spec$engine, + nrow(data) + ) + if (pred_log_fold > 0) { + toggle_sparse <- "yes" + } + } + + object$pre$actions$recipe$recipe <- recipes::.recipes_toggle_sparse_args( + object$pre$actions$recipe$recipe, + choice = toggle_sparse + ) + object +} + +allow_sparse <- function(x) { + if (inherits(x, "model_fit")) { + x <- x$spec + } + res <- parsnip::get_from_env(paste0(class(x)[1], "_encoding")) + all(res$allow_sparse_x[res$engine == x$engine]) +} + +pred_log_fold <- function(sparsity, model, n_rows) { + if (is.null(model) || model == "ranger") { + return("no") + } + + log_fold <- -0.599333138645995 + + ifelse(sparsity < 0.836601307189543, 0.836601307189543 - sparsity, 0) * + -0.541581853008009 + + ifelse(n_rows < 16000, 16000 - n_rows, 
0) * 3.23980908942813e-05 + + ifelse(n_rows > 16000, n_rows - 16000, 0) * -2.81001152147355e-06 + + ifelse(sparsity > 0.836601307189543, sparsity - 0.836601307189543, 0) * + 9.82444255114058 + + ifelse(sparsity > 0.836601307189543, sparsity - 0.836601307189543, 0) * + ifelse(n_rows > 8000, n_rows - 8000, 0) * + 7.27456967763306e-05 + + ifelse(sparsity > 0.836601307189543, sparsity - 0.836601307189543, 0) * + ifelse(n_rows < 8000, 8000 - n_rows, 0) * + -0.000798307404212627 + + if (model == "xgboost") { + log_fold <- log_fold + + ifelse(sparsity < 0.984615384615385, 0.984615384615385 - sparsity, 0) * + 0.113098025073806 + + ifelse(n_rows < 8000, 8000 - n_rows, 0) * -9.77914237255269e-05 + + ifelse(n_rows > 8000, n_rows - 8000, 0) * 3.22657666511869e-06 + + ifelse(sparsity > 0.984615384615385, sparsity - 0.984615384615385, 0) * + 41.5180348086939 + + 0.913457808326756 + } + + if (model == "LiblineaR") { + log_fold <- log_fold + + ifelse(sparsity > 0.836601307189543, sparsity - 0.836601307189543, 0) * + -5.39592564852111 + } + + log_fold +} From f68285e6acdceb8b70228a5740c0b8dbfb5f245b Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Thu, 16 Jan 2025 12:14:57 -0800 Subject: [PATCH 02/13] move recipes to imports --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 181d507..af08bff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,6 +25,7 @@ Imports: lifecycle (>= 1.0.3), modelenv (>= 0.1.0), parsnip (>= 1.2.1.9000), + recipes (>= 1.0.10.9000), rlang (>= 1.1.0), tidyselect (>= 1.2.0), sparsevctrs (>= 0.1.0.9002), @@ -42,7 +43,6 @@ Suggests: methods, modeldata (>= 1.0.0), probably, - recipes (>= 1.0.10.9000), rmarkdown, testthat (>= 3.0.0) VignetteBuilder: From 39943be6f92ebe0fb3c877b953494dfe1f7db639 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 17 Jan 2025 08:50:29 -0800 Subject: [PATCH 03/13] Apply suggestions from code review Co-authored-by: Simon P. 
Couch --- R/sparsevctrs.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R index b272d42..1c970ed 100644 --- a/R/sparsevctrs.R +++ b/R/sparsevctrs.R @@ -5,18 +5,18 @@ is_sparse_matrix <- function(x) { toggle_sparsity <- function(object, data) { toggle_sparse <- "no" - if (allow_sparse(object$fit$actions$model$spec)) { - if ("recipe" %in% names(object$pre$actions)) { + if (allow_sparse(expect_spec_parsnip(object))) { + if (has_preprocessor_recipe(object)) { est_sparsity <- recipes::.recipes_estimate_sparsity( - object$pre$actions$recipe$recipe + extract_preprocessor(object) ) } else { - est_sparsity <- sparsevctrs::sparsity(data, 1000) + est_sparsity <- sparsevctrs::sparsity(data, sample = 1000) } pred_log_fold <- pred_log_fold( est_sparsity, - object$fit$actions$model$spec$engine, + extract_spec_parsnip(object)$engine, nrow(data) ) if (pred_log_fold > 0) { From 56304cdd941e48fe3c090e337b2df15732e99742 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 17 Jan 2025 08:58:02 -0800 Subject: [PATCH 04/13] up sparsevctrs version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index af08bff..abff23e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,7 +28,7 @@ Imports: recipes (>= 1.0.10.9000), rlang (>= 1.1.0), tidyselect (>= 1.2.0), - sparsevctrs (>= 0.1.0.9002), + sparsevctrs (>= 0.1.0.9003), vctrs (>= 0.4.1), withr Suggests: From 7ffc1f9d122ee425417eb263431578d8f0ddd85a Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 17 Jan 2025 09:19:44 -0800 Subject: [PATCH 05/13] pred_log_fold return -Inf instead of "no" to align --- R/sparsevctrs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R index 1c970ed..ce0ec1e 100644 --- a/R/sparsevctrs.R +++ b/R/sparsevctrs.R @@ -41,7 +41,7 @@ allow_sparse <- function(x) { pred_log_fold <- function(sparsity, model, n_rows) { if (is.null(model) || model 
== "ranger") { - return("no") + return(-Inf) } log_fold <- -0.599333138645995 + From c13d98e9e6918ff210ff76c23ec646437ffc96a2 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 17 Jan 2025 09:42:29 -0800 Subject: [PATCH 06/13] only do things in toggle_sparsity if there is a recipe --- R/sparsevctrs.R | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R index ce0ec1e..e091a4e 100644 --- a/R/sparsevctrs.R +++ b/R/sparsevctrs.R @@ -3,31 +3,32 @@ is_sparse_matrix <- function(x) { } toggle_sparsity <- function(object, data) { - toggle_sparse <- "no" - - if (allow_sparse(expect_spec_parsnip(object))) { - if (has_preprocessor_recipe(object)) { - est_sparsity <- recipes::.recipes_estimate_sparsity( - extract_preprocessor(object) - ) - } else { - est_sparsity <- sparsevctrs::sparsity(data, sample = 1000) - } + if ( + allow_sparse(object$fit$actions$model$spec) && + has_preprocessor_recipe(object) + ) { + est_sparsity <- recipes::.recipes_estimate_sparsity( + extract_preprocessor(object) + ) pred_log_fold <- pred_log_fold( est_sparsity, extract_spec_parsnip(object)$engine, nrow(data) ) + + toggle_sparse <- "no" + if (pred_log_fold > 0) { toggle_sparse <- "yes" } + + object$pre$actions$recipe$recipe <- recipes::.recipes_toggle_sparse_args( + object$pre$actions$recipe$recipe, + choice = toggle_sparse + ) } - object$pre$actions$recipe$recipe <- recipes::.recipes_toggle_sparse_args( - object$pre$actions$recipe$recipe, - choice = toggle_sparse - ) object } From 96e134971e00a4c6976d1830bce9bd71008ddf8f Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 17 Jan 2025 09:50:42 -0800 Subject: [PATCH 07/13] rename pred_log_fold() to should_use_sparsity() --- R/sparsevctrs.R | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R index e091a4e..6eb9384 100644 --- a/R/sparsevctrs.R +++ b/R/sparsevctrs.R @@ -11,18 +11,12 @@ toggle_sparsity 
<- function(object, data) { extract_preprocessor(object) ) - pred_log_fold <- pred_log_fold( + toggle_sparse <- should_use_sparsity( est_sparsity, extract_spec_parsnip(object)$engine, nrow(data) ) - toggle_sparse <- "no" - - if (pred_log_fold > 0) { - toggle_sparse <- "yes" - } - object$pre$actions$recipe$recipe <- recipes::.recipes_toggle_sparse_args( object$pre$actions$recipe$recipe, choice = toggle_sparse @@ -40,9 +34,9 @@ allow_sparse <- function(x) { all(res$allow_sparse_x[res$engine == x$engine]) } -pred_log_fold <- function(sparsity, model, n_rows) { +should_use_sparsity <- function(sparsity, model, n_rows) { if (is.null(model) || model == "ranger") { - return(-Inf) + return("no") } log_fold <- -0.599333138645995 + @@ -76,5 +70,5 @@ pred_log_fold <- function(sparsity, model, n_rows) { -5.39592564852111 } - log_fold + ifelse(log_fold > 0, "yes", "no") } From 3836124cab958c84380f284945b5ce04b01ac964 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 17 Jan 2025 10:01:35 -0800 Subject: [PATCH 08/13] document should_use_sparsity() --- R/sparsevctrs.R | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R index 6eb9384..d75b5c1 100644 --- a/R/sparsevctrs.R +++ b/R/sparsevctrs.R @@ -34,8 +34,27 @@ allow_sparse <- function(x) { all(res$allow_sparse_x[res$engine == x$engine]) } -should_use_sparsity <- function(sparsity, model, n_rows) { - if (is.null(model) || model == "ranger") { +# This function was created from the output of a MARS model fit on the +# simulation data generated in `analysis/time_analysis.R` +# https://github.com/tidymodels/benchmark-sparsity-threshold +# +# The model was extracted using {tidypredict} and hand-tuned for speed. +# +# The model was fit on `sparsity`, `engine` and `n_rows` and the outcome was +# `log_fold` which is defined as +# `log(time to fit with dense data / time to fit with sparse data)`. 
+# Meaning that values above 0 reflect longer fit times for dense data, +# hence we want to use sparse data. +# +# At this time the only engines that support sparse data are glmnet, LiblineaR, +# ranger, and xgboost, which is why they are the only ones listed here. +# This is fine as this code will only run if `allow_sparse()` returns `TRUE`, +# which only happens for these engines. +# +# Ranger is hard-coded to always return "no" since it appears to use the same +# algorithm for sparse and dense data, resulting in identical times. +should_use_sparsity <- function(sparsity, engine, n_rows) { + if (is.null(engine) || engine == "ranger") { return("no") } @@ -53,7 +72,7 @@ should_use_sparsity <- function(sparsity, model, n_rows) { ifelse(n_rows < 8000, 8000 - n_rows, 0) * -0.000798307404212627 - if (model == "xgboost") { + if (engine == "xgboost") { log_fold <- log_fold + ifelse(sparsity < 0.984615384615385, 0.984615384615385 - sparsity, 0) * 0.113098025073806 + @@ -64,7 +83,7 @@ should_use_sparsity <- function(sparsity, model, n_rows) { 0.913457808326756 } - if (model == "LiblineaR") { + if (engine == "LiblineaR") { log_fold <- log_fold + ifelse(sparsity > 0.836601307189543, sparsity - 0.836601307189543, 0) * -5.39592564852111 From 2162736337eefb4689d80d0700f4635ad599482c Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 17 Jan 2025 10:21:12 -0800 Subject: [PATCH 09/13] document toggle_sparsity() --- R/sparsevctrs.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/sparsevctrs.R b/R/sparsevctrs.R index d75b5c1..2111dea 100644 --- a/R/sparsevctrs.R +++ b/R/sparsevctrs.R @@ -2,6 +2,12 @@ is_sparse_matrix <- function(x) { methods::is(x, "sparseMatrix") } +# This function takes a workflow and its data. If the model supports sparse data +# and there is a recipe, then it uses `should_use_sparsity()` to determine +# whether all the `sparse = "auto"` should be turned to `"yes"` or `"no"` in the +# recipe. 
+# +# Done using flow chart in https://github.com/tidymodels/workflows/issues/271 toggle_sparsity <- function(object, data) { if ( allow_sparse(object$fit$actions$model$spec) && From 843de34ba1c83869ef6ad123dbb26f8393ec7e90 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 17 Jan 2025 10:48:51 -0800 Subject: [PATCH 10/13] test toggle_sparsity directly --- tests/testthat/test-sparsevctrs.R | 86 +++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R index 1116118..18e8d3c 100644 --- a/tests/testthat/test-sparsevctrs.R +++ b/tests/testthat/test-sparsevctrs.R @@ -191,3 +191,89 @@ test_that("fit() errors if sparse matrix has no colnames", { fit(wf_spec, hotel_data) ) }) + +test_that("toggle_sparsity changes auto to yes", { + skip_if_not_installed("glmnet") + skip_if_not_installed("modeldata") + + data("ames", package = "modeldata") + + tree_spec <- parsnip::boost_tree("regression", "xgboost") + + rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>% + recipes::step_dummy(recipes::all_nominal_predictors()) + + wf_spec <- workflow(rec_spec, tree_spec) + + res <- toggle_sparsity(wf_spec, ames) + + expect_identical( + extract_preprocessor(res)$steps[[1]]$sparse, + "yes" + ) +}) + +test_that("toggle_sparsity doesn't change no", { + skip_if_not_installed("glmnet") + skip_if_not_installed("modeldata") + + data("ames", package = "modeldata") + + tree_spec <- parsnip::boost_tree("regression", "xgboost") + + rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>% + recipes::step_dummy(recipes::all_nominal_predictors(), sparse = "no") + + wf_spec <- workflow(rec_spec, tree_spec) + + res <- toggle_sparsity(wf_spec, ames) + + expect_identical( + extract_preprocessor(res)$steps[[1]]$sparse, + "no" + ) +}) + +test_that("toggle_sparsity changes auto to no", { + skip_if_not_installed("glmnet") + skip_if_not_installed("modeldata") + + data("ames", package = "modeldata") + + 
tree_spec <- parsnip::boost_tree("regression", "xgboost") + + # if we only dummy 1 variable it doesn't make the data sparse enough + rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>% + recipes::step_dummy(MS_Zoning) + + wf_spec <- workflow(rec_spec, tree_spec) + + res <- toggle_sparsity(wf_spec, ames) + + expect_identical( + extract_preprocessor(res)$steps[[1]]$sparse, + "no" + ) +}) + +test_that("toggle_sparsity doesn't change yes", { + skip_if_not_installed("glmnet") + skip_if_not_installed("modeldata") + + data("ames", package = "modeldata") + + tree_spec <- parsnip::boost_tree("regression", "xgboost") + + # if we only dummy 1 variable it doesn't make the data sparse enough + rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>% + recipes::step_dummy(MS_Zoning, sparse = "yes") + + wf_spec <- workflow(rec_spec, tree_spec) + + res <- toggle_sparsity(wf_spec, ames) + + expect_identical( + extract_preprocessor(res)$steps[[1]]$sparse, + "yes" + ) +}) From 138da2909b1164d9f25de3ee4e73643bfaf8f361 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 17 Jan 2025 11:02:06 -0800 Subject: [PATCH 11/13] test toggle_sparsity inside fit --- tests/testthat/test-sparsevctrs.R | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R index 18e8d3c..b14b4df 100644 --- a/tests/testthat/test-sparsevctrs.R +++ b/tests/testthat/test-sparsevctrs.R @@ -277,3 +277,22 @@ test_that("toggle_sparsity doesn't change yes", { "yes" ) }) + +test_that("toggle_sparsity doesn't break fit", { + skip_if_not_installed("glmnet") + skip_if_not_installed("modeldata") + + data("ames", package = "modeldata") + + tree_spec <- parsnip::boost_tree("regression", "xgboost") + + rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>% + recipes::step_dummy(recipes::all_nominal_predictors()) + + wf_spec <- workflow(rec_spec, tree_spec) + + expect_no_error( + fit(wf_spec, ames) + ) +}) + From 
2a41454b1095c6cc2be0e01d0c0340b367baf380 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 17 Jan 2025 11:29:53 -0800 Subject: [PATCH 12/13] use glmnet instead of xgboost --- tests/testthat/test-sparsevctrs.R | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R index b14b4df..3468abf 100644 --- a/tests/testthat/test-sparsevctrs.R +++ b/tests/testthat/test-sparsevctrs.R @@ -197,8 +197,10 @@ test_that("toggle_sparsity changes auto to yes", { skip_if_not_installed("modeldata") data("ames", package = "modeldata") + ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor)) + ames <- ames[1:100, ] - tree_spec <- parsnip::boost_tree("regression", "xgboost") + tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0) rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>% recipes::step_dummy(recipes::all_nominal_predictors()) @@ -218,8 +220,10 @@ test_that("toggle_sparsity doesn't change no", { skip_if_not_installed("modeldata") data("ames", package = "modeldata") + ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor)) + ames <- ames[1:100, ] - tree_spec <- parsnip::boost_tree("regression", "xgboost") + tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0) rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>% recipes::step_dummy(recipes::all_nominal_predictors(), sparse = "no") @@ -239,8 +243,10 @@ test_that("toggle_sparsity changes auto to no", { skip_if_not_installed("modeldata") data("ames", package = "modeldata") + ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor)) + ames <- ames[1:100, ] - tree_spec <- parsnip::boost_tree("regression", "xgboost") + tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0) # if we only dummy 1 variable it doesn't make the data sparse enough rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>% @@ -261,8 +267,10 @@ 
test_that("toggle_sparsity doesn't change yes", { skip_if_not_installed("modeldata") data("ames", package = "modeldata") + ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor)) + ames <- ames[1:100, ] - tree_spec <- parsnip::boost_tree("regression", "xgboost") + tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0) # if we only dummy 1 variable it doesn't make the data sparse enough rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>% @@ -283,8 +291,10 @@ test_that("toggle_sparsity doesn't break fit", { skip_if_not_installed("modeldata") data("ames", package = "modeldata") + ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor)) + ames <- ames[1:100, ] - tree_spec <- parsnip::boost_tree("regression", "xgboost") + tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0) rec_spec <- recipes::recipe(Sale_Price ~ ., data = ames) %>% recipes::step_dummy(recipes::all_nominal_predictors()) From 5d1c4f48288f9cac764d9eaf73e058f05fc65bfb Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 17 Jan 2025 12:07:07 -0800 Subject: [PATCH 13/13] don't use dplyr --- tests/testthat/test-sparsevctrs.R | 45 +++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-sparsevctrs.R b/tests/testthat/test-sparsevctrs.R index 3468abf..c627fbb 100644 --- a/tests/testthat/test-sparsevctrs.R +++ b/tests/testthat/test-sparsevctrs.R @@ -197,7 +197,14 @@ test_that("toggle_sparsity changes auto to yes", { skip_if_not_installed("modeldata") data("ames", package = "modeldata") - ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor)) + fcts <- c( + 1L, 2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, + 17L, 20L, 21L, 22L, 23L, 24L, 26L, 27L, 28L, 29L, 30L, 32L, 36L, + 37L, 38L, 39L, 50L, 52L, 53L, 56L, 57L, 64L, 65L, 66L, 70L, 71L + ) + outcome <- 72 + + ames <- ames[c(fcts, outcome)] ames <- ames[1:100, ] tree_spec <- parsnip::linear_reg("regression", "glmnet", 
penalty = 0) @@ -220,7 +227,14 @@ test_that("toggle_sparsity doesn't change no", { skip_if_not_installed("modeldata") data("ames", package = "modeldata") - ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor)) + fcts <- c( + 1L, 2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, + 17L, 20L, 21L, 22L, 23L, 24L, 26L, 27L, 28L, 29L, 30L, 32L, 36L, + 37L, 38L, 39L, 50L, 52L, 53L, 56L, 57L, 64L, 65L, 66L, 70L, 71L + ) + outcome <- 72 + + ames <- ames[c(fcts, outcome)] ames <- ames[1:100, ] tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0) @@ -243,7 +257,14 @@ test_that("toggle_sparsity changes auto to no", { skip_if_not_installed("modeldata") data("ames", package = "modeldata") - ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor)) + fcts <- c( + 1L, 2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, + 17L, 20L, 21L, 22L, 23L, 24L, 26L, 27L, 28L, 29L, 30L, 32L, 36L, + 37L, 38L, 39L, 50L, 52L, 53L, 56L, 57L, 64L, 65L, 66L, 70L, 71L + ) + outcome <- 72 + + ames <- ames[c(fcts, outcome)] ames <- ames[1:100, ] tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0) @@ -267,7 +288,14 @@ test_that("toggle_sparsity doesn't change yes", { skip_if_not_installed("modeldata") data("ames", package = "modeldata") - ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor)) + fcts <- c( + 1L, 2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, + 17L, 20L, 21L, 22L, 23L, 24L, 26L, 27L, 28L, 29L, 30L, 32L, 36L, + 37L, 38L, 39L, 50L, 52L, 53L, 56L, 57L, 64L, 65L, 66L, 70L, 71L + ) + outcome <- 72 + + ames <- ames[c(fcts, outcome)] ames <- ames[1:100, ] tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0) @@ -291,7 +319,14 @@ test_that("toggle_sparsity doesn't break fit", { skip_if_not_installed("modeldata") data("ames", package = "modeldata") - ames <- dplyr::select(ames, Sale_Price, dplyr::where(is.factor)) + fcts <- c( + 1L, 2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
+ 17L, 20L, 21L, 22L, 23L, 24L, 26L, 27L, 28L, 29L, 30L, 32L, 36L, + 37L, 38L, 39L, 50L, 52L, 53L, 56L, 57L, 64L, 65L, 66L, 70L, 71L + ) + outcome <- 72 + + ames <- ames[c(fcts, outcome)] ames <- ames[1:100, ] tree_spec <- parsnip::linear_reg("regression", "glmnet", penalty = 0)