From 631fe3c277bbeb601e90954358cc62fd62c6d6aa Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Tue, 16 Jul 2024 08:49:43 -0400 Subject: [PATCH] Two layer neural networks (#80) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * new function for two layer models * two layer unit tests * avoid overflow issues * fix doc error * test when loss cannot be computed * tests for tunable values * update GHA * update spelling * use current CRAN torch * add missing snapshot and remove print snapshots (due to OS differences) * don't test on R 4.1 * test overflow on M1 mac --------- Co-authored-by: ‘topepo’ <‘mxkuhn@gmail.com’> --- .github/workflows/R-CMD-check.yaml | 16 +- .github/workflows/test-coverage.yaml | 23 +- DESCRIPTION | 4 +- NAMESPACE | 7 + NEWS.md | 2 + R/mlp-fit.R | 1318 +++++++++++++++---------- R/tunable.R | 33 +- inst/WORDLIST | 2 +- man/brulee-internal.Rd | 3 + man/brulee_mlp.Rd | 121 ++- tests/testthat/_snaps/overflow.md | 22 + tests/testthat/_snaps/tunable.md | 429 ++++++++ tests/testthat/test-mlp-multinomial.R | 2 +- tests/testthat/test-mlp-regression.R | 107 +- tests/testthat/test-overflow.R | 20 + tests/testthat/test-tunable.R | 8 + 16 files changed, 1562 insertions(+), 555 deletions(-) create mode 100644 tests/testthat/_snaps/overflow.md create mode 100644 tests/testthat/_snaps/tunable.md create mode 100644 tests/testthat/test-overflow.R create mode 100644 tests/testthat/test-tunable.R diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index fb81baa..67dc39b 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -12,6 +12,8 @@ on: name: R-CMD-check +permissions: read-all + jobs: R-CMD-check: runs-on: ${{ matrix.config.os }} @@ -26,10 +28,12 @@ jobs: - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} - - {os: ubuntu-latest, r: 'oldrel-2'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'oldrel-2'} + - {os: ubuntu-latest, r: 'oldrel-3'} + - {os: ubuntu-latest, r: 'oldrel-4'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} @@ -37,7 +41,7 @@ jobs: TORCH_INSTALL: 1 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -55,4 +59,4 @@ jobs: - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true - args: 'c("--no-multiarch", "--no-manual")' + build_args: 'c("--no-multiarch", "--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 04d5631..f2532ef 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -8,6 +8,8 @@ on: name: test-coverage +permissions: read-all + jobs: test-coverage: runs-on: ubuntu-latest @@ -16,7 +18,7 @@ jobs: TORCH_INSTALL: 1 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: @@ -24,28 +26,37 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::covr + extra-packages: any::covr, any::xml2 needs: coverage - name: Test coverage run: | - covr::codecov( + cov <- covr::package_coverage( quiet = FALSE, clean = FALSE, - install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash 
= "/"), "package") ) + covr::to_cobertura(cov) shell: Rscript {0} + - uses: codecov/codecov-action@v4 + with: + fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} + file: ./cobertura.xml + plugin: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + - name: Show testthat output if: always() run: | ## -------------------------------------------------------------------- - find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true shell: bash - name: Upload test results if: failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: coverage-test-failures path: ${{ runner.temp }}/package diff --git a/DESCRIPTION b/DESCRIPTION index 7b8de96..d63d655 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,7 @@ Imports: rlang (>= 1.1.1), stats, tibble, - torch (>= 0.11.0), + torch (>= 0.13.0), utils Suggests: covr, @@ -40,4 +40,4 @@ Config/testthat/edition: 3 Encoding: UTF-8 Language: en-US Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 diff --git a/NAMESPACE b/NAMESPACE index b575d01..4f4a7fd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,11 @@ S3method(brulee_mlp,default) S3method(brulee_mlp,formula) S3method(brulee_mlp,matrix) S3method(brulee_mlp,recipe) +S3method(brulee_mlp_two_layer,data.frame) +S3method(brulee_mlp_two_layer,default) +S3method(brulee_mlp_two_layer,formula) +S3method(brulee_mlp_two_layer,matrix) +S3method(brulee_mlp_two_layer,recipe) S3method(brulee_multinomial_reg,data.frame) S3method(brulee_multinomial_reg,default) S3method(brulee_multinomial_reg,formula) @@ -39,6 +44,7 @@ S3method(print,brulee_multinomial_reg) S3method(tunable,brulee_linear_reg) S3method(tunable,brulee_logistic_reg) S3method(tunable,brulee_mlp) +S3method(tunable,brulee_mlp_two_layer) S3method(tunable,brulee_multinomial_reg) export("%>%") export(autoplot) @@ -46,6 +52,7 @@ export(brulee_activations) export(brulee_linear_reg) export(brulee_logistic_reg) export(brulee_mlp) +export(brulee_mlp_two_layer) export(brulee_multinomial_reg) export(coef) export(matrix_to_dataset) diff --git a/NEWS.md b/NEWS.md index c605dab..db7a9b8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # brulee (development version) +* Added a convenience function, `brulee_mlp_two_layer()`, to more easily fit two-layer networks with parsnip. + # brulee 0.3.0 * Fixed bug where `coef()` didn't would error if used on a `brulee_logistic_reg()` that was trained with a recipe. (#66) diff --git a/R/mlp-fit.R b/R/mlp-fit.R index 77b0d58..537424b 100644 --- a/R/mlp-fit.R +++ b/R/mlp-fit.R @@ -1,7 +1,9 @@ #' Fit neural networks #' #' `brulee_mlp()` fits neural network models using stochastic gradient -#' descent. Multiple layers can be used. +#' descent. Multiple layers can be used. For working with two-layer networks in +#' tidymodels, `brulee_mlp_two_layer()` can be helpful for specifying tuning +#' parameters as scalars. #' #' @param x Depending on the context: #' @@ -33,11 +35,13 @@ #' @param hidden_units An integer for the number of hidden units, or a vector #' of integers. If a vector of integers, the model will have `length(hidden_units)` #' layers each with `hidden_units[i]` hidden units. +#' @param hidden_units_2 An integer for the number of hidden units for a second layer. #' @param activation A character vector for the activation function (such as #' "relu", "tanh", "sigmoid", and so on). 
See [brulee_activations()] for #' a list of possible values. If `hidden_units` is a vector, `activation` #' can be a character vector with length equals to `length(hidden_units)` #' specifying the activation for each hidden layer. +#' @param activation_2 A character vector for the activation function for a second layer. #' @param optimizer The method used in the optimization procedure. Possible choices #' are 'LBFGS' and 'SGD'. Default is 'LBFGS'. #' @param learn_rate A positive number that controls the initial rapidity that @@ -228,13 +232,13 @@ #' } #' @export brulee_mlp <- function(x, ...) { - UseMethod("brulee_mlp") + UseMethod("brulee_mlp") } #' @export #' @rdname brulee_mlp brulee_mlp.default <- function(x, ...) { - stop("`brulee_mlp()` is not defined for a '", class(x)[1], "'.", call. = FALSE) + stop("`brulee_mlp()` is not defined for a '", class(x)[1], "'.", call. = FALSE) } # XY method - data frame @@ -242,46 +246,46 @@ brulee_mlp.default <- function(x, ...) { #' @export #' @rdname brulee_mlp brulee_mlp.data.frame <- - function(x, - y, - epochs = 100L, - hidden_units = 3L, - activation = "relu", - penalty = 0.001, - mixture = 0, - dropout = 0, - validation = 0.1, - optimizer = "LBFGS", - learn_rate = 0.01, - rate_schedule = "none", - momentum = 0.0, - batch_size = NULL, - class_weights = NULL, - stop_iter = 5, - verbose = FALSE, - ...) { - processed <- hardhat::mold(x, y) - - brulee_mlp_bridge( - processed, - epochs = epochs, - hidden_units = hidden_units, - activation = activation, - learn_rate = learn_rate, - rate_schedule = rate_schedule, - penalty = penalty, - mixture = mixture, - dropout = dropout, - validation = validation, - optimizer = optimizer, - momentum = momentum, - batch_size = batch_size, - class_weights = class_weights, - stop_iter = stop_iter, - verbose = verbose, - ... - ) - } + function(x, + y, + epochs = 100L, + hidden_units = 3L, + activation = "relu", + penalty = 0.001, + mixture = 0, + dropout = 0, + validation = 0.1, + optimizer = "LBFGS", + learn_rate = 0.01, + rate_schedule = "none", + momentum = 0.0, + batch_size = NULL, + class_weights = NULL, + stop_iter = 5, + verbose = FALSE, + ...) { + processed <- hardhat::mold(x, y) + + brulee_mlp_bridge( + processed, + epochs = epochs, + hidden_units = hidden_units, + activation = activation, + learn_rate = learn_rate, + rate_schedule = rate_schedule, + penalty = penalty, + mixture = mixture, + dropout = dropout, + validation = validation, + optimizer = optimizer, + momentum = momentum, + batch_size = batch_size, + class_weights = class_weights, + stop_iter = stop_iter, + verbose = verbose, + ... + ) + } # XY method - matrix @@ -305,27 +309,27 @@ brulee_mlp.matrix <- function(x, stop_iter = 5, verbose = FALSE, ...) { - processed <- hardhat::mold(x, y) - - brulee_mlp_bridge( - processed, - epochs = epochs, - hidden_units = hidden_units, - activation = activation, - learn_rate = learn_rate, - rate_schedule = rate_schedule, - momentum = momentum, - penalty = penalty, - mixture = mixture, - dropout = dropout, - validation = validation, - optimizer = optimizer, - batch_size = batch_size, - class_weights = class_weights, - stop_iter = stop_iter, - verbose = verbose, - ... 
- ) + processed <- hardhat::mold(x, y) + + brulee_mlp_bridge( + processed, + epochs = epochs, + hidden_units = hidden_units, + activation = activation, + learn_rate = learn_rate, + rate_schedule = rate_schedule, + momentum = momentum, + penalty = penalty, + mixture = mixture, + dropout = dropout, + validation = validation, + optimizer = optimizer, + batch_size = batch_size, + class_weights = class_weights, + stop_iter = stop_iter, + verbose = verbose, + ... + ) } # Formula method @@ -333,92 +337,92 @@ brulee_mlp.matrix <- function(x, #' @export #' @rdname brulee_mlp brulee_mlp.formula <- - function(formula, - data, - epochs = 100L, - hidden_units = 3L, - activation = "relu", - penalty = 0.001, - mixture = 0, - dropout = 0, - validation = 0.1, - optimizer = "LBFGS", - learn_rate = 0.01, - rate_schedule = "none", - momentum = 0.0, - batch_size = NULL, - class_weights = NULL, - stop_iter = 5, - verbose = FALSE, - ...) { - processed <- hardhat::mold(formula, data) - - brulee_mlp_bridge( - processed, - epochs = epochs, - hidden_units = hidden_units, - activation = activation, - learn_rate = learn_rate, - rate_schedule = rate_schedule, - momentum = momentum, - penalty = penalty, - mixture = mixture, - dropout = dropout, - validation = validation, - optimizer = optimizer, - batch_size = batch_size, - class_weights = class_weights, - stop_iter = stop_iter, - verbose = verbose, - ... - ) - } + function(formula, + data, + epochs = 100L, + hidden_units = 3L, + activation = "relu", + penalty = 0.001, + mixture = 0, + dropout = 0, + validation = 0.1, + optimizer = "LBFGS", + learn_rate = 0.01, + rate_schedule = "none", + momentum = 0.0, + batch_size = NULL, + class_weights = NULL, + stop_iter = 5, + verbose = FALSE, + ...) { + processed <- hardhat::mold(formula, data) + + brulee_mlp_bridge( + processed, + epochs = epochs, + hidden_units = hidden_units, + activation = activation, + learn_rate = learn_rate, + rate_schedule = rate_schedule, + momentum = momentum, + penalty = penalty, + mixture = mixture, + dropout = dropout, + validation = validation, + optimizer = optimizer, + batch_size = batch_size, + class_weights = class_weights, + stop_iter = stop_iter, + verbose = verbose, + ... + ) + } # Recipe method #' @export #' @rdname brulee_mlp brulee_mlp.recipe <- - function(x, - data, - epochs = 100L, - hidden_units = 3L, - activation = "relu", - penalty = 0.001, - mixture = 0, - dropout = 0, - validation = 0.1, - optimizer = "LBFGS", - learn_rate = 0.01, - rate_schedule = "none", - momentum = 0.0, - batch_size = NULL, - class_weights = NULL, - stop_iter = 5, - verbose = FALSE, - ...) { - processed <- hardhat::mold(x, data) - - brulee_mlp_bridge( - processed, - epochs = epochs, - hidden_units = hidden_units, - activation = activation, - learn_rate = learn_rate, - rate_schedule = rate_schedule, - momentum = momentum, - penalty = penalty, - mixture = mixture, - dropout = dropout, - validation = validation, - optimizer = optimizer, - batch_size = batch_size, - class_weights = class_weights, - stop_iter = stop_iter, - verbose = verbose, - ... - ) - } + function(x, + data, + epochs = 100L, + hidden_units = 3L, + activation = "relu", + penalty = 0.001, + mixture = 0, + dropout = 0, + validation = 0.1, + optimizer = "LBFGS", + learn_rate = 0.01, + rate_schedule = "none", + momentum = 0.0, + batch_size = NULL, + class_weights = NULL, + stop_iter = 5, + verbose = FALSE, + ...) 
{ + processed <- hardhat::mold(x, data) + + brulee_mlp_bridge( + processed, + epochs = epochs, + hidden_units = hidden_units, + activation = activation, + learn_rate = learn_rate, + rate_schedule = rate_schedule, + momentum = momentum, + penalty = penalty, + mixture = mixture, + dropout = dropout, + validation = validation, + optimizer = optimizer, + batch_size = batch_size, + class_weights = class_weights, + stop_iter = stop_iter, + verbose = verbose, + ... + ) + } # ------------------------------------------------------------------------------ # Bridge @@ -427,445 +431,456 @@ brulee_mlp_bridge <- function(processed, epochs, hidden_units, activation, learn_rate, rate_schedule, momentum, penalty, mixture, dropout, class_weights, validation, optimizer, batch_size, stop_iter, verbose, ...) { - if(!torch::torch_is_installed()) { - cli::cli_abort("The torch backend has not been installed; use `torch::install_torch()`.") - } - - f_nm <- "brulee_mlp" - # check values of various argument values - if (is.numeric(epochs) & !is.integer(epochs)) { - epochs <- as.integer(epochs) - } - if (is.numeric(hidden_units) & !is.integer(hidden_units)) { - hidden_units <- as.integer(hidden_units) - } - if (length(hidden_units) > 1 && length(activation) == 1) { - activation <- rep(activation, length(hidden_units)) - } - if (length(hidden_units) != length(activation)) { - cli::cli_abort("'activation' must be a single value or a vector with the same length as 'hidden_units'") - } - - allowed_activation <- brulee_activations() - good_activation <- activation %in% allowed_activation - if (!all(good_activation)) { - cli::cli_abort(paste("'activation' should be one of: ", paste0(allowed_activation, collapse = ", "))) - } - - if (optimizer == "LBFGS" & !is.null(batch_size)) { - cli::cli_warn("'batch_size' is only used for the SGD optimizer.") - batch_size <- NULL - } + if(!torch::torch_is_installed()) { + cli::cli_abort("The torch backend has not been installed; use `torch::install_torch()`.") + } - if (!is.null(batch_size) & optimizer == "SGD") { - if (is.numeric(batch_size) & !is.integer(batch_size)) { - batch_size <- as.integer(batch_size) - } - check_integer(batch_size, single = TRUE, 1, fn = f_nm) - } + f_nm <- "brulee_mlp" + # check values of various argument values + if (is.numeric(epochs) & !is.integer(epochs)) { + epochs <- as.integer(epochs) + } + if (is.numeric(hidden_units) & !is.integer(hidden_units)) { + hidden_units <- as.integer(hidden_units) + } + if (length(hidden_units) > 1 && length(activation) == 1) { + activation <- rep(activation, length(hidden_units)) + } + if (length(hidden_units) != length(activation)) { + cli::cli_abort("'activation' must be a single value or a vector with the same length as 'hidden_units'") + } - check_integer(epochs, single = TRUE, 1, fn = f_nm) - check_integer(hidden_units, single = FALSE, 1, fn = f_nm) - check_double(penalty, single = TRUE, 0, incl = c(TRUE, TRUE), fn = f_nm) - check_double(mixture, single = TRUE, 0, 1, incl = c(TRUE, TRUE), fn = f_nm) - check_double(dropout, single = TRUE, 0, 1, incl = c(TRUE, FALSE), fn = f_nm) - check_double(validation, single = TRUE, 0, 1, incl = c(TRUE, FALSE), fn = f_nm) - check_double(momentum, single = TRUE, 0, 1, incl = c(TRUE, TRUE), fn = f_nm) - check_double(learn_rate, single = TRUE, 0, incl = c(FALSE, TRUE), fn = f_nm) - check_logical(verbose, single = TRUE, fn = f_nm) - check_character(activation, single = FALSE, fn = f_nm) + allowed_activation <- brulee_activations() + good_activation <- activation %in% allowed_activation + 
if (!all(good_activation)) { + cli::cli_abort(paste("'activation' should be one of: ", paste0(allowed_activation, collapse = ", "))) + } - ## ----------------------------------------------------------------------------- + if (optimizer == "LBFGS" & !is.null(batch_size)) { + cli::cli_warn("'batch_size' is only used for the SGD optimizer.") + batch_size <- NULL + } - predictors <- processed$predictors - - if (!is.matrix(predictors)) { - predictors <- as.matrix(predictors) - if (is.character(predictors)) { - cli::cli_abort( - paste( - "There were some non-numeric columns in the predictors.", - "Please use a formula or recipe to encode all of the predictors as numeric." - ) - ) - } + if (!is.null(batch_size) & optimizer == "SGD") { + if (is.numeric(batch_size) & !is.integer(batch_size)) { + batch_size <- as.integer(batch_size) } + check_integer(batch_size, single = TRUE, 1, fn = f_nm) + } - ## ----------------------------------------------------------------------------- - - outcome <- processed$outcomes[[1]] - - # ------------------------------------------------------------------------------ - - lvls <- levels(outcome) - xtab <- table(outcome) - class_weights <- check_class_weights(class_weights, lvls, xtab, f_nm) - - ## ----------------------------------------------------------------------------- - - fit <- - mlp_fit_imp( - x = predictors, - y = outcome, - epochs = epochs, - hidden_units = hidden_units, - activation = activation, - learn_rate = learn_rate, - rate_schedule = rate_schedule, - momentum = momentum, - penalty = penalty, - mixture = mixture, - dropout = dropout, - validation = validation, - optimizer = optimizer, - batch_size = batch_size, - class_weights = class_weights, - stop_iter = stop_iter, - verbose = verbose, - ... + check_integer(epochs, single = TRUE, 1, fn = f_nm) + check_integer(hidden_units, single = FALSE, 1, fn = f_nm) + check_double(penalty, single = TRUE, 0, incl = c(TRUE, TRUE), fn = f_nm) + check_double(mixture, single = TRUE, 0, 1, incl = c(TRUE, TRUE), fn = f_nm) + check_double(dropout, single = TRUE, 0, 1, incl = c(TRUE, FALSE), fn = f_nm) + check_double(validation, single = TRUE, 0, 1, incl = c(TRUE, FALSE), fn = f_nm) + check_double(momentum, single = TRUE, 0, 1, incl = c(TRUE, TRUE), fn = f_nm) + check_double(learn_rate, single = TRUE, 0, incl = c(FALSE, TRUE), fn = f_nm) + check_logical(verbose, single = TRUE, fn = f_nm) + check_character(activation, single = FALSE, fn = f_nm) + + ## ----------------------------------------------------------------------------- + + predictors <- processed$predictors + + if (!is.matrix(predictors)) { + predictors <- as.matrix(predictors) + if (is.character(predictors)) { + cli::cli_abort( + paste( + "There were some non-numeric columns in the predictors.", + "Please use a formula or recipe to encode all of the predictors as numeric." 
) + ) + } + } - new_brulee_mlp( - model_obj = fit$model_obj, - estimates = fit$estimates, - best_epoch = fit$best_epoch, - loss = fit$loss, - dims = fit$dims, - y_stats = fit$y_stats, - parameters = fit$parameters, - blueprint = processed$blueprint + ## ----------------------------------------------------------------------------- + + outcome <- processed$outcomes[[1]] + + # ------------------------------------------------------------------------------ + + lvls <- levels(outcome) + xtab <- table(outcome) + class_weights <- check_class_weights(class_weights, lvls, xtab, f_nm) + + ## ----------------------------------------------------------------------------- + + fit <- + mlp_fit_imp( + x = predictors, + y = outcome, + epochs = epochs, + hidden_units = hidden_units, + activation = activation, + learn_rate = learn_rate, + rate_schedule = rate_schedule, + momentum = momentum, + penalty = penalty, + mixture = mixture, + dropout = dropout, + validation = validation, + optimizer = optimizer, + batch_size = batch_size, + class_weights = class_weights, + stop_iter = stop_iter, + verbose = verbose, + ... ) + + new_brulee_mlp( + model_obj = fit$model_obj, + estimates = fit$estimates, + best_epoch = fit$best_epoch, + loss = fit$loss, + dims = fit$dims, + y_stats = fit$y_stats, + parameters = fit$parameters, + blueprint = processed$blueprint + ) } new_brulee_mlp <- function( model_obj, estimates, best_epoch, loss, dims, y_stats, parameters, blueprint) { - if (!inherits(model_obj, "raw")) { - cli::cli_abort("'model_obj' should be a raw vector.") - } - if (!is.list(estimates)) { - cli::cli_abort("'parameters' should be a list") - } - if (!is.vector(best_epoch) || !is.integer(best_epoch)) { - cli::cli_abort("'best_epoch' should be an integer") - } - if (!is.vector(loss) || !is.numeric(loss)) { - cli::cli_abort("'loss' should be a numeric vector") - } - if (!is.list(dims)) { - cli::cli_abort("'dims' should be a list") - } - if (!is.list(y_stats)) { - cli::cli_abort("'y_stats' should be a list") - } - if (!is.list(parameters)) { - cli::cli_abort("'parameters' should be a list") - } - if (!inherits(blueprint, "hardhat_blueprint")) { - cli::cli_abort("'blueprint' should be a hardhat blueprint") - } - hardhat::new_model(model_obj = model_obj, - estimates = estimates, - best_epoch = best_epoch, - loss = loss, - dims = dims, - y_stats = y_stats, - parameters = parameters, - blueprint = blueprint, - class = "brulee_mlp") + if (!inherits(model_obj, "raw")) { + cli::cli_abort("'model_obj' should be a raw vector.") + } + if (!is.list(estimates)) { + cli::cli_abort("'parameters' should be a list") + } + if (!is.vector(best_epoch) || !is.integer(best_epoch)) { + cli::cli_abort("'best_epoch' should be an integer") + } + if (!is.vector(loss) || !is.numeric(loss)) { + cli::cli_abort("'loss' should be a numeric vector") + } + if (!is.list(dims)) { + cli::cli_abort("'dims' should be a list") + } + if (!is.list(y_stats)) { + cli::cli_abort("'y_stats' should be a list") + } + if (!is.list(parameters)) { + cli::cli_abort("'parameters' should be a list") + } + if (!inherits(blueprint, "hardhat_blueprint")) { + cli::cli_abort("'blueprint' should be a hardhat blueprint") + } + hardhat::new_model(model_obj = model_obj, + estimates = estimates, + best_epoch = best_epoch, + loss = loss, + dims = dims, + y_stats = y_stats, + parameters = parameters, + blueprint = blueprint, + class = "brulee_mlp") } ## ----------------------------------------------------------------------------- # Fit code mlp_fit_imp <- - function(x, y, - epochs = 
100L, - batch_size = 32, - hidden_units = 3L, - penalty = 0.001, - mixture = 0, - dropout = 0, - validation = 0.1, - optimizer = "LBFGS", - learn_rate = 0.01, - rate_schedule = "none", - momentum = 0.0, - activation = "relu", - class_weights = NULL, - stop_iter = 5, - verbose = FALSE, - ...) { - - torch::torch_manual_seed(sample.int(10^5, 1)) - - ## --------------------------------------------------------------------------- - # General data checks: - - check_data_att(x, y) - - # Check missing values - compl_data <- check_missing_data(x, y, "brulee_mlp", verbose) - x <- compl_data$x - y <- compl_data$y - n <- length(y) - p <- ncol(x) - - if (is.factor(y)) { - lvls <- levels(y) - y_dim <- length(lvls) - # the model will output softmax values. - # so we need to use negative likelihood loss and - # pass the log of softmax. - loss_fn <- function(input, target, wts = NULL) { - nnf_nll_loss( - weight = wts, - input = torch::torch_log(input), - target = target - ) - } - } else { - y_dim <- 1 - lvls <- NULL - loss_fn <- function(input, target, wts = NULL) { - nnf_mse_loss(input, target$view(c(-1,1))) - } - } + function(x, y, + epochs = 100L, + batch_size = 32, + hidden_units = 3L, + penalty = 0.001, + mixture = 0, + dropout = 0, + validation = 0.1, + optimizer = "LBFGS", + learn_rate = 0.01, + rate_schedule = "none", + momentum = 0.0, + activation = "relu", + class_weights = NULL, + stop_iter = 5, + verbose = FALSE, + ...) { + + torch::torch_manual_seed(sample.int(10^5, 1)) + + ## --------------------------------------------------------------------------- + # General data checks: + + check_data_att(x, y) + + # Check missing values + compl_data <- check_missing_data(x, y, "brulee_mlp", verbose) + x <- compl_data$x + y <- compl_data$y + n <- length(y) + p <- ncol(x) + + if (is.factor(y)) { + lvls <- levels(y) + y_dim <- length(lvls) + # the model will output softmax values. + # so we need to use negative likelihood loss and + # pass the log of softmax. 
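+      # Note: nnf_nll_loss() expects log-probabilities, so we pass the
+      # log of the network's softmax output (the log-softmax), making
+      # this loss equivalent to cross-entropy on the raw logits.
+      # Numerical overflow in the forward pass can turn the loss into
+      # NaN, which the training loop detects and stops on (see the new
+      # test-overflow.R).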
+ loss_fn <- function(input, target, wts = NULL) { + nnf_nll_loss( + weight = wts, + input = torch::torch_log(input), + target = target + ) + } + } else { + y_dim <- 1 + lvls <- NULL + loss_fn <- function(input, target, wts = NULL) { + nnf_mse_loss(input, target$view(c(-1,1))) + } + } - if (validation > 0) { - in_val <- sample(seq_along(y), floor(n * validation)) - x_val <- x[in_val,, drop = FALSE] - y_val <- y[in_val] - x <- x[-in_val,, drop = FALSE] - y <- y[-in_val] - } + if (validation > 0) { + in_val <- sample(seq_along(y), floor(n * validation)) + x_val <- x[in_val,, drop = FALSE] + y_val <- y[in_val] + x <- x[-in_val,, drop = FALSE] + y <- y[-in_val] + } - if (!is.factor(y)) { - y_stats <- scale_stats(y) - y <- scale_y(y, y_stats) - if (validation > 0) { - y_val <- scale_y(y_val, y_stats) - } - loss_label <- "\tLoss (scaled):" - } else { - y_stats <- list(mean = NA_real_, sd = NA_real_) - loss_label <- "\tLoss:" - } + if (!is.factor(y)) { + y_stats <- scale_stats(y) + y <- scale_y(y, y_stats) + if (validation > 0) { + y_val <- scale_y(y_val, y_stats) + } + loss_label <- "\tLoss (scaled):" + } else { + y_stats <- list(mean = NA_real_, sd = NA_real_) + loss_label <- "\tLoss:" + } - if (is.null(batch_size) & optimizer == "SGD") { - batch_size <- nrow(x) - } else { - batch_size <- min(batch_size, nrow(x)) - } + if (is.null(batch_size) & optimizer == "SGD") { + batch_size <- nrow(x) + } else { + batch_size <- min(batch_size, nrow(x)) + } - ## --------------------------------------------------------------------------- - # Convert to index sampler and data loader - ds <- brulee::matrix_to_dataset(x, y) - dl <- torch::dataloader(ds, batch_size = batch_size) + ## --------------------------------------------------------------------------- + # Convert to index sampler and data loader + ds <- brulee::matrix_to_dataset(x, y) + dl <- torch::dataloader(ds, batch_size = batch_size) - if (validation > 0) { - ds_val <- brulee::matrix_to_dataset(x_val, y_val) - dl_val <- torch::dataloader(ds_val) - } + if (validation > 0) { + ds_val <- brulee::matrix_to_dataset(x_val, y_val) + dl_val <- torch::dataloader(ds_val) + } - ## --------------------------------------------------------------------------- - # Initialize model and optimizer - model <- mlp_module(ncol(x), hidden_units, activation, dropout, y_dim) - loss_fn <- make_penalized_loss(loss_fn, model, penalty, mixture) + ## --------------------------------------------------------------------------- + # Initialize model and optimizer + model <- mlp_module(ncol(x), hidden_units, activation, dropout, y_dim) + loss_fn <- make_penalized_loss(loss_fn, model, penalty, mixture) - # Set the optimizer (will be set again below) - optimizer_obj <- set_optimizer(optimizer, model, learn_rate, momentum) + # Set the optimizer (will be set again below) + optimizer_obj <- set_optimizer(optimizer, model, learn_rate, momentum) - ## --------------------------------------------------------------------------- + ## --------------------------------------------------------------------------- - loss_prev <- 10^38 - loss_min <- loss_prev - poor_epoch <- 0 - best_epoch <- 1 - loss_vec <- rep(NA_real_, epochs) - if (verbose) { - epoch_chr <- format(1:epochs) - } + loss_prev <- 10^38 + loss_min <- loss_prev + poor_epoch <- 0 + best_epoch <- 1 + loss_vec <- rep(NA_real_, epochs) + if (verbose) { + epoch_chr <- format(1:epochs) + } - ## ----------------------------------------------------------------------------- - - param_per_epoch <- list() - - # Optimize parameters - for (epoch in 
1:epochs) { - - # For future work with other optimizers, see - # https://github.com/tidymodels/brulee/pull/56#discussion_r972049108 - # "Creating a new optimizer every epoch will reset the optimizer state. - # For example, SGD with momentum keeps track of the latest update for each - # parameter, so it might be OK to just restart. - # But other optimizers like Adam, will keep a moving average of updates and - # resetting them can interfere in training." - - learn_rate <- set_learn_rate(epoch - 1, learn_rate, type = rate_schedule, ...) - optimizer_obj <- set_optimizer(optimizer, model, learn_rate, momentum) - - # training loop - coro::loop( - for (batch in dl) { - cl <- function() { - optimizer_obj$zero_grad() - pred <- model(batch$x) - loss <- loss_fn(pred, batch$y, class_weights) - loss$backward() - loss - } - optimizer_obj$step(cl) - } - ) - - # calculate loss on the full datasets - if (validation > 0) { - pred <- model(dl_val$dataset$tensors$x) - loss <- loss_fn(pred, dl_val$dataset$tensors$y, class_weights) - } else { - pred <- model(dl$dataset$tensors$x) - loss <- loss_fn(pred, dl$dataset$tensors$y, class_weights) - } - - # calculate losses - loss_curr <- loss$item() - loss_vec[epoch] <- loss_curr - - if (is.nan(loss_curr)) { - cli::cli_warn("Current loss in NaN. Training wil be stopped.") - break() - } - - if (loss_curr >= loss_min) { - poor_epoch <- poor_epoch + 1 - loss_note <- paste0(" ", cli::symbol$cross, " ") - } else { - loss_min <- loss_curr - loss_note <- NULL - poor_epoch <- 0 - best_epoch <- epoch - } - loss_prev <- loss_curr - - # persists models and coefficients - param_per_epoch[[epoch]] <- - lapply(model$state_dict(), function(x) torch::as_array(x$cpu())) - - if (verbose) { - msg <- paste("epoch:", epoch_chr[epoch], "learn rate", signif(learn_rate, 3), - loss_label, signif(loss_curr, 3), loss_note) - - cli::cli_inform(msg) - } - - if (poor_epoch == stop_iter) { - break() - } + ## ----------------------------------------------------------------------------- + param_per_epoch <- list() + + # Optimize parameters + for (epoch in 1:epochs) { + + # For future work with other optimizers, see + # https://github.com/tidymodels/brulee/pull/56#discussion_r972049108 + # "Creating a new optimizer every epoch will reset the optimizer state. + # For example, SGD with momentum keeps track of the latest update for each + # parameter, so it might be OK to just restart. + # But other optimizers like Adam, will keep a moving average of updates and + # resetting them can interfere in training." + + learn_rate <- set_learn_rate(epoch - 1, learn_rate, type = rate_schedule, ...) + optimizer_obj <- set_optimizer(optimizer, model, learn_rate, momentum) + + # training loop + coro::loop( + for (batch in dl) { + cl <- function() { + optimizer_obj$zero_grad() + pred <- model(batch$x) + loss <- loss_fn(pred, batch$y, class_weights) + loss$backward() + loss + } + optimizer_obj$step(cl) } + ) + + # calculate loss on the full datasets + if (validation > 0) { + pred <- model(dl_val$dataset$tensors$x) + loss <- loss_fn(pred, dl_val$dataset$tensors$y, class_weights) + } else { + pred <- model(dl$dataset$tensors$x) + loss <- loss_fn(pred, dl$dataset$tensors$y, class_weights) + } + + # calculate losses + loss_curr <- loss$item() + loss_vec[epoch] <- loss_curr + + if (is.nan(loss_curr)) { + cli::cli_warn("Loss is NaN at epoch {epoch}. 
Training is stopped.") + break() + } + + if (loss_curr >= loss_min) { + poor_epoch <- poor_epoch + 1 + loss_note <- paste0(" ", cli::symbol$cross, " ") + } else { + loss_min <- loss_curr + loss_note <- NULL + poor_epoch <- 0 + best_epoch <- epoch + } + loss_prev <- loss_curr + + # persists models and coefficients + param_per_epoch[[epoch]] <- + lapply(model$state_dict(), function(x) torch::as_array(x$cpu())) + + if (verbose) { + msg <- paste("epoch:", epoch_chr[epoch], "learn rate", signif(learn_rate, 3), + loss_label, signif(loss_curr, 3), loss_note) + + cli::cli_inform(msg) + } + + if (poor_epoch == stop_iter) { + break() + } - # ------------------------------------------------------------------------------ - - class_weights <- as.numeric(class_weights) - names(class_weights) <- lvls - - ## --------------------------------------------------------------------------- - - list( - model_obj = model_to_raw(model), - estimates = param_per_epoch, - loss = loss_vec[1:length(param_per_epoch)], - best_epoch = best_epoch, - dims = list(p = p, n = n, h = hidden_units, y = y_dim, levels = lvls, features = colnames(x)), - y_stats = y_stats, - parameters = list( - activation = activation, - hidden_units = hidden_units, - learn_rate = learn_rate, - class_weights = class_weights, - penalty = penalty, - mixture = mixture, - dropout = dropout, - validation = validation, - optimizer = optimizer, - batch_size = batch_size, - momentum = momentum, - sched = rate_schedule, - sched_opt = list(...) - ) - ) } + # ------------------------------------------------------------------------------ + + class_weights <- as.numeric(class_weights) + names(class_weights) <- lvls + + ## --------------------------------------------------------------------------- + + list( + model_obj = model_to_raw(model), + estimates = param_per_epoch, + loss = loss_vec[1:length(param_per_epoch)], + best_epoch = best_epoch, + dims = list(p = p, n = n, h = hidden_units, y = y_dim, levels = lvls, + features = colnames(x)), + y_stats = y_stats, + parameters = list( + activation = activation, + hidden_units = hidden_units, + learn_rate = learn_rate, + class_weights = class_weights, + penalty = penalty, + mixture = mixture, + dropout = dropout, + validation = validation, + optimizer = optimizer, + batch_size = batch_size, + momentum = momentum, + sched = rate_schedule, + sched_opt = list(...) + ) + ) + } + mlp_module <- - torch::nn_module( - "mlp_module", - initialize = function(num_pred, hidden_units, act_type, dropout, y_dim) { - - layers <- list() - - # input layer - layers[[1]] <- torch::nn_linear(num_pred, hidden_units[1]) - layers[[2]] <- get_activation_fn(act_type[1]) - - # if hidden units is a vector then we add those layers - if (length(hidden_units) > 1) { - for (i in 2:length(hidden_units)) { - layers[[length(layers) + 1]] <- - torch::nn_linear(hidden_units[i-1], hidden_units[i]) - - layers[[length(layers) + 1]] <- get_activation_fn(act_type[i]) - } - } - - # we only add dropout between the last layer and the output layer - if (dropout > 0) { - layers[[length(layers) + 1]] <- torch::nn_dropout(p = dropout) - } - - # output layer - layers[[length(layers) + 1]] <- - torch::nn_linear(hidden_units[length(hidden_units)], y_dim) - - # conditionally add the softmax layer - if (y_dim > 1) { - layers[[length(layers) + 1]] <- torch::nn_softmax(dim = 2) - } - - # create a sequential module that calls the layers in the same order. 
- self$model <- torch::nn_sequential(!!!layers) - - }, - forward = function(x) { - self$model(x) + torch::nn_module( + "mlp_module", + initialize = function(num_pred, hidden_units, act_type, dropout, y_dim) { + + layers <- list() + + # input layer + layers[[1]] <- torch::nn_linear(num_pred, hidden_units[1]) + layers[[2]] <- get_activation_fn(act_type[1]) + + # if hidden units is a vector then we add those layers + if (length(hidden_units) > 1) { + for (i in 2:length(hidden_units)) { + layers[[length(layers) + 1]] <- + torch::nn_linear(hidden_units[i-1], hidden_units[i]) + layers[[length(layers) + 1]] <- get_activation_fn(act_type[i]) } - ) + } + + # we only add dropout between the last layer and the output layer + if (dropout > 0) { + layers[[length(layers) + 1]] <- torch::nn_dropout(p = dropout) + } + + # output layer + layers[[length(layers) + 1]] <- + torch::nn_linear(hidden_units[length(hidden_units)], y_dim) + + # conditionally add the softmax layer + if (y_dim > 1) { + layers[[length(layers) + 1]] <- torch::nn_softmax(dim = 2) + } + + # create a sequential module that calls the layers in the same order. + self$model <- torch::nn_sequential(!!!layers) + + }, + forward = function(x) { + self$model(x) + } + ) ## ----------------------------------------------------------------------------- get_num_mlp_coef <- function(x) { - length(unlist(x$estimates[[1]])) + length(unlist(x$estimates[[1]])) } get_units<- function(x) { - if (length(x$dims$h) > 1) { - res <- paste0("c(", paste(x$dims$h, collapse = ","), ") hidden units,") - } else { - res <- paste(format(x$dims$h, big.mark = ","), "hidden units,") - } - res + if (length(x$dims$h) > 1) { + res <- paste0("c(", paste(x$dims$h, collapse = ","), ") hidden units,") + } else { + res <- paste(format(x$dims$h, big.mark = ","), "hidden units,") + } + res +} + +get_acts <- function(x) { + if (length(x$dims$h) > 1) { + res <- paste0("c(", paste(x$parameters$activation, collapse = ","), ") activation,") + } else { + res <- paste(x$parameters$activation, "activation,") + } + res } + #' @export print.brulee_mlp <- function(x, ...) { - cat("Multilayer perceptron\n\n") - cat(x$param$activation, "activation\n") - cat( - get_units(x), "", - format(get_num_mlp_coef(x), big.mark = ","), "model parameters\n" - ) - brulee_print(x, ...) + cat("Multilayer perceptron\n\n") + cat( + get_acts(x), "\n", + get_units(x), "\n", + format(get_num_mlp_coef(x), big.mark = ","), " model parameters\n", + sep = "" + ) + brulee_print(x, ...) } ## ----------------------------------------------------------------------------- @@ -880,3 +895,238 @@ set_optimizer <- function(optimizer, model, learn_rate, momentum) { } res } + +init_layer <- function(layer, act) { + gain_for_rng <- torch::nn_init_calculate_gain(act) + torch::nn_init_xavier_normal_(layer, gain_for_rng) +} + +# ------------------------------------------------------------------------------ + +#' @export +#' @rdname brulee_mlp +brulee_mlp_two_layer <- function(x, ...) { + UseMethod("brulee_mlp_two_layer") +} + +#' @export +#' @rdname brulee_mlp +brulee_mlp_two_layer.default <- function(x, ...) { + stop("`brulee_mlp_two_layer()` is not defined for a '", class(x)[1], "'.", call. 
= FALSE) +} + +# XY method - data frame + +#' @export +#' @rdname brulee_mlp +brulee_mlp_two_layer.data.frame <- + function(x, + y, + epochs = 100L, + hidden_units = 3L, + hidden_units_2 = 3L, + activation = "relu", + activation_2 = "relu", + penalty = 0.001, + mixture = 0, + dropout = 0, + validation = 0.1, + optimizer = "LBFGS", + learn_rate = 0.01, + rate_schedule = "none", + momentum = 0.0, + batch_size = NULL, + class_weights = NULL, + stop_iter = 5, + verbose = FALSE, + ...) { + processed <- hardhat::mold(x, y) + + hidden_units_all <- c(hidden_units, hidden_units_2) + activation_all <- c(activation, activation_2) + + res <- + brulee_mlp_bridge( + processed, + epochs = epochs, + hidden_units = hidden_units_all, + activation = activation_all, + learn_rate = learn_rate, + rate_schedule = rate_schedule, + penalty = penalty, + mixture = mixture, + dropout = dropout, + validation = validation, + optimizer = optimizer, + momentum = momentum, + batch_size = batch_size, + class_weights = class_weights, + stop_iter = stop_iter, + verbose = verbose, + ... + ) + class(res) <- c("brulee_mlp_two_layer", class(res)) + res + } + +# XY method - matrix + +#' @export +#' @rdname brulee_mlp +brulee_mlp_two_layer.matrix <- function(x, + y, + epochs = 100L, + hidden_units = 3L, + hidden_units_2 = 3L, + activation = "relu", + activation_2 = "relu", + penalty = 0.001, + mixture = 0, + dropout = 0, + validation = 0.1, + optimizer = "LBFGS", + learn_rate = 0.01, + rate_schedule = "none", + momentum = 0.0, + batch_size = NULL, + class_weights = NULL, + stop_iter = 5, + verbose = FALSE, + ...) { + processed <- hardhat::mold(x, y) + + hidden_units_all <- c(hidden_units, hidden_units_2) + activation_all <- c(activation, activation_2) + + res <- + brulee_mlp_bridge( + processed, + epochs = epochs, + hidden_units = hidden_units_all, + activation = activation_all, + learn_rate = learn_rate, + rate_schedule = rate_schedule, + momentum = momentum, + penalty = penalty, + mixture = mixture, + dropout = dropout, + validation = validation, + optimizer = optimizer, + batch_size = batch_size, + class_weights = class_weights, + stop_iter = stop_iter, + verbose = verbose, + ... + ) + class(res) <- c("brulee_mlp_two_layer", class(res)) + res +} + +# Formula method + +#' @export +#' @rdname brulee_mlp +brulee_mlp_two_layer.formula <- + function(formula, + data, + epochs = 100L, + hidden_units = 3L, + hidden_units_2 = 3L, + activation = "relu", + activation_2 = "relu", + penalty = 0.001, + mixture = 0, + dropout = 0, + validation = 0.1, + optimizer = "LBFGS", + learn_rate = 0.01, + rate_schedule = "none", + momentum = 0.0, + batch_size = NULL, + class_weights = NULL, + stop_iter = 5, + verbose = FALSE, + ...) { + processed <- hardhat::mold(formula, data) + + hidden_units_all <- c(hidden_units, hidden_units_2) + activation_all <- c(activation, activation_2) + + res <- + brulee_mlp_bridge( + processed, + epochs = epochs, + hidden_units = hidden_units_all, + activation = activation_all, + learn_rate = learn_rate, + rate_schedule = rate_schedule, + momentum = momentum, + penalty = penalty, + mixture = mixture, + dropout = dropout, + validation = validation, + optimizer = optimizer, + batch_size = batch_size, + class_weights = class_weights, + stop_iter = stop_iter, + verbose = verbose, + ... 
+ ) + class(res) <- c("brulee_mlp_two_layer", class(res)) + res + } + +# Recipe method + +#' @export +#' @rdname brulee_mlp +brulee_mlp_two_layer.recipe <- + function(x, + data, + epochs = 100L, + hidden_units = 3L, + hidden_units_2 = 3L, + activation = "relu", + activation_2 = "relu", + penalty = 0.001, + mixture = 0, + dropout = 0, + validation = 0.1, + optimizer = "LBFGS", + learn_rate = 0.01, + rate_schedule = "none", + momentum = 0.0, + batch_size = NULL, + class_weights = NULL, + stop_iter = 5, + verbose = FALSE, + ...) { + processed <- hardhat::mold(x, data) + + hidden_units_all <- c(hidden_units, hidden_units_2) + activation_all <- c(activation, activation_2) + + res <- + brulee_mlp_bridge( + processed, + epochs = epochs, + hidden_units = hidden_units_all, + activation = activation_all, + learn_rate = learn_rate, + rate_schedule = rate_schedule, + momentum = momentum, + penalty = penalty, + mixture = mixture, + dropout = dropout, + validation = validation, + optimizer = optimizer, + batch_size = batch_size, + class_weights = class_weights, + stop_iter = stop_iter, + verbose = verbose, + ... + ) + class(res) <- c("brulee_mlp_two_layer", class(res)) + res + } + diff --git a/R/tunable.R b/R/tunable.R index 78eb3b8..367f2ce 100644 --- a/R/tunable.R +++ b/R/tunable.R @@ -1,3 +1,5 @@ +tune_activations <- c("relu", "tanh", "elu", "log_sigmoid", "tanhshrink") + #' Internal functions and methods #' @export #' @keywords internal @@ -9,7 +11,7 @@ tunable.brulee_mlp <- function(x, ...) { call_info = list( list(pkg = "dials", fun = "epochs", range = c(5L, 500L)), list(pkg = "dials", fun = "hidden_units", range = c(2L, 50L)), - list(pkg = "dials", fun = "activation"), + list(pkg = "dials", fun = "activation", values = tune_activations), list(pkg = "dials", fun = "penalty"), list(pkg = "dials", fun = "dropout"), list(pkg = "dials", fun = "learn_rate", range = c(-3, -1/5)), @@ -24,6 +26,35 @@ tunable.brulee_mlp <- function(x, ...) { ) } +#' @export +#' @keywords internal +#' @name brulee-internal +tunable.brulee_mlp_two_layer <- function(x, ...) 
{ + tibble::tibble( + name = c('epochs', 'hidden_units', 'hidden_units_2', 'activation', 'activation_2', + 'penalty', 'dropout', 'learn_rate', 'momentum', 'batch_size', + 'class_weights', 'stop_iter'), + call_info = list( + list(pkg = "dials", fun = "epochs", range = c(5L, 500L)), + list(pkg = "dials", fun = "hidden_units", range = c(2L, 50L)), + list(pkg = "dials", fun = "hidden_units", range = c(2L, 50L)), + list(pkg = "dials", fun = "activation", values = tune_activations), + list(pkg = "dials", fun = "activation", values = tune_activations), + list(pkg = "dials", fun = "penalty"), + list(pkg = "dials", fun = "dropout"), + list(pkg = "dials", fun = "learn_rate", range = c(-3, -1/5)), + list(pkg = "dials", fun = "momentum", range = c(0.50, 0.95)), + list(pkg = "dials", fun = "batch_size"), + list(pkg = "dials", fun = "stop_iter"), + list(pkg = "dials", fun = "class_weights") + ), + source = "model_spec", + component = class(x)[class(x) != "model_spec"][1], + component_id = "main" + ) +} + + #' @export #' @keywords internal #' @rdname brulee-internal diff --git a/inst/WORDLIST b/inst/WORDLIST index 56eb8d0..c5851ad 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -5,7 +5,6 @@ Lifecycle ORCID PBC SGD -elu extensibility funder mlp @@ -15,3 +14,4 @@ relu sigmoid tanh tibble +tidymodels diff --git a/man/brulee-internal.Rd b/man/brulee-internal.Rd index 489345e..b2b831d 100644 --- a/man/brulee-internal.Rd +++ b/man/brulee-internal.Rd @@ -3,6 +3,7 @@ \name{brulee-internal} \alias{brulee-internal} \alias{tunable.brulee_mlp} +\alias{tunable.brulee_mlp_two_layer} \alias{tunable.brulee_logistic_reg} \alias{tunable.brulee_multinomial_reg} \alias{tunable.brulee_linear_reg} @@ -10,6 +11,8 @@ \usage{ \method{tunable}{brulee_mlp}(x, ...) +\method{tunable}{brulee_mlp_two_layer}(x, ...) + \method{tunable}{brulee_logistic_reg}(x, ...) \method{tunable}{brulee_multinomial_reg}(x, ...) diff --git a/man/brulee_mlp.Rd b/man/brulee_mlp.Rd index 83b3440..573a68d 100644 --- a/man/brulee_mlp.Rd +++ b/man/brulee_mlp.Rd @@ -7,6 +7,12 @@ \alias{brulee_mlp.matrix} \alias{brulee_mlp.formula} \alias{brulee_mlp.recipe} +\alias{brulee_mlp_two_layer} +\alias{brulee_mlp_two_layer.default} +\alias{brulee_mlp_two_layer.data.frame} +\alias{brulee_mlp_two_layer.matrix} +\alias{brulee_mlp_two_layer.formula} +\alias{brulee_mlp_two_layer.recipe} \title{Fit neural networks} \usage{ brulee_mlp(x, ...) @@ -96,6 +102,102 @@ brulee_mlp(x, ...) verbose = FALSE, ... ) + +brulee_mlp_two_layer(x, ...) + +\method{brulee_mlp_two_layer}{default}(x, ...) + +\method{brulee_mlp_two_layer}{data.frame}( + x, + y, + epochs = 100L, + hidden_units = 3L, + hidden_units_2 = 3L, + activation = "relu", + activation_2 = "relu", + penalty = 0.001, + mixture = 0, + dropout = 0, + validation = 0.1, + optimizer = "LBFGS", + learn_rate = 0.01, + rate_schedule = "none", + momentum = 0, + batch_size = NULL, + class_weights = NULL, + stop_iter = 5, + verbose = FALSE, + ... +) + +\method{brulee_mlp_two_layer}{matrix}( + x, + y, + epochs = 100L, + hidden_units = 3L, + hidden_units_2 = 3L, + activation = "relu", + activation_2 = "relu", + penalty = 0.001, + mixture = 0, + dropout = 0, + validation = 0.1, + optimizer = "LBFGS", + learn_rate = 0.01, + rate_schedule = "none", + momentum = 0, + batch_size = NULL, + class_weights = NULL, + stop_iter = 5, + verbose = FALSE, + ... 
+) + +\method{brulee_mlp_two_layer}{formula}( + formula, + data, + epochs = 100L, + hidden_units = 3L, + hidden_units_2 = 3L, + activation = "relu", + activation_2 = "relu", + penalty = 0.001, + mixture = 0, + dropout = 0, + validation = 0.1, + optimizer = "LBFGS", + learn_rate = 0.01, + rate_schedule = "none", + momentum = 0, + batch_size = NULL, + class_weights = NULL, + stop_iter = 5, + verbose = FALSE, + ... +) + +\method{brulee_mlp_two_layer}{recipe}( + x, + data, + epochs = 100L, + hidden_units = 3L, + hidden_units_2 = 3L, + activation = "relu", + activation_2 = "relu", + penalty = 0.001, + mixture = 0, + dropout = 0, + validation = 0.1, + optimizer = "LBFGS", + learn_rate = 0.01, + rate_schedule = "none", + momentum = 0, + batch_size = NULL, + class_weights = NULL, + stop_iter = 5, + verbose = FALSE, + ... +) } \arguments{ \item{x}{Depending on the context: @@ -126,7 +228,7 @@ specified as: of integers. If a vector of integers, the model will have \code{length(hidden_units)} layers each with \code{hidden_units[i]} hidden units.} -\item{activation}{A character vector for the activation function )such as +\item{activation}{A character vector for the activation function (such as "relu", "tanh", "sigmoid", and so on). See \code{\link[=brulee_activations]{brulee_activations()}} for a list of possible values. If \code{hidden_units} is a vector, \code{activation} can be a character vector with length equals to \code{length(hidden_units)} @@ -184,6 +286,10 @@ and the predictor term(s) on the right-hand side.} \itemize{ \item A \strong{data frame} containing both the predictors and the outcome. }} + +\item{hidden_units_2}{An integer for the number of hidden units for a second layer.} + +\item{activation_2}{A character vector for the activation function for a second layer.} } \value{ A \code{brulee_mlp} object with elements: @@ -202,7 +308,9 @@ likelihood for classification) at each epoch. } \description{ \code{brulee_mlp()} fits neural network models using stochastic gradient -descent. Multiple layers can be used. +descent. Multiple layers can be used. For working with two-layer networks in +tidymodels, \code{brulee_mlp_two_layer()} can be helpful for specifying tuning +parameters as scalars. } \details{ This function fits feed-forward neural network models for regression (when @@ -303,6 +411,15 @@ if (torch::torch_is_installed()) { bind_cols(ames_test) \%>\% rmse(Sale_Price, .pred) + # Using multiple hidden layers and activation functions + set.seed(2) + hidden_fit <- brulee_mlp(ames_rec, data = ames_train, + hidden_units = c(15L, 17L), activation = c("relu", "elu"), + dropout = 0.05, rate_schedule = "cyclic", step_size = 4) + + predict(hidden_fit, ames_test) \%>\% + bind_cols(ames_test) \%>\% + rmse(Sale_Price, .pred) # ------------------------------------------------------------------------------ # classification diff --git a/tests/testthat/_snaps/overflow.md b/tests/testthat/_snaps/overflow.md new file mode 100644 index 0000000..2e6b895 --- /dev/null +++ b/tests/testthat/_snaps/overflow.md @@ -0,0 +1,22 @@ +# NaN loss due to overflow + + Loss is NaN at epoch 10. Training is stopped. 
+ +--- + + Code + print(mlp_fit) + Output + Multilayer perceptron + + relu activation, + 10 hidden units, + 52 model parameters + 200 samples, 2 features, 2 classes + class weights one=1, two=1 + weight decay: 0.001 + dropout proportion: 0 + batch size: 180 + learn rate: 0.01 + validation loss after 9 epochs: 0.469 + diff --git a/tests/testthat/_snaps/tunable.md b/tests/testthat/_snaps/tunable.md new file mode 100644 index 0000000..0f3d5ad --- /dev/null +++ b/tests/testthat/_snaps/tunable.md @@ -0,0 +1,429 @@ +# tunable values + + Code + brulee:::tunable.brulee_linear_reg(1)$call_info + Output + [[1]] + [[1]]$pkg + [1] "dials" + + [[1]]$fun + [1] "epochs" + + [[1]]$range + [1] 5 100 + + + [[2]] + [[2]]$pkg + [1] "dials" + + [[2]]$fun + [1] "penalty" + + + [[3]] + [[3]]$pkg + [1] "dials" + + [[3]]$fun + [1] "learn_rate" + + [[3]]$range + [1] -3.0 -0.2 + + + [[4]] + [[4]]$pkg + [1] "dials" + + [[4]]$fun + [1] "momentum" + + [[4]]$range + [1] 0.50 0.95 + + + [[5]] + [[5]]$pkg + [1] "dials" + + [[5]]$fun + [1] "batch_size" + + + [[6]] + [[6]]$pkg + [1] "dials" + + [[6]]$fun + [1] "stop_iter" + + + +--- + + Code + brulee:::tunable.brulee_logistic_reg(1)$call_info + Output + [[1]] + [[1]]$pkg + [1] "dials" + + [[1]]$fun + [1] "epochs" + + [[1]]$range + [1] 5 100 + + + [[2]] + [[2]]$pkg + [1] "dials" + + [[2]]$fun + [1] "penalty" + + + [[3]] + [[3]]$pkg + [1] "dials" + + [[3]]$fun + [1] "learn_rate" + + [[3]]$range + [1] -3.0 -0.2 + + + [[4]] + [[4]]$pkg + [1] "dials" + + [[4]]$fun + [1] "momentum" + + [[4]]$range + [1] 0.50 0.95 + + + [[5]] + [[5]]$pkg + [1] "dials" + + [[5]]$fun + [1] "batch_size" + + + [[6]] + [[6]]$pkg + [1] "dials" + + [[6]]$fun + [1] "stop_iter" + + + [[7]] + [[7]]$pkg + [1] "dials" + + [[7]]$fun + [1] "class_weights" + + + +--- + + Code + brulee:::tunable.brulee_multinomial_reg(1)$call_info + Output + [[1]] + [[1]]$pkg + [1] "dials" + + [[1]]$fun + [1] "epochs" + + [[1]]$range + [1] 5 100 + + + [[2]] + [[2]]$pkg + [1] "dials" + + [[2]]$fun + [1] "penalty" + + + [[3]] + [[3]]$pkg + [1] "dials" + + [[3]]$fun + [1] "learn_rate" + + [[3]]$range + [1] -3.0 -0.2 + + + [[4]] + [[4]]$pkg + [1] "dials" + + [[4]]$fun + [1] "momentum" + + [[4]]$range + [1] 0.50 0.95 + + + [[5]] + [[5]]$pkg + [1] "dials" + + [[5]]$fun + [1] "batch_size" + + + [[6]] + [[6]]$pkg + [1] "dials" + + [[6]]$fun + [1] "stop_iter" + + + [[7]] + [[7]]$pkg + [1] "dials" + + [[7]]$fun + [1] "class_weights" + + + +--- + + Code + brulee:::tunable.brulee_mlp(1)$call_info + Output + [[1]] + [[1]]$pkg + [1] "dials" + + [[1]]$fun + [1] "epochs" + + [[1]]$range + [1] 5 500 + + + [[2]] + [[2]]$pkg + [1] "dials" + + [[2]]$fun + [1] "hidden_units" + + [[2]]$range + [1] 2 50 + + + [[3]] + [[3]]$pkg + [1] "dials" + + [[3]]$fun + [1] "activation" + + [[3]]$values + [1] "relu" "tanh" "elu" "log_sigmoid" "tanhshrink" + + + [[4]] + [[4]]$pkg + [1] "dials" + + [[4]]$fun + [1] "penalty" + + + [[5]] + [[5]]$pkg + [1] "dials" + + [[5]]$fun + [1] "dropout" + + + [[6]] + [[6]]$pkg + [1] "dials" + + [[6]]$fun + [1] "learn_rate" + + [[6]]$range + [1] -3.0 -0.2 + + + [[7]] + [[7]]$pkg + [1] "dials" + + [[7]]$fun + [1] "momentum" + + [[7]]$range + [1] 0.50 0.95 + + + [[8]] + [[8]]$pkg + [1] "dials" + + [[8]]$fun + [1] "batch_size" + + + [[9]] + [[9]]$pkg + [1] "dials" + + [[9]]$fun + [1] "stop_iter" + + + [[10]] + [[10]]$pkg + [1] "dials" + + [[10]]$fun + [1] "class_weights" + + + +--- + + Code + brulee:::tunable.brulee_mlp_two_layer(1)$call_info + Output + [[1]] + [[1]]$pkg + [1] "dials" + + [[1]]$fun + [1] "epochs" + + [[1]]$range + [1] 
5 500 + + + [[2]] + [[2]]$pkg + [1] "dials" + + [[2]]$fun + [1] "hidden_units" + + [[2]]$range + [1] 2 50 + + + [[3]] + [[3]]$pkg + [1] "dials" + + [[3]]$fun + [1] "hidden_units" + + [[3]]$range + [1] 2 50 + + + [[4]] + [[4]]$pkg + [1] "dials" + + [[4]]$fun + [1] "activation" + + [[4]]$values + [1] "relu" "tanh" "elu" "log_sigmoid" "tanhshrink" + + + [[5]] + [[5]]$pkg + [1] "dials" + + [[5]]$fun + [1] "activation" + + [[5]]$values + [1] "relu" "tanh" "elu" "log_sigmoid" "tanhshrink" + + + [[6]] + [[6]]$pkg + [1] "dials" + + [[6]]$fun + [1] "penalty" + + + [[7]] + [[7]]$pkg + [1] "dials" + + [[7]]$fun + [1] "dropout" + + + [[8]] + [[8]]$pkg + [1] "dials" + + [[8]]$fun + [1] "learn_rate" + + [[8]]$range + [1] -3.0 -0.2 + + + [[9]] + [[9]]$pkg + [1] "dials" + + [[9]]$fun + [1] "momentum" + + [[9]]$range + [1] 0.50 0.95 + + + [[10]] + [[10]]$pkg + [1] "dials" + + [[10]]$fun + [1] "batch_size" + + + [[11]] + [[11]]$pkg + [1] "dials" + + [[11]]$fun + [1] "stop_iter" + + + [[12]] + [[12]]$pkg + [1] "dials" + + [[12]]$fun + [1] "class_weights" + + + diff --git a/tests/testthat/test-mlp-multinomial.R b/tests/testthat/test-mlp-multinomial.R index 745fbab..99e13f1 100644 --- a/tests/testthat/test-mlp-multinomial.R +++ b/tests/testthat/test-mlp-multinomial.R @@ -30,7 +30,7 @@ test_that("basic multinomial mlp LBFGS", { mnl_fit_lbfgs <- brulee_mlp(class ~ ., mnl_tr, - epochs = 200, + epochs = 10, hidden_units = 5, rate_schedule = "cyclic", learn_rate = 0.1)}, diff --git a/tests/testthat/test-mlp-regression.R b/tests/testthat/test-mlp-regression.R index 18b1eef..47da8d8 100644 --- a/tests/testthat/test-mlp-regression.R +++ b/tests/testthat/test-mlp-regression.R @@ -327,7 +327,6 @@ test_that('bad args', { test_that("mlp learns something", { skip_if(!torch::torch_is_installed()) - skip_on_os("mac", arch = "aarch64") # ------------------------------------------------------------------------------ @@ -350,7 +349,6 @@ test_that("mlp learns something", { }) test_that("variable hidden_units length", { skip_if(!torch::torch_is_installed()) - skip_on_os("mac", arch = "aarch64") x <- data.frame(x = rnorm(1000)) y <- 2 * x$x @@ -376,3 +374,108 @@ test_that("variable hidden_units length", { ) }) + + +test_that('two-layer networks', { + skip_if(!torch::torch_is_installed()) + + skip_if_not_installed("modeldata") + skip_if_not_installed("yardstick") + skip_if_not_installed("recipes") + + suppressPackageStartupMessages(library(dplyr)) + suppressPackageStartupMessages(library(recipes)) + + # ------------------------------------------------------------------------------ + + set.seed(585) + reg_tr <- modeldata::sim_regression(5000) + reg_te <- modeldata::sim_regression(1000) + + reg_tr_x_df <- reg_tr[, -1] + reg_tr_x_mat <- as.matrix(reg_tr_x_df) + reg_tr_y <- reg_tr$outcome + + reg_rec <- + recipe(outcome ~ ., data = reg_tr) %>% + step_normalize(all_predictors()) + + # ------------------------------------------------------------------------------ + + # matrix x + expect_error({ + set.seed(1) + mlp_reg_mat_two_fit <- + brulee_mlp_two_layer( + reg_tr_x_mat, + reg_tr_y, + mixture = 0, + learn_rate = .1, + hidden_units = 5, + hidden_units_2 = 10, + activation = "relu", + activation_2 = "elu" + ) + }, + regex = NA) + + expect_error({ + set.seed(1) + mlp_reg_mat_two_check_fit <- + brulee_mlp( + reg_tr_x_mat, + reg_tr_y, + mixture = 0, + learn_rate = .1, + hidden_units = c(5, 10), + activation = c("relu", "elu") + ) + }, + regex = NA) + + expect_equal(mlp_reg_mat_two_fit$loss, mlp_reg_mat_two_check_fit$loss) + + # data frame x 
(all numeric) + expect_error( + mlp_reg_df_two_fit <- + brulee_mlp_two_layer( + reg_tr_x_df, + reg_tr_y, + validation = .2, + hidden_units = 5, + hidden_units_2 = 10, + activation = "celu", + activation_2 = "gelu" + ), + regex = NA + ) + + # formula (mixed) + expect_error({ + set.seed(8373) + mlp_reg_f_two_fit <- brulee_mlp_two_layer( + outcome ~ ., + reg_tr, + hidden_units = 5, + hidden_units_2 = 10, + activation = "hardshrink", + activation_2 = "hardsigmoid" + ) + }, + regex = NA) + + # recipe + expect_error({ + set.seed(8373) + mlp_reg_rec_two_fit <- brulee_mlp_two_layer( + reg_rec, + reg_tr, + hidden_units = 5, + hidden_units_2 = 10, + activation = "hardtanh", + activation_2 = "sigmoid" + ) + }, + regex = NA) + +}) diff --git a/tests/testthat/test-overflow.R b/tests/testthat/test-overflow.R new file mode 100644 index 0000000..4dca35a --- /dev/null +++ b/tests/testthat/test-overflow.R @@ -0,0 +1,20 @@ + + +test_that("NaN loss due to overflow", { + skip_if_not_installed("modeldata") + skip_on_os(c("windows", "linux", "solaris")) + skip_on_os("mac", arch = "x86_64") + + i <- 81872 + set.seed(i) + data_tr <- modeldata::sim_logistic(200, ~ .1 + 2 * A - 3 * B + 1 * A *B, corr = .7) + + expect_snapshot_warning({ + set.seed(i+1) + mlp_fit <- brulee_mlp(class ~ ., data = data_tr, hidden_units = 10, + stop_iter = Inf) + }) + expect_snapshot(print(mlp_fit)) + expect_equal(length(mlp_fit$estimates), 9) + +}) diff --git a/tests/testthat/test-tunable.R b/tests/testthat/test-tunable.R new file mode 100644 index 0000000..6dba141 --- /dev/null +++ b/tests/testthat/test-tunable.R @@ -0,0 +1,8 @@ + +test_that("tunable values", { + expect_snapshot(brulee:::tunable.brulee_linear_reg(1)$call_info) + expect_snapshot(brulee:::tunable.brulee_logistic_reg(1)$call_info) + expect_snapshot(brulee:::tunable.brulee_multinomial_reg(1)$call_info) + expect_snapshot(brulee:::tunable.brulee_mlp(1)$call_info) + expect_snapshot(brulee:::tunable.brulee_mlp_two_layer(1)$call_info) +})
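
------------------------------------------------------------------------------

A minimal usage sketch of the new interface (outside the patch itself; it
assumes torch is installed and mirrors the simulated-data setup in the new
regression tests). As those tests verify, the scalar arguments are combined
into the same vectors that `brulee_mlp()` already accepts:

    library(brulee)

    set.seed(585)
    reg_tr <- modeldata::sim_regression(1000)

    set.seed(1)
    fit <- brulee_mlp_two_layer(
      outcome ~ ., data = reg_tr,
      hidden_units   = 5,  activation   = "relu",
      hidden_units_2 = 10, activation_2 = "elu",
      learn_rate = 0.1
    )

    # the tests check that this matches the existing vector interface:
    # brulee_mlp(outcome ~ ., data = reg_tr, hidden_units = c(5, 10),
    #            activation = c("relu", "elu"), learn_rate = 0.1)

    predict(fit, reg_tr)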