From 011cbbac976e6c2e89b8bdbb2ffa5c1a01be004f Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 10 Jan 2025 11:29:27 -0500 Subject: [PATCH] speed up target_skip() --- NEWS.md | 8 ++++---- R/class_builder.R | 30 +++++++++++++++++++----------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/NEWS.md b/NEWS.md index 6e7f744e..f39ea20c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,15 +6,15 @@ Machine | Before (seconds) | After (seconds) | Speedup ---|---|---|--- -M2 Macbook | 413.16 | 25.54 | 16.177 -RHEL9 | 450.66 | 105.9 | 4.256 +M2 Macbook | 413.16 | 35.538 | 11.62587 +RHEL9 | 450.66 | 94.08 | 4.790 And for `tar_outdated()` using all the default settings Machine | Before (seconds) | After (seconds) | Speedup ---|---|---|--- -M2 Macbook | 91.314 | 17.574 | 5.196 -RHEL9 | 167.809 | 34.321 | 4.889 +M2 Macbook | 91.314 | 16.636 | 5.48894 +RHEL9 | 167.809 | 37.395 | 4.487472 To take advantage of these speed gains for an existing pipeline, you may have to run `tar_make()` to convert the time stamps and file sizes to a new format. This initial `tar_make()` is slow, but subsequent `tar_make()` calls should be much faster than before the upgrade. diff --git a/R/class_builder.R b/R/class_builder.R index 1ab28196..5404abca 100644 --- a/R/class_builder.R +++ b/R/class_builder.R @@ -104,9 +104,9 @@ target_should_run.tar_builder <- function(target, meta) { # Willing to ignore high cyclomatic complexity score. # nolint start builder_should_run <- function(target, meta) { - cue <- target$cue + cue <- .subset2(target, "cue") if (cue_meta_exists(cue, target, meta)) return(TRUE) - row <- meta$get_row(target_get_name(target)) + row <- .subset2(meta, "get_row")(target_get_name(target)) if (cue_meta(cue, target, meta, row)) return(TRUE) if (cue_always(cue, target, meta)) return(TRUE) if (cue_never(cue, target, meta)) return(FALSE) @@ -194,15 +194,19 @@ target_skip.tar_builder <- function( ) { target_update_queue(target, scheduler) name <- target_get_name(target) - row <- meta$get_row(name) + row <- .subset2(meta, "get_row")(name) path <- store_path_from_name( - store = target$store, - format = row$format, + store = .subset2(target, "store"), + format = .subset2(row, "format"), name = name, - path = unlist(row$path), - path_store = meta$store + path = unlist(.subset2(row, "path")), + path_store = .subset2(meta, "store") + ) + file_repopulate( + file = .subset2(target, "file"), + path = path, + data = .subset2(row, "data") ) - file_repopulate(target$file, path = path, data = row$data) pipeline_set_target(pipeline, target) if (active) { builder_ensure_workspace( @@ -212,12 +216,16 @@ target_skip.tar_builder <- function( meta = meta ) } + progress <- .subset2(scheduler, "progress") if_any( active, - scheduler$progress$register_skipped(target), - scheduler$progress$assign_skipped(target_get_name(target)) + .subset2(progress, "register_skipped")(target), + .subset2(progress, "assign_skipped")(target_get_name(target)) + ) + .subset2(.subset2(scheduler, "reporter"), "report_skipped")( + target, + .subset2(scheduler, "progress") ) - scheduler$reporter$report_skipped(target, scheduler$progress) } #' @export