Skip to content

Commit

Permalink
speed up target_skip()
Browse files Browse the repository at this point in the history
  • Loading branch information
wlandau committed Jan 10, 2025
1 parent a8aa492 commit 011cbba
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 15 deletions.
8 changes: 4 additions & 4 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@

Machine | Before (seconds) | After (seconds) | Speedup
---|---|---|---
M2 Macbook | 413.16 | 25.54 | 16.177
RHEL9 | 450.66 | 105.9 | 4.256
M2 Macbook | 413.16 | 35.538 | 11.626
RHEL9 | 450.66 | 94.08 | 4.790

And for `tar_outdated()` using all the default settings:

Machine | Before (seconds) | After (seconds) | Speedup
---|---|---|---
M2 Macbook | 91.314 | 17.574 | 5.196
RHEL9 | 167.809 | 34.321 | 4.889
M2 Macbook | 91.314 | 16.636 | 5.489
RHEL9 | 167.809 | 37.395 | 4.487

To take advantage of these speed gains for an existing pipeline, you may have to run `tar_make()` to convert the time stamps and file sizes to a new format. This initial `tar_make()` is slow, but subsequent `tar_make()` calls should be much faster than before the upgrade.

Expand Down
30 changes: 19 additions & 11 deletions R/class_builder.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ target_should_run.tar_builder <- function(target, meta) {
# Willing to ignore high cyclomatic complexity score.
# nolint start
builder_should_run <- function(target, meta) {
cue <- target$cue
cue <- .subset2(target, "cue")
if (cue_meta_exists(cue, target, meta)) return(TRUE)
row <- meta$get_row(target_get_name(target))
row <- .subset2(meta, "get_row")(target_get_name(target))
if (cue_meta(cue, target, meta, row)) return(TRUE)
if (cue_always(cue, target, meta)) return(TRUE)
if (cue_never(cue, target, meta)) return(FALSE)
Expand Down Expand Up @@ -194,15 +194,19 @@ target_skip.tar_builder <- function(
) {
target_update_queue(target, scheduler)
name <- target_get_name(target)
row <- meta$get_row(name)
row <- .subset2(meta, "get_row")(name)
path <- store_path_from_name(
store = target$store,
format = row$format,
store = .subset2(target, "store"),
format = .subset2(row, "format"),
name = name,
path = unlist(row$path),
path_store = meta$store
path = unlist(.subset2(row, "path")),
path_store = .subset2(meta, "store")
)
file_repopulate(
file = .subset2(target, "file"),
path = path,
data = .subset2(row, "data")
)
file_repopulate(target$file, path = path, data = row$data)
pipeline_set_target(pipeline, target)
if (active) {
builder_ensure_workspace(
Expand All @@ -212,12 +216,16 @@ target_skip.tar_builder <- function(
meta = meta
)
}
progress <- .subset2(scheduler, "progress")
if_any(
active,
scheduler$progress$register_skipped(target),
scheduler$progress$assign_skipped(target_get_name(target))
.subset2(progress, "register_skipped")(target),
.subset2(progress, "assign_skipped")(target_get_name(target))
)
.subset2(.subset2(scheduler, "reporter"), "report_skipped")(
target,
.subset2(scheduler, "progress")
)
scheduler$reporter$report_skipped(target, scheduler$progress)
}

#' @export
Expand Down

0 comments on commit 011cbba

Please sign in to comment.