From f94b63a1a3b7cf0f7bbf18ab5e3dba16febaab7b Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Tue, 10 Oct 2023 13:18:00 +0200 Subject: [PATCH] Allow PageRank to be used independently of the ISCC For certain workloads it may not be necessary to use PageRank to automatically pick an optimally sized worker. Still, it may be useful to enable the Initial Size Class Cache (ISCC). That way it's possible to observe timing statistics in bb_browser. This change adds a simple StrategyCalculator that is functionally equivalent to FallbackAnalyzer. It always runs actions on the smallest size class, falling back to running it on the largest. --- .../configuration/scheduler/scheduler.proto | 77 +++++++++++-------- pkg/scheduler/initialsizeclass/BUILD.bazel | 1 + .../initialsizeclass/configuration.go | 22 ++++-- ...smallest_size_class_strategy_calculator.go | 33 ++++++++ 4 files changed, 92 insertions(+), 41 deletions(-) create mode 100644 pkg/scheduler/initialsizeclass/smallest_size_class_strategy_calculator.go diff --git a/pkg/proto/configuration/scheduler/scheduler.proto b/pkg/proto/configuration/scheduler/scheduler.proto index 6bedd93d..ee7e6d98 100644 --- a/pkg/proto/configuration/scheduler/scheduler.proto +++ b/pkg/proto/configuration/scheduler/scheduler.proto @@ -152,23 +152,13 @@ message InitialSizeClassAnalyzerConfiguration { // Maximum permitted execution timeout. google.protobuf.Duration maximum_execution_timeout = 2; - // When not set, run all actions on the smallest size class for which - // workers exist. Upon failure, retry actions on the largest size - // class. This mode is not recommended for setups with more than two - // size classes, or workloads where build times matter. + // When set, persist statistics on execution times and outcomes into + // the Initial Size Class Cache (ISCC), so that future invocations of + // similar actions can be scheduled more intelligently. // - // When set, run all actions on the largest size class if not seen - // before. 
Future invocations of actions with the same command line - // arguments and environment variables will run on all size classes, - // using probabilities based on how their execution times compare to - // those of the largest size class. - // - // Statistics on execution times are persisted into the Initial Size - // Class Cache (ISCC). bb_browser is also capable of reading data from - // this data store, making it possible to view these statistics by - // visiting the page of an action. Because of this, enabling this - // option requires that bb_scheduler's 'initial_size_class_cache' - // option is set as well. + // bb_browser is also capable of reading data from this data store, + // making it possible to view these statistics by visiting the page of + // an action. InitialSizeClassFeedbackDrivenAnalyzerConfiguration feedback_driven = 3; } @@ -191,6 +181,40 @@ message InitialSizeClassFeedbackDrivenAnalyzerConfiguration { // Recommended value: 86400s (1 day) google.protobuf.Duration failure_cache_duration = 1; + // Was 'acceptable_execution_time_increase_exponent', + // 'smaller_size_class_execution_timeout_multiplier', + // 'minimum_execution_timeout', and 'maximum_convergence_error'. + // + // These options have moved into 'page_rank'. + reserved 2, 3, 4, 5; + + // The number of action outcomes to store per size class. Increasing + // this improves the accuracy of timing information that is captured, + // but has the downside that the system responds to changes in + // behavior of actions less quickly. + // + // To ensure that the system does not end up in a steady state where + // actions are always run on the same size class, there is roughly a + // 1.0 / history_size probability that actions are run on size + // classes other than the optimum, regardless of historical outcomes. + // + // Recommended value: 32 + int32 history_size = 6; + + // When not set, run all actions on the smallest size class for which + // workers exist.
Upon failure, retry actions on the largest size + // class. This mode is not recommended for setups with more than two + // size classes, or workloads where build times matter. + // + // When set, run all actions on the largest size class if not seen + // before. Future invocations of actions with the same command line + // arguments and environment variables will run on all size classes, + // using probabilities based on how their execution times compare to + // those of the largest size class. + InitialSizeClassPageRankStrategyCalculatorConfiguration page_rank = 7; +} + +message InitialSizeClassPageRankStrategyCalculatorConfiguration { // An exponent to determine whether an increase in execution time when // scheduling an action on a smaller size class is considere // acceptable. @@ -220,7 +244,7 @@ message InitialSizeClassFeedbackDrivenAnalyzerConfiguration { // may cause actions that are only somewhat parallel to run slower. // // Recommended value: somewhere between 0.2 and 0.8. - double acceptable_execution_time_increase_exponent = 2; + double acceptable_execution_time_increase_exponent = 1; // Actions scheduled on smaller size classes are run with a reduced // timeout value that is based on the acceptable execution time of the @@ -239,7 +263,7 @@ message InitialSizeClassFeedbackDrivenAnalyzerConfiguration { // insert a higher quality data point into the ISCC. // // Recommended value: 1.5 - double smaller_size_class_execution_timeout_multiplier = 3; + double smaller_size_class_execution_timeout_multiplier = 2; // The execution timeout value that is used on smaller size classes is // proportional to the median execution time observed on the largest @@ -251,7 +275,7 @@ message InitialSizeClassFeedbackDrivenAnalyzerConfiguration { // be used to set an lower bound on the execution timeout. 
// // Recommended value: 10s - google.protobuf.Duration minimum_execution_timeout = 4; + google.protobuf.Duration minimum_execution_timeout = 3; // This implementation compares previous execution stats between every // pair of size classes. The resulting scores are stored in a @@ -284,18 +308,5 @@ message InitialSizeClassFeedbackDrivenAnalyzerConfiguration { // are made to this option to assess the performance impact. // // Recommended value: 0.002 - double maximum_convergence_error = 5; - - // The number of action outcomes to store per size class. Increasing - // this improves the accuracy of timing information that is captured, - // but has the downside that the system responds to changes in - // behavior of actions less quickly. - // - // To ensure that the system does not end up in a steady state where - // actions are always run on the same size class, there is roughly a - // 1.0 / history_size probability that actions are run on sizes - // classes other than the optimum, regardless of historical outcomes. 
- // - // Recommended value: 32 - int32 history_size = 6; + double maximum_convergence_error = 4; } diff --git a/pkg/scheduler/initialsizeclass/BUILD.bazel b/pkg/scheduler/initialsizeclass/BUILD.bazel index c6f91269..d000f3e6 100644 --- a/pkg/scheduler/initialsizeclass/BUILD.bazel +++ b/pkg/scheduler/initialsizeclass/BUILD.bazel @@ -10,6 +10,7 @@ go_library( "feedback_driven_analyzer.go", "outcomes.go", "page_rank_strategy_calculator.go", + "smallest_size_class_strategy_calculator.go", "strategy_calculator.go", ], importpath = "github.com/buildbarn/bb-remote-execution/pkg/scheduler/initialsizeclass", diff --git a/pkg/scheduler/initialsizeclass/configuration.go b/pkg/scheduler/initialsizeclass/configuration.go index d5f71749..7b2100ee 100644 --- a/pkg/scheduler/initialsizeclass/configuration.go +++ b/pkg/scheduler/initialsizeclass/configuration.go @@ -37,21 +37,27 @@ func NewAnalyzerFromConfiguration(configuration *pb.InitialSizeClassAnalyzerConf if err := failureCacheDuration.CheckValid(); err != nil { return nil, util.StatusWrap(err, "Invalid failure cache duration") } - minimumExecutionTimeout := fdConfiguration.MinimumExecutionTimeout - if err := minimumExecutionTimeout.CheckValid(); err != nil { - return nil, util.StatusWrap(err, "Invalid minimum acceptable execution time") + + strategyCalculator := SmallestSizeClassStrategyCalculator + if pageRankConfiguration := fdConfiguration.PageRank; pageRankConfiguration != nil { + minimumExecutionTimeout := pageRankConfiguration.MinimumExecutionTimeout + if err := minimumExecutionTimeout.CheckValid(); err != nil { + return nil, util.StatusWrap(err, "Invalid minimum acceptable execution time") + } + strategyCalculator = NewPageRankStrategyCalculator( + minimumExecutionTimeout.AsDuration(), + pageRankConfiguration.AcceptableExecutionTimeIncreaseExponent, + pageRankConfiguration.SmallerSizeClassExecutionTimeoutMultiplier, + pageRankConfiguration.MaximumConvergenceError) } + return NewFeedbackDrivenAnalyzer( 
previousExecutionStatsStore, random.NewFastSingleThreadedGenerator(), clock.SystemClock, actionTimeoutExtractor, failureCacheDuration.AsDuration(), - NewPageRankStrategyCalculator( - minimumExecutionTimeout.AsDuration(), - fdConfiguration.AcceptableExecutionTimeIncreaseExponent, - fdConfiguration.SmallerSizeClassExecutionTimeoutMultiplier, - fdConfiguration.MaximumConvergenceError), + strategyCalculator, int(fdConfiguration.HistorySize)), nil } return NewFallbackAnalyzer(actionTimeoutExtractor), nil diff --git a/pkg/scheduler/initialsizeclass/smallest_size_class_strategy_calculator.go b/pkg/scheduler/initialsizeclass/smallest_size_class_strategy_calculator.go new file mode 100644 index 00000000..b4e58355 --- /dev/null +++ b/pkg/scheduler/initialsizeclass/smallest_size_class_strategy_calculator.go @@ -0,0 +1,33 @@ +package initialsizeclass + +import ( + "time" + + "github.com/buildbarn/bb-storage/pkg/proto/iscc" +) + +type smallestSizeClassStrategyCalculator struct{} + +func (sc smallestSizeClassStrategyCalculator) GetStrategies(perSizeClassStatsMap map[uint32]*iscc.PerSizeClassStats, sizeClasses []uint32, originalTimeout time.Duration) []Strategy { + if len(sizeClasses) <= 1 { + return nil + } + return []Strategy{ + { + Probability: 1.0, + ForegroundExecutionTimeout: originalTimeout, + }, + } +} + +func (sc smallestSizeClassStrategyCalculator) GetBackgroundExecutionTimeout(perSizeClassStatsMap map[uint32]*iscc.PerSizeClassStats, sizeClasses []uint32, sizeClassIndex int, originalTimeout time.Duration) time.Duration { + panic("Background execution should not be performed") +} + +// SmallestSizeClassStrategyCalculator implements a StrategyCalculator +// that always prefers running actions on the smallest size class. +// +// This StrategyCalculator behaves similarly to FallbackAnalyzer, with the +// main difference that it still causes execution times and outcomes to +// be tracked in the Initial Size Class Cache (ISCC).
+var SmallestSizeClassStrategyCalculator StrategyCalculator = smallestSizeClassStrategyCalculator{}