Skip to content

Commit

Permalink
Allow PageRank to be used independently of the ISCC
Browse files Browse the repository at this point in the history
For certain workloads it may not be necessary to use PageRank to
automatically pick an optimally sized worker. Still, it may be useful to
enable the Initial Size Class Cache (ISCC). That way it's possible to
observe timing statistics in bb_browser.

This change adds a simple StrategyCalculator that is functionally
equivalent to FallbackAnalyzer. It always runs actions on the smallest
size class, falling back to running them on the largest.
  • Loading branch information
EdSchouten committed Oct 10, 2023
1 parent 0bcfdc0 commit f94b63a
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 41 deletions.
77 changes: 44 additions & 33 deletions pkg/proto/configuration/scheduler/scheduler.proto
Original file line number Diff line number Diff line change
Expand Up @@ -152,23 +152,13 @@ message InitialSizeClassAnalyzerConfiguration {
// Maximum permitted execution timeout.
google.protobuf.Duration maximum_execution_timeout = 2;

// When not set, run all actions on the smallest size class for which
// workers exist. Upon failure, retry actions on the largest size
// class. This mode is not recommended for setups with more than two
// size classes, or workloads where build times matter.
// When set, persist statistics on execution times and outcomes into
// the Initial Size Class Cache (ISCC), so that future invocations of
// similar actions can be scheduled more intelligently.
//
// When set, run all actions on the largest size class if not seen
// before. Future invocations of actions with the same command line
// arguments and environment variables will run on all size classes,
// using probabilities based on how their execution times compare to
// those of the largest size class.
//
// Statistics on execution times are persisted into the Initial Size
// Class Cache (ISCC). bb_browser is also capable of reading data from
// this data store, making it possible to view these statistics by
// visiting the page of an action. Because of this, enabling this
// option requires that bb_scheduler's 'initial_size_class_cache'
// option is set as well.
// bb_browser is also capable of reading data from this data store,
// making it possible to view these statistics by visiting the page of
// an action.
InitialSizeClassFeedbackDrivenAnalyzerConfiguration feedback_driven = 3;
}

Expand All @@ -191,6 +181,40 @@ message InitialSizeClassFeedbackDrivenAnalyzerConfiguration {
// Recommended value: 86400s (1 day)
google.protobuf.Duration failure_cache_duration = 1;

// Was 'acceptable_execution_time_increase_exponent',
// 'smaller_size_class_execution_timeout_multiplier',
// 'minimum_execution_timeout', and 'maximum_convergence_error'.
//
// These options have moved into 'page_rank'.
reserved 2, 3, 4, 5;

// The number of action outcomes to store per size class. Increasing
// this improves the accuracy of timing information that is captured,
// but has the downside that the system responds to changes in
// behavior of actions less quickly.
//
// To ensure that the system does not end up in a steady state where
// actions are always run on the same size class, there is roughly a
// 1.0 / history_size probability that actions are run on size
// classes other than the optimum, regardless of historical outcomes.
//
// Recommended value: 32
int32 history_size = 6;

// When not set, run all actions on the smallest size class for which
// workers exist. Upon failure, retry actions on the largest size
// class. This mode is not recommended for setups with more than two
// size classes, or workloads where build times matter.
//
// When set, run all actions on the largest size class if not seen
// before. Future invocations of actions with the same command line
// arguments and environment variables will run on all size classes,
// using probabilities based on how their execution times compare to
// those of the largest size class.
InitialSizeClassPageRankStrategyCalculatorConfiguration page_rank = 7;
}

message InitialSizeClassPageRankStrategyCalculatorConfiguration {
// An exponent to determine whether an increase in execution time when
// scheduling an action on a smaller size class is considered
// acceptable.
Expand Down Expand Up @@ -220,7 +244,7 @@ message InitialSizeClassFeedbackDrivenAnalyzerConfiguration {
// may cause actions that are only somewhat parallel to run slower.
//
// Recommended value: somewhere between 0.2 and 0.8.
double acceptable_execution_time_increase_exponent = 2;
double acceptable_execution_time_increase_exponent = 1;

// Actions scheduled on smaller size classes are run with a reduced
// timeout value that is based on the acceptable execution time of the
Expand All @@ -239,7 +263,7 @@ message InitialSizeClassFeedbackDrivenAnalyzerConfiguration {
// insert a higher quality data point into the ISCC.
//
// Recommended value: 1.5
double smaller_size_class_execution_timeout_multiplier = 3;
double smaller_size_class_execution_timeout_multiplier = 2;

// The execution timeout value that is used on smaller size classes is
// proportional to the median execution time observed on the largest
Expand All @@ -251,7 +275,7 @@ message InitialSizeClassFeedbackDrivenAnalyzerConfiguration {
// be used to set a lower bound on the execution timeout.
//
// Recommended value: 10s
google.protobuf.Duration minimum_execution_timeout = 4;
google.protobuf.Duration minimum_execution_timeout = 3;

// This implementation compares previous execution stats between every
// pair of size classes. The resulting scores are stored in a
Expand Down Expand Up @@ -284,18 +308,5 @@ message InitialSizeClassFeedbackDrivenAnalyzerConfiguration {
// are made to this option to assess the performance impact.
//
// Recommended value: 0.002
double maximum_convergence_error = 5;

// The number of action outcomes to store per size class. Increasing
// this improves the accuracy of timing information that is captured,
// but has the downside that the system responds to changes in
// behavior of actions less quickly.
//
// To ensure that the system does not end up in a steady state where
// actions are always run on the same size class, there is roughly a
// 1.0 / history_size probability that actions are run on size
// classes other than the optimum, regardless of historical outcomes.
//
// Recommended value: 32
int32 history_size = 6;
double maximum_convergence_error = 4;
}
1 change: 1 addition & 0 deletions pkg/scheduler/initialsizeclass/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ go_library(
"feedback_driven_analyzer.go",
"outcomes.go",
"page_rank_strategy_calculator.go",
"smallest_size_class_strategy_calculator.go",
"strategy_calculator.go",
],
importpath = "github.com/buildbarn/bb-remote-execution/pkg/scheduler/initialsizeclass",
Expand Down
22 changes: 14 additions & 8 deletions pkg/scheduler/initialsizeclass/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,27 @@ func NewAnalyzerFromConfiguration(configuration *pb.InitialSizeClassAnalyzerConf
if err := failureCacheDuration.CheckValid(); err != nil {
return nil, util.StatusWrap(err, "Invalid failure cache duration")
}
minimumExecutionTimeout := fdConfiguration.MinimumExecutionTimeout
if err := minimumExecutionTimeout.CheckValid(); err != nil {
return nil, util.StatusWrap(err, "Invalid minimum acceptable execution time")

strategyCalculator := SmallestSizeClassStrategyCalculator
if pageRankConfiguration := fdConfiguration.PageRank; pageRankConfiguration != nil {
minimumExecutionTimeout := pageRankConfiguration.MinimumExecutionTimeout
if err := minimumExecutionTimeout.CheckValid(); err != nil {
return nil, util.StatusWrap(err, "Invalid minimum acceptable execution time")
}
strategyCalculator = NewPageRankStrategyCalculator(
minimumExecutionTimeout.AsDuration(),
pageRankConfiguration.AcceptableExecutionTimeIncreaseExponent,
pageRankConfiguration.SmallerSizeClassExecutionTimeoutMultiplier,
pageRankConfiguration.MaximumConvergenceError)
}

return NewFeedbackDrivenAnalyzer(
previousExecutionStatsStore,
random.NewFastSingleThreadedGenerator(),
clock.SystemClock,
actionTimeoutExtractor,
failureCacheDuration.AsDuration(),
NewPageRankStrategyCalculator(
minimumExecutionTimeout.AsDuration(),
fdConfiguration.AcceptableExecutionTimeIncreaseExponent,
fdConfiguration.SmallerSizeClassExecutionTimeoutMultiplier,
fdConfiguration.MaximumConvergenceError),
strategyCalculator,
int(fdConfiguration.HistorySize)), nil
}
return NewFallbackAnalyzer(actionTimeoutExtractor), nil
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package initialsizeclass

import (
"time"

"github.com/buildbarn/bb-storage/pkg/proto/iscc"
)

// smallestSizeClassStrategyCalculator is a field-less type whose
// methods do not depend on any per-instance state. It is exposed to
// callers through the SmallestSizeClassStrategyCalculator variable.
type smallestSizeClassStrategyCalculator struct{}

// GetStrategies returns the strategies to use for an action. When at
// most one size class is available there is nothing to choose between,
// so no strategies are returned. Otherwise a single strategy is
// returned that has a probability of 1.0 and uses the action's original
// timeout as its foreground execution timeout. The statistics in
// perSizeClassStatsMap are not consulted.
//
// NOTE(review): presumably the single entry corresponds to the smallest
// size class, which would match the documented intent of
// SmallestSizeClassStrategyCalculator; confirm against the Strategy
// contract.
func (sc smallestSizeClassStrategyCalculator) GetStrategies(perSizeClassStatsMap map[uint32]*iscc.PerSizeClassStats, sizeClasses []uint32, originalTimeout time.Duration) []Strategy {
	if len(sizeClasses) > 1 {
		onlyStrategy := Strategy{
			ForegroundExecutionTimeout: originalTimeout,
			Probability:                1.0,
		}
		return []Strategy{onlyStrategy}
	}
	return nil
}

// GetBackgroundExecutionTimeout unconditionally panics without
// inspecting any of its arguments, as this calculator is not supposed
// to be asked for a background execution timeout.
//
// NOTE(review): presumably callers only request a background timeout
// for strategies that ask for background execution, which the
// strategies produced by GetStrategies do not appear to do; confirm
// against the caller.
func (sc smallestSizeClassStrategyCalculator) GetBackgroundExecutionTimeout(perSizeClassStatsMap map[uint32]*iscc.PerSizeClassStats, sizeClasses []uint32, sizeClassIndex int, originalTimeout time.Duration) time.Duration {
	const unexpectedCallMessage = "Background execution should not be performed"
	panic(unexpectedCallMessage)
}

// SmallestSizeClassStrategyCalculator implements a StrategyCalculator
// that always prefers running actions on the smallest size class.
//
// This StrategyCalculator behaves similarly to FallbackAnalyzer, with
// the main difference being that it still causes execution times and
// outcomes to be tracked in the Initial Size Class Cache (ISCC).
var SmallestSizeClassStrategyCalculator StrategyCalculator = smallestSizeClassStrategyCalculator{}

0 comments on commit f94b63a

Please sign in to comment.