From 316d79c3621956dc6da9b138e406310aba525520 Mon Sep 17 00:00:00 2001 From: Jean-Francois SMIGIELSKI Date: Wed, 23 Oct 2024 17:41:22 +0200 Subject: [PATCH] Add a format for object-size providing a weighted random based on configured buckets (#342) Allows a comma-separated set of colon-separated pairs, describing buckets and their respective weights. This format triggers an option that performs a weighted random number generation when a new object is created. E.g.: `4096:10740,8192:1685,16384:1623` will trigger objects whose size will be chosen between 0 and 4096 with a weight of 10740, between 4096 and 8192 with a weight of 1685, or between 8192 and 16384 with a weight of 1623. --- README.md | 16 +++++++++++++ cli/generator.go | 50 +++++++++++++++++++++++++--------------- go.mod | 1 + go.sum | 2 ++ pkg/generator/options.go | 29 +++++++++++++++++++---- 5 files changed, 75 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index c5bb469b..790d1fb4 100644 --- a/README.md +++ b/README.md @@ -189,6 +189,8 @@ By default warp uploads random data. ### Object Size +#### Fixed File Size + Most benchmarks use the `--obj.size` parameter to decide the size of objects to upload. Different benchmark types will have different default values. @@ -241,6 +243,20 @@ The average object size will be close to `--obj.size` multiplied by 0.179151. To get a value for `--obj.size` multiply the desired average object size by 5.582 to get a maximum value. +#### Bucketed File Size + +The `--obj.size` parameter accepts a string value whose format can describe size buckets. +The usage of that format activates the options of bucketed file sizes and preempts a possible activation +of random file sizes via `--obj.randsize`. + +The format of the string is a comma-separated list of colon-separated pairs, describing buckets and their respective weights. +Within each bucket, the size distribution is uniform. 
+ +E.g.: the value `4096:10740,8192:1685,16384:1623` will trigger objects whose size will be chosen +between 0 and 4096 with a weight of 10740, between 4096 and 8192 with a weight of 1685, +or between 8192 and 16384 with a weight of 1623. + + ## Automatic Termination Adding `--autoterm` parameter will enable automatic termination when results are considered stable. To detect a stable setup, warp continuously downsample the current data to diff --git a/cli/generator.go b/cli/generator.go index 5bf5a908..7dfb3d22 100644 --- a/cli/generator.go +++ b/cli/generator.go @@ -27,6 +27,8 @@ import ( "github.com/minio/cli" "github.com/minio/warp/pkg/generator" + + hist "github.com/jfsmig/prng/histogram" ) var genFlags = []cli.Flag{ @@ -83,28 +85,38 @@ func newGenSource(ctx *cli.Context, sizeField string) func() generator.Source { generator.WithCustomPrefix(ctx.String("prefix")), generator.WithPrefixSize(prefixSize), } - tokens := strings.Split(ctx.String(sizeField), ",") - switch len(tokens) { - case 1: - size, err := toSize(tokens[0]) - if err != nil { - fatalIf(probe.NewError(err), "Invalid obj.size specified") - } - opts = append(opts, generator.WithSize(int64(size))) - case 2: - minSize, err := toSize(tokens[0]) - if err != nil { - fatalIf(probe.NewError(err), "Invalid min obj.size specified") + if strings.IndexRune(ctx.String(sizeField), ':') > 0 { + if _, err := hist.ParseCSV(ctx.String(sizeField)); err != nil { + fatalIf(probe.NewError(err), "Invalid histogram format for the size parameter") + } else { + opts = append(opts, generator.WithSizeHistograms(ctx.String(sizeField))) } - maxSize, err := toSize(tokens[1]) - if err != nil { - fatalIf(probe.NewError(err), "Invalid max obj.size specified") + } else { + tokens := strings.Split(ctx.String(sizeField), ",") + switch len(tokens) { + case 1: + size, err := toSize(tokens[0]) + if err != nil { + fatalIf(probe.NewError(err), "Invalid obj.size specified") + } + opts = append(opts, generator.WithSize(int64(size))) + case 2: 
+ minSize, err := toSize(tokens[0]) + if err != nil { + fatalIf(probe.NewError(err), "Invalid min obj.size specified") + } + maxSize, err := toSize(tokens[1]) + if err != nil { + fatalIf(probe.NewError(err), "Invalid max obj.size specified") + } + opts = append(opts, generator.WithMinMaxSize(int64(minSize), int64(maxSize))) + default: + fatalIf(probe.NewError(fmt.Errorf("unexpected obj.size specified: %s", ctx.String(sizeField))), "Invalid obj.size parameter") } - opts = append(opts, generator.WithMinMaxSize(int64(minSize), int64(maxSize))) - default: - fatalIf(probe.NewError(fmt.Errorf("unexpected obj.size specified: %s", ctx.String(sizeField))), "Invalid obj.size parameter") + + opts = append([]generator.Option{g.Apply()}, append(opts, generator.WithRandomSize(ctx.Bool("obj.randsize")))...) } - opts = append([]generator.Option{g.Apply()}, append(opts, generator.WithRandomSize(ctx.Bool("obj.randsize")))...) + src, err := generator.NewFn(opts...) fatalIf(probe.NewError(err), "Unable to create data generator") return src diff --git a/go.mod b/go.mod index fcb13ebd..83b63a44 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/dustin/go-humanize v1.0.1 github.com/fatih/color v1.17.0 github.com/influxdata/influxdb-client-go/v2 v2.13.0 + github.com/jfsmig/prng v0.0.2 github.com/klauspost/compress v1.17.9 github.com/minio/cli v1.24.2 github.com/minio/madmin-go/v3 v3.0.51 diff --git a/go.sum b/go.sum index 0e86b529..b4b8877b 100644 --- a/go.sum +++ b/go.sum @@ -48,6 +48,8 @@ github.com/influxdata/influxdb-client-go/v2 v2.13.0 h1:ioBbLmR5NMbAjP4UVA5r9b5xG github.com/influxdata/influxdb-client-go/v2 v2.13.0/go.mod h1:k+spCbt9hcvqvUiz0sr5D8LolXHqAAOfPw9v/RIRHl4= github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU= github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo= +github.com/jfsmig/prng v0.0.2 
h1:aZun+YgmBnUyhqvI+EDjwmOYc1kCPsihdEr9V/1YlGA= +github.com/jfsmig/prng v0.0.2/go.mod h1:bz1fX1aizp8/Lu1thLzfirh5uExjC1lVwB8SSt6ExpE= github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= diff --git a/pkg/generator/options.go b/pkg/generator/options.go index 24d200be..9edab33c 100644 --- a/pkg/generator/options.go +++ b/pkg/generator/options.go @@ -20,6 +20,8 @@ package generator import ( "errors" "math/rand" + + hist "github.com/jfsmig/prng/histogram" ) // Options provides options. @@ -33,6 +35,10 @@ type Options struct { totalSize int64 randomPrefix int randSize bool + + // Activates the use of a distribution of sizes + flagSizesDistribution bool + sizesDistribution hist.Int64Distribution } // OptionApplier allows to abstract generator options. @@ -42,6 +48,9 @@ type OptionApplier interface { // getSize will return a size for an object. func (o Options) getSize(rng *rand.Rand) int64 { + if o.flagSizesDistribution { + return o.sizesDistribution.Poll(rng) + } if !o.randSize { return o.totalSize } @@ -59,20 +68,32 @@ func defaultOptions() Options { return o } +func WithSizeHistograms(encoded string) Option { + return func(o *Options) error { + var err error + o.sizesDistribution, err = hist.ParseCSV(encoded) + if err != nil { + return err + } + o.flagSizesDistribution = true + return nil + } +} + // WithMinMaxSize sets the min and max size of the generated data. 
func WithMinMaxSize(min, max int64) Option { return func(o *Options) error { if min <= 0 { - return errors.New("WithSize: minSize must be >= 0") + return errors.New("WithMinMaxSize: minSize must be >= 0") } if max < 0 { - return errors.New("WithSize: maxSize must be > 0") + return errors.New("WithMinMaxSize: maxSize must be > 0") } if min > max { - return errors.New("WithSize: minSize must be < maxSize") + return errors.New("WithMinMaxSize: minSize must be < maxSize") } if o.randSize && max < 256 { - return errors.New("WithSize: random sized objects should be at least 256 bytes") + return errors.New("WithMinMaxSize: random sized objects should be at least 256 bytes") } o.totalSize = max