From ac72b54fd1128adcb65a8ee048b40ad346709753 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Leszczy=C5=84ski?= <2000michal@wp.pl> Date: Thu, 3 Oct 2024 13:54:49 +0200 Subject: [PATCH] feat(restore): batch, order sstables by size This results in creating batches of sstables of more similar size. Fixes #3979 --- pkg/service/restore/batch.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pkg/service/restore/batch.go b/pkg/service/restore/batch.go index d4bfc5942..60bf89ab7 100644 --- a/pkg/service/restore/batch.go +++ b/pkg/service/restore/batch.go @@ -20,6 +20,7 @@ type batchDispatcher struct { } func newBatchDispatcher(workload []LocationWorkload, batchSize int, hostShardCnt map[string]uint, locationHosts map[Location][]string) *batchDispatcher { + sortWorkloadBySizeDesc(workload) var size int64 for _, t := range workload { size += t.Size @@ -221,3 +222,24 @@ func (b *batchDispatcher) createBatch(l *LocationWorkload, t *TableWorkload, dir SSTables: sstables, }, true } + +func sortWorkloadBySizeDesc(workload []LocationWorkload) { + slices.SortFunc(workload, func(a, b LocationWorkload) int { + return int(b.Size - a.Size) + }) + for _, loc := range workload { + slices.SortFunc(loc.Tables, func(a, b TableWorkload) int { + return int(b.Size - a.Size) + }) + for _, tab := range loc.Tables { + slices.SortFunc(tab.RemoteDirs, func(a, b RemoteDirWorkload) int { + return int(b.Size - a.Size) + }) + for _, dir := range tab.RemoteDirs { + slices.SortFunc(dir.SSTables, func(a, b RemoteSSTable) int { + return int(b.Size - a.Size) + }) + } + } + } +}