Skip to content

Commit

Permalink
fix(watcher): reset mitigation if total rooms is 1
Browse files Browse the repository at this point in the history
If the total rooms is 1, we need to allow disruptions in the runtime.
Thus, we reset the mitigation to be applied in 0 rooms instead of 1.
  • Loading branch information
hspedro committed Mar 15, 2024
1 parent dade1df commit d2e0309
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 30 deletions.
3 changes: 3 additions & 0 deletions docs/reference/Kubernetes.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ runtimeWatcher:
safetyPercentage: 0.05
```
We should always allow node consolidation to happen, thus if the total number of rooms
is 1, then we set PDB's `minAvailable: 0`.

#### Pod Change Events Worker

For this type of worker, runtime watcher spawns multiple goroutines, maintaining a worker
Expand Down
7 changes: 0 additions & 7 deletions internal/adapters/runtime/kubernetes/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,6 @@ func (k *kubernetes) createPDBFromScheduler(ctx context.Context, scheduler *enti
},
}

if scheduler.Autoscaling != nil && scheduler.Autoscaling.Enabled {
pdbSpec.Spec.MinAvailable = &intstr.IntOrString{
Type: intstr.Int,
IntVal: int32(scheduler.Autoscaling.Min),
}
}

pdb, err := k.clientSet.PolicyV1().PodDisruptionBudgets(scheduler.Name).Create(ctx, pdbSpec, metav1.CreateOptions{})
if err != nil && !kerrors.IsAlreadyExists(err) {
k.logger.Warn("error creating pdb", zap.String("scheduler", scheduler.Name), zap.Error(err))
Expand Down
26 changes: 7 additions & 19 deletions internal/adapters/runtime/kubernetes/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func TestPDBCreationAndDeletion(t *testing.T) {
require.Contains(t, pdb.Labels["app.kubernetes.io/managed-by"], "maestro")
})

t.Run("create pdb from scheduler with autoscaling", func(t *testing.T) {
t.Run("pdb should not use scheduler min as minAvailable", func(t *testing.T) {
if !kubernetesRuntime.isPDBSupported() {
t.Log("Kubernetes version does not support PDB, skipping")
t.SkipNow()
Expand Down Expand Up @@ -166,7 +166,7 @@ func TestPDBCreationAndDeletion(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, pdb)
require.Equal(t, pdb.Name, scheduler.Name)
require.Equal(t, pdb.Spec.MinAvailable.IntVal, int32(2))
require.Equal(t, pdb.Spec.MinAvailable.IntVal, int32(0))
})

t.Run("delete pdb on scheduler deletion", func(t *testing.T) {
Expand Down Expand Up @@ -233,27 +233,14 @@ func TestMitigateDisruption(t *testing.T) {
require.Equal(t, pdb.Spec.MinAvailable.IntVal, int32(0))
})

t.Run("should update PDB on mitigatation if not equal to current minAvailable", func(t *testing.T) {
t.Run("should update PDB on mitigation if not equal to current value", func(t *testing.T) {
if !kubernetesRuntime.isPDBSupported() {
t.Log("Kubernetes version does not support PDB, skipping")
t.SkipNow()
}

scheduler := &entities.Scheduler{
Name: "scheduler-pdb-mitigation-update",
Autoscaling: &autoscaling.Autoscaling{
Enabled: true,
Min: 100,
Max: 200,
Policy: autoscaling.Policy{
Type: autoscaling.RoomOccupancy,
Parameters: autoscaling.PolicyParameters{
RoomOccupancy: &autoscaling.RoomOccupancyParams{
ReadyTarget: 0.1,
},
},
},
},
}
err := kubernetesRuntime.CreateScheduler(ctx, scheduler)
if err != nil {
Expand All @@ -271,9 +258,10 @@ func TestMitigateDisruption(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, pdb)
require.Equal(t, pdb.Name, scheduler.Name)
require.Equal(t, pdb.Spec.MinAvailable.IntVal, int32(scheduler.Autoscaling.Min))
require.Equal(t, pdb.Spec.MinAvailable.IntVal, 0)

err = kubernetesRuntime.MitigateDisruption(ctx, scheduler, scheduler.Autoscaling.Min, 0.0)
occupiedRooms := 100
err = kubernetesRuntime.MitigateDisruption(ctx, scheduler, occupiedRooms, 0.0)
require.NoError(t, err)

pdb, err = client.PolicyV1().PodDisruptionBudgets(scheduler.Name).Get(ctx, scheduler.Name, metav1.GetOptions{})
Expand All @@ -282,7 +270,7 @@ func TestMitigateDisruption(t *testing.T) {
require.Equal(t, pdb.Name, scheduler.Name)

incSafetyPercentage := 1.0 + DefaultDisruptionSafetyPercentage
newRoomAmount := int32(float64(scheduler.Autoscaling.Min) * incSafetyPercentage)
newRoomAmount := int32(float64(occupiedRooms) * incSafetyPercentage)
require.Equal(t, pdb.Spec.MinAvailable.IntVal, newRoomAmount)
})

Expand Down
20 changes: 16 additions & 4 deletions internal/core/worker/runtimewatcher/runtime_watcher_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,16 +101,28 @@ func (w *runtimeWatcherWorker) spawnUpdateRoomWatchers(resultChan chan game_room
}

func (w *runtimeWatcherWorker) mitigateDisruptions() error {
occupiedRoomsAmount, err := w.roomStorage.GetRoomCountByStatus(w.ctx, w.scheduler.Name, game_room.GameStatusOccupied)
totalRoomsAmount, err := w.roomStorage.GetRoomCount(w.ctx, w.scheduler.Name)
if err != nil {
w.logger.Error(
"failed to get occupied rooms for scheduler",
"failed to get total rooms amount for scheduler",
zap.String("scheduler", w.scheduler.Name),
zap.Error(err),
)
return err
}
err = w.runtime.MitigateDisruption(w.ctx, w.scheduler, occupiedRoomsAmount, w.config.DisruptionSafetyPercentage)
mitigateForRoomsAmount := 0
if totalRoomsAmount >= 2 {
mitigateForRoomsAmount, err = w.roomStorage.GetRoomCountByStatus(w.ctx, w.scheduler.Name, game_room.GameStatusOccupied)
if err != nil {
w.logger.Error(
"failed to get occupied rooms for scheduler",
zap.String("scheduler", w.scheduler.Name),
zap.Error(err),
)
return err
}
}
err = w.runtime.MitigateDisruption(w.ctx, w.scheduler, mitigateForRoomsAmount, w.config.DisruptionSafetyPercentage)
if err != nil {
w.logger.Error(
"failed to mitigate disruption",
Expand All @@ -122,7 +134,7 @@ func (w *runtimeWatcherWorker) mitigateDisruptions() error {
w.logger.Debug(
"mitigated disruption for occupied rooms",
zap.String("scheduler", w.scheduler.Name),
zap.Int("occupiedRooms", occupiedRoomsAmount),
zap.Int("mitigateForRoomsAmount", mitigateForRoomsAmount),
)

return nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ func TestRuntimeWatcher_Start(t *testing.T) {
runtimeWatcher.EXPECT().Stop()

runtime.EXPECT().MitigateDisruption(gomock.Any(), gomock.Any(), 0, 0.0).Return(nil).MinTimes(0)
roomStorage.EXPECT().GetRoomCount(gomock.Any(), gomock.Any()).Return(2, nil).MinTimes(0)
roomStorage.EXPECT().GetRoomCountByStatus(gomock.Any(), gomock.Any(), gomock.Any()).Return(0, nil).MinTimes(0)

ctx, cancelFunc := context.WithCancel(context.Background())
Expand Down

0 comments on commit d2e0309

Please sign in to comment.