From bde7835646e16b805e392ece607f882dbe2360ad Mon Sep 17 00:00:00 2001 From: Pedro Soares Date: Mon, 14 Oct 2024 10:40:03 -0300 Subject: [PATCH] feat(metrics): report scheduler readyTarget (#642) This metric is useful for creating dynamic graphs, with visual lines representing the readyTarget or targetOccupancy, and also for creating budget representations of how much we deviated above and below the target. New metric name: maestro_next.ready_target --- .../core/worker/metricsreporter/metrics.go | 24 +++++++++++++++++++ .../metrics_reporter_worker.go | 15 ++++++++++++ 2 files changed, 39 insertions(+) diff --git a/internal/core/worker/metricsreporter/metrics.go b/internal/core/worker/metricsreporter/metrics.go index 0f9be9e95..050501b05 100644 --- a/internal/core/worker/metricsreporter/metrics.go +++ b/internal/core/worker/metricsreporter/metrics.go @@ -70,6 +70,7 @@ var ( monitoring.LabelScheduler, }, }) + gameRoomErrorGaugeMetric = monitoring.CreateGaugeMetric(&monitoring.MetricOpts{ Namespace: monitoring.Namespace, Subsystem: monitoring.SubsystemWorker, @@ -80,6 +81,7 @@ var ( monitoring.LabelScheduler, }, }) + gameRoomOccupiedGaugeMetric = monitoring.CreateGaugeMetric(&monitoring.MetricOpts{ Namespace: monitoring.Namespace, Subsystem: monitoring.SubsystemWorker, @@ -134,6 +136,7 @@ var ( monitoring.LabelScheduler, }, }) + instanceErrorGaugeMetric = monitoring.CreateGaugeMetric(&monitoring.MetricOpts{ Namespace: monitoring.Namespace, Subsystem: monitoring.SubsystemWorker, @@ -144,23 +147,39 @@ var ( monitoring.LabelScheduler, }, }) + + schedulerAutoscalePolicyReadyTargetGaugeMetric = monitoring.CreateGaugeMetric(&monitoring.MetricOpts{ + Namespace: monitoring.Namespace, + Subsystem: monitoring.SubsystemWorker, + Name: "ready_target", + Help: "Ready target configured in autoscale policy", + Labels: []string{ + monitoring.LabelGame, + monitoring.LabelScheduler, + }, + }) ) func reportGameRoomReadyNumber(game, schedulerName string, numberOfGameRooms int) { 
gameRoomReadyGaugeMetric.WithLabelValues(game, schedulerName).Set(float64(numberOfGameRooms)) } + func reportGameRoomPendingNumber(game, schedulerName string, numberOfGameRooms int) { gameRoomPendingGaugeMetric.WithLabelValues(game, schedulerName).Set(float64(numberOfGameRooms)) } + func reportGameRoomUnreadyNumber(game, schedulerName string, numberOfGameRooms int) { gameRoomUnreadyGaugeMetric.WithLabelValues(game, schedulerName).Set(float64(numberOfGameRooms)) } + func reportGameRoomTerminatingNumber(game, schedulerName string, numberOfGameRooms int) { gameRoomTerminatingGaugeMetric.WithLabelValues(game, schedulerName).Set(float64(numberOfGameRooms)) } + func reportGameRoomErrorNumber(game, schedulerName string, numberOfGameRooms int) { gameRoomErrorGaugeMetric.WithLabelValues(game, schedulerName).Set(float64(numberOfGameRooms)) } + func reportGameRoomOccupiedNumber(game, schedulerName string, numberOfGameRooms int) { gameRoomOccupiedGaugeMetric.WithLabelValues(game, schedulerName).Set(float64(numberOfGameRooms)) } @@ -180,6 +199,11 @@ func reportInstanceUnknownNumber(game, schedulerName string, numberOfInstances i func reportInstanceTerminatingNumber(game, schedulerName string, numberOfInstances int) { instanceTerminatingGaugeMetric.WithLabelValues(game, schedulerName).Set(float64(numberOfInstances)) } + func reportInstanceErrorNumber(game, schedulerName string, numberOfInstances int) { instanceErrorGaugeMetric.WithLabelValues(game, schedulerName).Set(float64(numberOfInstances)) } + +func reportSchedulerPolicyReadyTarget(game, schedulerName string, readyTarget float64) { + schedulerAutoscalePolicyReadyTargetGaugeMetric.WithLabelValues(game, schedulerName).Set(readyTarget) +} diff --git a/internal/core/worker/metricsreporter/metrics_reporter_worker.go b/internal/core/worker/metricsreporter/metrics_reporter_worker.go index 7d48e99ed..dd1d6243c 100644 --- a/internal/core/worker/metricsreporter/metrics_reporter_worker.go +++ 
b/internal/core/worker/metricsreporter/metrics_reporter_worker.go @@ -79,6 +79,7 @@ func (w *MetricsReporterWorker) Start(ctx context.Context) error { case <-ticker.C: w.reportInstanceMetrics() w.reportGameRoomMetrics() + w.reportSchedulerMetrics() } } } @@ -136,6 +137,20 @@ func (w *MetricsReporterWorker) reportGameRoomMetrics() { w.reportUnreadyRooms() } +func (w *MetricsReporterWorker) reportSchedulerMetrics() { + w.logger.Info("Reporting scheduler metrics") + w.reportSchedulerAutoscale() +} + +func (w *MetricsReporterWorker) reportSchedulerAutoscale() { + if w.scheduler.Autoscaling == nil { + return + } + if w.scheduler.Autoscaling.Policy.Parameters.RoomOccupancy != nil { + reportSchedulerPolicyReadyTarget(w.scheduler.Game, w.scheduler.Name, w.scheduler.Autoscaling.Policy.Parameters.RoomOccupancy.ReadyTarget) + } +} + func (w *MetricsReporterWorker) reportPendingRooms() { pendingRooms, err := w.roomStorage.GetRoomCountByStatus(w.workerContext, w.scheduler.Name, game_room.GameStatusPending) if err != nil {