From 47d0b95a83288df07d1fdc01c6eb6dde4f6f7322 Mon Sep 17 00:00:00 2001 From: Radovan Zvoncek Date: Wed, 27 Nov 2024 19:20:27 +0200 Subject: [PATCH] K8OP-294 WIP reproduce no-pods situation in envtests + ensure it does not crash the operator --- .../medusa/medusabackupjob_controller.go | 10 +++++ .../medusa/medusabackupjob_controller_test.go | 39 ++++++++++++++++++- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/controllers/medusa/medusabackupjob_controller.go b/controllers/medusa/medusabackupjob_controller.go index 36607440f..7915e0613 100644 --- a/controllers/medusa/medusabackupjob_controller.go +++ b/controllers/medusa/medusabackupjob_controller.go @@ -103,6 +103,11 @@ func (r *MedusaBackupJobReconciler) Reconcile(ctx context.Context, req ctrl.Requ logger.Error(err, "Failed to get datacenter pods") return ctrl.Result{}, err } + if len(pods) == 0 { + err = errors.NewNotFound(corev1.Resource("pod"), "no-pods") + logger.Error(err, "No pods found for datacenter", "CassandraDatacenter", cassdcKey) + return ctrl.Result{Requeue: false}, err + } // If there is anything in progress, simply requeue the request until each pod has finished or errored if len(backupJob.Status.InProgress) > 0 { @@ -186,6 +191,11 @@ func (r *MedusaBackupJobReconciler) Reconcile(ctx context.Context, req ctrl.Requ logger.Error(err, "Failed to get backup summary") return ctrl.Result{}, err } + if backupSummary == nil { + err = errors.NewNotFound(corev1.Resource("backupsummary"), "no-backup-summary") + logger.Error(err, "Backup summary not found") + return ctrl.Result{Requeue: false}, err + } if err := r.createMedusaBackup(ctx, backupJob, backupSummary, logger); err != nil { logger.Error(err, "Failed to create MedusaBackup") return ctrl.Result{}, err diff --git a/controllers/medusa/medusabackupjob_controller_test.go b/controllers/medusa/medusabackupjob_controller_test.go index e20ce82f8..480ff4ece 100644 --- a/controllers/medusa/medusabackupjob_controller_test.go +++ 
b/controllers/medusa/medusabackupjob_controller_test.go @@ -31,6 +31,7 @@ const ( successfulBackupName = "good-backup" failingBackupName = "bad-backup" missingBackupName = "missing-backup" + backupWithNoPods = "backup-with-no-pods" dc1PodPrefix = "192.168.1." dc2PodPrefix = "192.168.2." fakeBackupFileCount = int64(13) @@ -167,6 +168,10 @@ func testMedusaBackupDatacenter(t *testing.T, ctx context.Context, f *framework. backupCreated = createAndVerifyMedusaBackup(dc1Key, dc1, f, ctx, require, t, namespace, missingBackupName) require.False(backupCreated, "the backup object shouldn't have been created") + // in K8OP-294 we found out we can try to make backups on StatefulSets with no pods + backupCreated = createAndVerifyMedusaBackup(dc1Key, dc1, f, ctx, require, t, namespace, backupWithNoPods) + require.False(backupCreated, "the backup object shouldn't have been created") + err = f.DeleteK8ssandraCluster(ctx, client.ObjectKey{Namespace: kc.Namespace, Name: kc.Name}, timeout, interval) require.NoError(err, "failed to delete K8ssandraCluster") verifyObjectDoesNotExist(ctx, t, f, dc1Key, &cassdcapi.CassandraDatacenter{}) @@ -202,13 +207,17 @@ func createAndVerifyMedusaBackup(dcKey framework.ClusterKey, dc *cassdcapi.Cassa } } - createDatacenterPods(t, f, ctx, dcKey, dc) + if backupName != backupWithNoPods { + createDatacenterPods(t, f, ctx, dcKey, dc) + } dcCopy := dc.DeepCopy() dcKeyCopy := framework.NewClusterKey(f.DataPlaneContexts[0], dcKey.Namespace+"-copy", dcKey.Name) dcCopy.ObjectMeta.Namespace = dc.Namespace + "-copy" - createDatacenterPods(t, f, ctx, dcKeyCopy, dcCopy) + if backupName != backupWithNoPods { + createDatacenterPods(t, f, ctx, dcKeyCopy, dcCopy) + } t.Log("creating MedusaBackupJob") backupKey := framework.NewClusterKey(dcKey.K8sContext, dcKey.Namespace, backupName) @@ -252,6 +261,12 @@ func createAndVerifyMedusaBackup(dcKey framework.ClusterKey, dc *cassdcapi.Cassa return !updated.Status.FinishTime.IsZero() }, timeout, interval) + // clean up the 
pods so that even failing backups do not spoil the environment + if backupName != backupWithNoPods { + defer deleteDatacenterPods(t, f, ctx, dcKey, dc) + defer deleteDatacenterPods(t, f, ctx, dcKeyCopy, dc) + } + t.Log("check for the MedusaBackup being created") medusaBackupKey := framework.NewClusterKey(dcKey.K8sContext, dcKey.Namespace, backupName) medusaBackup := &api.MedusaBackup{} @@ -488,6 +503,26 @@ func findDatacenterCondition(status *cassdcapi.CassandraDatacenterStatus, condTy return nil } +func deleteDatacenterPods(t *testing.T, f *framework.Framework, ctx context.Context, dcKey framework.ClusterKey, dc *cassdcapi.CassandraDatacenter) { + for i := int32(0); i < dc.Spec.Size; i++ { + pod := &corev1.Pod{} + podName := fmt.Sprintf("%s-%s-%d", dc.Spec.ClusterName, dc.DatacenterName(), i) + podKey := framework.NewClusterKey(dcKey.K8sContext, dcKey.Namespace, podName) + err := f.Get(ctx, podKey, pod) + if err != nil { + if !errors.IsNotFound(err) { + t.Logf("failed to get pod %s: %v", podKey, err) + } + } else { + err = f.Delete(ctx, podKey, pod) + if err != nil { + t.Logf("failed to delete pod %s: %v", podKey, err) + } + } + + } +} + func createDatacenterPods(t *testing.T, f *framework.Framework, ctx context.Context, dcKey framework.ClusterKey, dc *cassdcapi.CassandraDatacenter) { _ = f.CreateNamespace(dcKey.Namespace) for i := int32(0); i < dc.Spec.Size; i++ {