From e8831f31b66571119da351c77f319c3da4c74e14 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Mon, 11 Dec 2023 12:48:26 +1100 Subject: [PATCH 01/26] Completion of a restore triggers secrets refresh. --- apis/k8ssandra/v1alpha1/constants.go | 2 + .../medusa/medusarestorejob_controller.go | 2 + pkg/medusa/refresh_secrets.go | 49 +++++++++++++ pkg/medusa/refresh_secrets_test.go | 68 +++++++++++++++++++ 4 files changed, 121 insertions(+) create mode 100644 pkg/medusa/refresh_secrets.go create mode 100644 pkg/medusa/refresh_secrets_test.go diff --git a/apis/k8ssandra/v1alpha1/constants.go b/apis/k8ssandra/v1alpha1/constants.go index 2d61209e4..68b99364d 100644 --- a/apis/k8ssandra/v1alpha1/constants.go +++ b/apis/k8ssandra/v1alpha1/constants.go @@ -59,6 +59,8 @@ const ( K8ssandraClusterNamespaceLabel = "k8ssandra.io/cluster-namespace" DatacenterLabel = "k8ssandra.io/datacenter" + // Forces refresh of secrets which relate to roles and authn in Cassandra. + RefreshAnnotation = "k8ssandra.io/refresh" ) var ( diff --git a/controllers/medusa/medusarestorejob_controller.go b/controllers/medusa/medusarestorejob_controller.go index 3308acc96..822169086 100644 --- a/controllers/medusa/medusarestorejob_controller.go +++ b/controllers/medusa/medusarestorejob_controller.go @@ -177,6 +177,8 @@ func (r *MedusaRestoreJobReconciler) Reconcile(ctx context.Context, req ctrl.Req } request.Log.Info("The restore operation is complete") + medusa.RefreshSecrets(request.Datacenter, ctx, r.Client, request.Log, r.DefaultDelay) + return ctrl.Result{}, nil } diff --git a/pkg/medusa/refresh_secrets.go b/pkg/medusa/refresh_secrets.go new file mode 100644 index 000000000..62be2f8f3 --- /dev/null +++ b/pkg/medusa/refresh_secrets.go @@ -0,0 +1,49 @@ +package medusa + +import ( + "context" + "fmt" + "time" + + "github.com/go-logr/logr" + cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" + k8ssandraapi "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" + "github.com/k8ssandra/k8ssandra-operator/pkg/reconciliation" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + defaultSUSecretName = "cass-superuser" +) + +func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, client client.Client, logger logr.Logger, requeueDelay time.Duration) error { + logger.Info(fmt.Sprintf("Restore complete for DC %#v, Refreshing secrets", dc.ObjectMeta)) + userSecrets := []string{} + + for _, user := range dc.Spec.Users { + userSecrets = append(userSecrets, user.SecretName) + } + if dc.Spec.SuperuserSecretName == "" { + userSecrets = append(userSecrets, defaultSUSecretName) //default SU secret + } else { + userSecrets = append(userSecrets, dc.Spec.SuperuserSecretName) + } + // Both Reaper and medusa secrets go into the userSecrets, so they don't need special handling. + for _, i := range userSecrets { + secret := &corev1.Secret{} + err := client.Get(ctx, types.NamespacedName{Name: i, Namespace: dc.Namespace}, secret) + if err != nil { + logger.Error(err, fmt.Sprintf("Failed to get secret %s", i)) + return err + } + if secret.ObjectMeta.Annotations == nil { + secret.ObjectMeta.Annotations = make(map[string]string) + } + secret.ObjectMeta.Annotations[k8ssandraapi.RefreshAnnotation] = time.Now().String() + reconciliation.ReconcileObject(ctx, client, requeueDelay, *secret) + } + return nil + +} diff --git a/pkg/medusa/refresh_secrets_test.go b/pkg/medusa/refresh_secrets_test.go new file mode 100644 index 000000000..c06a15854 --- /dev/null +++ b/pkg/medusa/refresh_secrets_test.go @@ -0,0 +1,68 @@ +package medusa + +import ( + "context" + "testing" + + "github.com/go-logr/logr" + cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" + "github.com/k8ssandra/k8ssandra-operator/pkg/test" + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" +) + +func TestRefreshSecrets_defaultSUSecret(t *testing.T) { + fakeClient := test.NewFakeClientWRestMapper() + cassDC := test.NewCassandraDatacenter("dc1", "test") + cassDC.Spec.Users = []cassdcapi.CassandraUser{ + {SecretName: "custom-user"}, + } + assert.NoError(t, fakeClient.Create(context.Background(), &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "test"}})) + assert.NoError(t, fakeClient.Create(context.Background(), &cassDC)) + secrets := []corev1.Secret{ + {ObjectMeta: metav1.ObjectMeta{Name: "custom-user", Namespace: "test"}, Data: map[string][]byte{"username": []byte("test")}}, + {ObjectMeta: metav1.ObjectMeta{Name: "cass-superuser", Namespace: "test"}, Data: map[string][]byte{"username": []byte("test")}}, + } + for _, i := range secrets { + assert.NoError(t, fakeClient.Create(context.Background(), &i)) + } + assert.NoError(t, RefreshSecrets(&cassDC, context.Background(), fakeClient, logr.Logger{}, 0)) + suSecret := &corev1.Secret{} + assert.NoError(t, fakeClient.Get(context.Background(), types.NamespacedName{Name: "cass-superuser", Namespace: "test"}, suSecret)) + _, exists := suSecret.ObjectMeta.Annotations["k8ssandra.io/refresh"] + assert.True(t, exists) + userSecret := &corev1.Secret{} + assert.NoError(t, fakeClient.Get(context.Background(), types.NamespacedName{Name: "custom-user", Namespace: "test"}, userSecret)) + _, exists = userSecret.ObjectMeta.Annotations["k8ssandra.io/refresh"] + assert.True(t, exists) +} + +func TestRefreshSecrets_customSecrets(t *testing.T) { + fakeClient := test.NewFakeClientWRestMapper() + cassDC := test.NewCassandraDatacenter("dc1", "test") + cassDC.Spec.Users = []cassdcapi.CassandraUser{ + {SecretName: "custom-user"}, + } + cassDC.Spec.SuperuserSecretName = "cass-custom-superuser" + assert.NoError(t, fakeClient.Create(context.Background(), &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "test"}})) + assert.NoError(t, fakeClient.Create(context.Background(), &cassDC)) + secrets := []corev1.Secret{ + {ObjectMeta: metav1.ObjectMeta{Name: "custom-user", Namespace: "test"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "cass-custom-superuser", Namespace: "test"}}, + } + for _, i := range secrets { + assert.NoError(t, fakeClient.Create(context.Background(), &i)) + } + assert.NoError(t, RefreshSecrets(&cassDC, context.Background(), fakeClient, logr.Logger{}, 0)) + suSecret := &corev1.Secret{} + assert.NoError(t, fakeClient.Get(context.Background(), types.NamespacedName{Name: "cass-custom-superuser", Namespace: "test"}, suSecret)) + _, exists := suSecret.ObjectMeta.Annotations["k8ssandra.io/refresh"] + assert.True(t, exists) + userSecret := &corev1.Secret{} + assert.NoError(t, fakeClient.Get(context.Background(), types.NamespacedName{Name: "custom-user", Namespace: "test"}, userSecret)) + _, exists = userSecret.ObjectMeta.Annotations["k8ssandra.io/refresh"] + assert.True(t, exists) + +} From 1ce6ce124c8ed78645bb9240d544127ec8da5a5c Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Mon, 11 Dec 2023 13:02:34 +1100 Subject: [PATCH 02/26] Fix error check. --- controllers/medusa/medusarestorejob_controller.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/controllers/medusa/medusarestorejob_controller.go b/controllers/medusa/medusarestorejob_controller.go index 822169086..2f00fc931 100644 --- a/controllers/medusa/medusarestorejob_controller.go +++ b/controllers/medusa/medusarestorejob_controller.go @@ -177,7 +177,13 @@ func (r *MedusaRestoreJobReconciler) Reconcile(ctx context.Context, req ctrl.Req } request.Log.Info("The restore operation is complete") - medusa.RefreshSecrets(request.Datacenter, ctx, r.Client, request.Log, r.DefaultDelay) + + err = medusa.RefreshSecrets(request.Datacenter, ctx, r.Client, request.Log, r.DefaultDelay) + if err != nil { + request.Log.Error(err, "Failed to refresh Cassandra users in the DB") + // Not going to bother applying updates here since we hit an error. + return ctrl.Result{RequeueAfter: r.DefaultDelay}, err + } return ctrl.Result{}, nil } From 8b29f6866979235e2a4618dea2593accef547748 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Mon, 11 Dec 2023 14:21:02 +1100 Subject: [PATCH 03/26] Add a test to the envtests too. --- controllers/medusa/medusarestorejob_controller_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/controllers/medusa/medusarestorejob_controller_test.go b/controllers/medusa/medusarestorejob_controller_test.go index 87ec12e37..310fa3763 100644 --- a/controllers/medusa/medusarestorejob_controller_test.go +++ b/controllers/medusa/medusarestorejob_controller_test.go @@ -8,6 +8,7 @@ import ( cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" k8ss "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" + k8ssandraapi "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" api "github.com/k8ssandra/k8ssandra-operator/apis/medusa/v1alpha1" "github.com/k8ssandra/k8ssandra-operator/pkg/images" "github.com/k8ssandra/k8ssandra-operator/pkg/medusa" @@ -292,8 +293,12 @@ func testMedusaRestoreDatacenter(t *testing.T, ctx context.Context, f *framework return !restore.Status.FinishTime.IsZero() }, timeout, interval) - err = f.DeleteK8ssandraCluster(ctx, client.ObjectKey{Namespace: kc.Namespace, Name: kc.Name}, timeout, interval) + err = f.DeleteK8ssandraCluster(ctx, client.ObjectKey{Namespace: dc.Namespace, Name: kc.Name}, timeout, interval) require.NoError(err, "failed to delete K8ssandraCluster") + superuserSecret := corev1.Secret{} + require.NoError(f.Client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: "cass-superuser"}, &superuserSecret)) + assert.Contains(t, superuserSecret.ObjectMeta.Annotations, k8ssandraapi.RefreshAnnotation) + } func testValidationErrorStopsRestore(t *testing.T, ctx context.Context, f *framework.Framework, namespace string) { From 281b556afe12ec1c007fa94588cc886fc6240a81 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Mon, 11 Dec 2023 16:58:29 +1100 Subject: [PATCH 04/26] Fix envtest. --- controllers/medusa/medusarestorejob_controller_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/controllers/medusa/medusarestorejob_controller_test.go b/controllers/medusa/medusarestorejob_controller_test.go index 310fa3763..f5e6bf327 100644 --- a/controllers/medusa/medusarestorejob_controller_test.go +++ b/controllers/medusa/medusarestorejob_controller_test.go @@ -293,12 +293,13 @@ func testMedusaRestoreDatacenter(t *testing.T, ctx context.Context, f *framework return !restore.Status.FinishTime.IsZero() }, timeout, interval) - err = f.DeleteK8ssandraCluster(ctx, client.ObjectKey{Namespace: dc.Namespace, Name: kc.Name}, timeout, interval) - require.NoError(err, "failed to delete K8ssandraCluster") superuserSecret := corev1.Secret{} require.NoError(f.Client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: "cass-superuser"}, &superuserSecret)) assert.Contains(t, superuserSecret.ObjectMeta.Annotations, k8ssandraapi.RefreshAnnotation) + err = f.DeleteK8ssandraCluster(ctx, client.ObjectKey{Namespace: dc.Namespace, Name: kc.Name}, timeout, interval) + require.NoError(err, "failed to delete K8ssandraCluster") + } func testValidationErrorStopsRestore(t *testing.T, ctx context.Context, f *framework.Framework, namespace string) { From 85d9aed4ce88efdbcb826e2368eb9c5e0bb7ad41 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Mon, 11 Dec 2023 17:28:09 +1100 Subject: [PATCH 05/26] Delete envtest test. --- controllers/medusa/medusarestorejob_controller_test.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/controllers/medusa/medusarestorejob_controller_test.go b/controllers/medusa/medusarestorejob_controller_test.go index f5e6bf327..2e8c2a38f 100644 --- a/controllers/medusa/medusarestorejob_controller_test.go +++ b/controllers/medusa/medusarestorejob_controller_test.go @@ -8,7 +8,6 @@ import ( cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" k8ss "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" - k8ssandraapi "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" api "github.com/k8ssandra/k8ssandra-operator/apis/medusa/v1alpha1" "github.com/k8ssandra/k8ssandra-operator/pkg/images" "github.com/k8ssandra/k8ssandra-operator/pkg/medusa" @@ -293,10 +292,6 @@ func testMedusaRestoreDatacenter(t *testing.T, ctx context.Context, f *framework return !restore.Status.FinishTime.IsZero() }, timeout, interval) - superuserSecret := corev1.Secret{} - require.NoError(f.Client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: "cass-superuser"}, &superuserSecret)) - assert.Contains(t, superuserSecret.ObjectMeta.Annotations, k8ssandraapi.RefreshAnnotation) - err = f.DeleteK8ssandraCluster(ctx, client.ObjectKey{Namespace: dc.Namespace, Name: kc.Name}, timeout, interval) require.NoError(err, "failed to delete K8ssandraCluster") From 111c5ff2e1b435fd1de70dab201ce6df479988f2 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Mon, 11 Dec 2023 17:33:00 +1100 Subject: [PATCH 06/26] Add e2e test. --- test/e2e/medusa_test.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/e2e/medusa_test.go b/test/e2e/medusa_test.go index 9f4403462..9bb5ec4e3 100644 --- a/test/e2e/medusa_test.go +++ b/test/e2e/medusa_test.go @@ -7,6 +7,7 @@ import ( cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" api "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" + k8ssandraapi "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" medusa "github.com/k8ssandra/k8ssandra-operator/apis/medusa/v1alpha1" "github.com/k8ssandra/k8ssandra-operator/pkg/cassandra" medusapkg "github.com/k8ssandra/k8ssandra-operator/pkg/medusa" @@ -211,6 +212,17 @@ func verifyRestoreJobFinished(t *testing.T, ctx context.Context, f *framework.E2 return !restore.Status.FinishTime.IsZero() }, polling.medusaRestoreDone.timeout, polling.medusaRestoreDone.interval, "restore didn't finish within timeout") + + require.Eventually(func() bool { + secret := &corev1.Secret{} + err := f.Get(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, "cass-superuser"), secret) + if err != nil { + return false + } + _, exists := secret.Annotations[k8ssandraapi.RefreshAnnotation] + return exists + }, polling.medusaRestoreDone.timeout, polling.medusaRestoreDone.interval, "superuser secret wasn't updated with refresh annotation") + } func checkMedusaStandaloneDeploymentExists(t *testing.T, ctx context.Context, dcKey framework.ClusterKey, f *framework.E2eFramework, kc *api.K8ssandraCluster) { From a0e6f0223456883a7e5a8b02433f7a385e775147 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Tue, 12 Dec 2023 10:27:25 +1100 Subject: [PATCH 07/26] more logging for medusa tests. --- test/e2e/medusa_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/test/e2e/medusa_test.go b/test/e2e/medusa_test.go index 9bb5ec4e3..2269a63d8 100644 --- a/test/e2e/medusa_test.go +++ b/test/e2e/medusa_test.go @@ -217,6 +217,7 @@ func verifyRestoreJobFinished(t *testing.T, ctx context.Context, f *framework.E2 secret := &corev1.Secret{} err := f.Get(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, "cass-superuser"), secret) if err != nil { + t.Log(err) return false } _, exists := secret.Annotations[k8ssandraapi.RefreshAnnotation] From 227a42effc73f2c78d1388e04538092c4046a4a7 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Tue, 12 Dec 2023 12:49:52 +1100 Subject: [PATCH 08/26] Add some logging to allow debugging. --- test/e2e/medusa_test.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/e2e/medusa_test.go b/test/e2e/medusa_test.go index 2269a63d8..550085315 100644 --- a/test/e2e/medusa_test.go +++ b/test/e2e/medusa_test.go @@ -218,6 +218,14 @@ func verifyRestoreJobFinished(t *testing.T, ctx context.Context, f *framework.E2 err := f.Get(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, "cass-superuser"), secret) if err != nil { t.Log(err) + secretList := &corev1.SecretList{} + err = f.List(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, "cass-superuser"), secretList) + if err != nil { + t.Log(err) + } + for _, i := range secretList.Items { + t.Log(i.Name) + } return false } _, exists := secret.Annotations[k8ssandraapi.RefreshAnnotation] From 8277e188a6eaac10adfd13e1eb9f711ab8995f24 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Tue, 12 Dec 2023 14:36:22 +1100 Subject: [PATCH 09/26] Ensure right default username is used. --- pkg/medusa/refresh_secrets.go | 6 +----- pkg/medusa/refresh_secrets_test.go | 4 ++-- test/e2e/medusa_test.go | 4 ++-- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pkg/medusa/refresh_secrets.go b/pkg/medusa/refresh_secrets.go index 62be2f8f3..95223e5dc 100644 --- a/pkg/medusa/refresh_secrets.go +++ b/pkg/medusa/refresh_secrets.go @@ -14,10 +14,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -const ( - defaultSUSecretName = "cass-superuser" -) - func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, client client.Client, logger logr.Logger, requeueDelay time.Duration) error { logger.Info(fmt.Sprintf("Restore complete for DC %#v, Refreshing secrets", dc.ObjectMeta)) userSecrets := []string{} @@ -26,7 +22,7 @@ func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, clie userSecrets = append(userSecrets, user.SecretName) } if dc.Spec.SuperuserSecretName == "" { - userSecrets = append(userSecrets, defaultSUSecretName) //default SU secret + userSecrets = append(userSecrets, cassdcapi.CleanupForKubernetes(dc.Spec.ClusterName)+"-superuser") //default SU secret } else { userSecrets = append(userSecrets, dc.Spec.SuperuserSecretName) } diff --git a/pkg/medusa/refresh_secrets_test.go b/pkg/medusa/refresh_secrets_test.go index c06a15854..908535d83 100644 --- a/pkg/medusa/refresh_secrets_test.go +++ b/pkg/medusa/refresh_secrets_test.go @@ -23,14 +23,14 @@ func TestRefreshSecrets_defaultSUSecret(t *testing.T) { assert.NoError(t, fakeClient.Create(context.Background(), &cassDC)) secrets := []corev1.Secret{ {ObjectMeta: metav1.ObjectMeta{Name: "custom-user", Namespace: "test"}, Data: map[string][]byte{"username": []byte("test")}}, - {ObjectMeta: metav1.ObjectMeta{Name: "cass-superuser", Namespace: "test"}, Data: map[string][]byte{"username": []byte("test")}}, + {ObjectMeta: metav1.ObjectMeta{Name: "test-cluster-superuser", Namespace: "test"}, Data: map[string][]byte{"username": []byte("test")}}, } for _, i := range secrets { assert.NoError(t, fakeClient.Create(context.Background(), &i)) } assert.NoError(t, RefreshSecrets(&cassDC, context.Background(), fakeClient, logr.Logger{}, 0)) suSecret := &corev1.Secret{} - assert.NoError(t, fakeClient.Get(context.Background(), types.NamespacedName{Name: "cass-superuser", Namespace: "test"}, suSecret)) + assert.NoError(t, fakeClient.Get(context.Background(), types.NamespacedName{Name: "test-cluster-superuser", Namespace: "test"}, suSecret)) _, exists := suSecret.ObjectMeta.Annotations["k8ssandra.io/refresh"] assert.True(t, exists) userSecret := &corev1.Secret{} diff --git a/test/e2e/medusa_test.go b/test/e2e/medusa_test.go index 550085315..dc5b79fac 100644 --- a/test/e2e/medusa_test.go +++ b/test/e2e/medusa_test.go @@ -215,11 +215,11 @@ func verifyRestoreJobFinished(t *testing.T, ctx context.Context, f *framework.E2 require.Eventually(func() bool { secret := &corev1.Secret{} - err := f.Get(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, "cass-superuser"), secret) + err := f.Get(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, "firstcluster-superuser"), secret) if err != nil { t.Log(err) secretList := &corev1.SecretList{} - err = f.List(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, "cass-superuser"), secretList) + err = f.List(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, "firstcluster-superuser"), secretList) if err != nil { t.Log(err) } From 66c45af2af0b3f91e85cf0ac23ef64b38c2982ea Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Tue, 12 Dec 2023 17:39:29 +1100 Subject: [PATCH 10/26] Deal with multiple different cluster names leading to different superuser names. --- test/e2e/medusa_test.go | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/test/e2e/medusa_test.go b/test/e2e/medusa_test.go index dc5b79fac..01f0ec130 100644 --- a/test/e2e/medusa_test.go +++ b/test/e2e/medusa_test.go @@ -215,17 +215,9 @@ func verifyRestoreJobFinished(t *testing.T, ctx context.Context, f *framework.E2 require.Eventually(func() bool { secret := &corev1.Secret{} - err := f.Get(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, "firstcluster-superuser"), secret) + err := f.Get(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, cassdcapi.CleanupForKubernetes(dcKey.Name)+"-superuser"), secret) if err != nil { t.Log(err) - secretList := &corev1.SecretList{} - err = f.List(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, "firstcluster-superuser"), secretList) - if err != nil { - t.Log(err) - } - for _, i := range secretList.Items { - t.Log(i.Name) - } return false } _, exists := secret.Annotations[k8ssandraapi.RefreshAnnotation] From c4e1eee85f47a33f9ad77a739c2a061641d056c0 Mon Sep 17 00:00:00 2001 From: Miles Garnsey <11435896+Miles-Garnsey@users.noreply.github.com> Date: Wed, 13 Dec 2023 11:11:02 +1100 Subject: [PATCH 11/26] Alex's suggested test change. Co-authored-by: Alexander Dejanovski --- test/e2e/medusa_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/medusa_test.go b/test/e2e/medusa_test.go index 01f0ec130..effa69b9c 100644 --- a/test/e2e/medusa_test.go +++ b/test/e2e/medusa_test.go @@ -215,7 +215,7 @@ func verifyRestoreJobFinished(t *testing.T, ctx context.Context, f *framework.E2 require.Eventually(func() bool { secret := &corev1.Secret{} - err := f.Get(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, cassdcapi.CleanupForKubernetes(dcKey.Name)+"-superuser"), secret) + err := f.Get(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, "test-superuser", secret) if err != nil { t.Log(err) return false From 5ec13af82011ae7e16c84a73686fa5f7f268e930 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Wed, 13 Dec 2023 11:14:14 +1100 Subject: [PATCH 12/26] Fix error introduced by last change. --- test/e2e/medusa_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/medusa_test.go b/test/e2e/medusa_test.go index effa69b9c..89aef88a9 100644 --- a/test/e2e/medusa_test.go +++ b/test/e2e/medusa_test.go @@ -215,7 +215,7 @@ func verifyRestoreJobFinished(t *testing.T, ctx context.Context, f *framework.E2 require.Eventually(func() bool { secret := &corev1.Secret{} - err := f.Get(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, "test-superuser", secret) + err := f.Get(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, "test-superuser"), secret) if err != nil { t.Log(err) return false From 2166919275e24619d6015b27581e5bcc6b56fc8b Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Wed, 13 Dec 2023 12:03:24 +1100 Subject: [PATCH 13/26] Fix tests so that they always retrieve the right secret. --- test/e2e/medusa_test.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/test/e2e/medusa_test.go b/test/e2e/medusa_test.go index 89aef88a9..105e78ef6 100644 --- a/test/e2e/medusa_test.go +++ b/test/e2e/medusa_test.go @@ -214,8 +214,17 @@ func verifyRestoreJobFinished(t *testing.T, ctx context.Context, f *framework.E2 }, polling.medusaRestoreDone.timeout, polling.medusaRestoreDone.interval, "restore didn't finish within timeout") require.Eventually(func() bool { + dc := &cassdcapi.CassandraDatacenter{} + err := f.Get(ctx, dcKey, dc) + if err != nil { + t.Log(err) + } + superUserSecret := dc.Spec.SuperuserSecretName + if dc.Spec.SuperuserSecretName == "" { + superUserSecret = cassdcapi.CleanupForKubernetes(dcKey.Name) + "-superuser" + } secret := &corev1.Secret{} - err := f.Get(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, "test-superuser"), secret) + err = f.Get(ctx, framework.NewClusterKey(restoreClusterKey.K8sContext, restoreClusterKey.Namespace, superUserSecret), secret) if err != nil { t.Log(err) return false From 0cee3097a8a78d16c6f36a614db6ea0272be2bf6 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Wed, 13 Dec 2023 12:25:34 +1100 Subject: [PATCH 14/26] Changelog. --- CHANGELOG/CHANGELOG-1.10.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG/CHANGELOG-1.10.md b/CHANGELOG/CHANGELOG-1.10.md index 86e49e3e0..566b35ce0 100644 --- a/CHANGELOG/CHANGELOG-1.10.md +++ b/CHANGELOG/CHANGELOG-1.10.md @@ -15,6 +15,8 @@ When cutting a new release, update the `unreleased` heading to the tag being gen ## unreleased +* [FEATURE] [#1080](https://github.com/k8ssandra/k8ssandra-operator/issues/1080) When a restore is completed, an annotation is added to the user secrets that will cause cass-operator to refresh them on the Cassandra side. + ## v1.10.3 - 2023-11-15 * [BUGFIX] [#1110](https://github.com/k8ssandra/k8ssandra-operator/issues/1110) Fix cluster name being set to "Test Cluster" when running Cassandra 4.1+ From 5d63dfbaa19b22e10244bcd3922a78a68450b444 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Wed, 13 Dec 2023 12:41:23 +1100 Subject: [PATCH 15/26] More debugging. --- pkg/medusa/refresh_secrets.go | 2 +- test/e2e/medusa_test.go | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/medusa/refresh_secrets.go b/pkg/medusa/refresh_secrets.go index 95223e5dc..f9d3629b5 100644 --- a/pkg/medusa/refresh_secrets.go +++ b/pkg/medusa/refresh_secrets.go @@ -17,7 +17,6 @@ import ( func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, client client.Client, logger logr.Logger, requeueDelay time.Duration) error { logger.Info(fmt.Sprintf("Restore complete for DC %#v, Refreshing secrets", dc.ObjectMeta)) userSecrets := []string{} - for _, user := range dc.Spec.Users { userSecrets = append(userSecrets, user.SecretName) } @@ -26,6 +25,7 @@ func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, clie } else { userSecrets = append(userSecrets, dc.Spec.SuperuserSecretName) } + logger.Info(fmt.Sprintf("refreshing user secrets for %v", userSecrets)) // Both Reaper and medusa secrets go into the userSecrets, so they don't need special handling. for _, i := range userSecrets { secret := &corev1.Secret{} diff --git a/test/e2e/medusa_test.go b/test/e2e/medusa_test.go index 105e78ef6..1effe168f 100644 --- a/test/e2e/medusa_test.go +++ b/test/e2e/medusa_test.go @@ -218,6 +218,7 @@ func verifyRestoreJobFinished(t *testing.T, ctx context.Context, f *framework.E2 err := f.Get(ctx, dcKey, dc) if err != nil { t.Log(err) + return false } superUserSecret := dc.Spec.SuperuserSecretName if dc.Spec.SuperuserSecretName == "" { @@ -230,6 +231,9 @@ func verifyRestoreJobFinished(t *testing.T, ctx context.Context, f *framework.E2 return false } _, exists := secret.Annotations[k8ssandraapi.RefreshAnnotation] + if !exists { + t.Logf("%#v", *secret) + } return exists }, polling.medusaRestoreDone.timeout, polling.medusaRestoreDone.interval, "superuser secret wasn't updated with refresh annotation") From e9223273eeb8cc9dc80a8d5aab32f8c1a0a7e0df Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Thu, 14 Dec 2023 13:34:54 +1100 Subject: [PATCH 16/26] More debugging. --- .../kind_multicluster_e2e_tests.yaml | 26 +++++++++---------- .../medusa/medusarestorejob_controller.go | 2 +- pkg/medusa/refresh_secrets.go | 4 +-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/kind_multicluster_e2e_tests.yaml b/.github/workflows/kind_multicluster_e2e_tests.yaml index 3f0d10741..6f3c5b846 100644 --- a/.github/workflows/kind_multicluster_e2e_tests.yaml +++ b/.github/workflows/kind_multicluster_e2e_tests.yaml @@ -57,22 +57,22 @@ jobs: strategy: matrix: e2e_test: - - CreateMultiDatacenterCluster - - CreateMixedMultiDataCenterCluster - - AddDcToClusterDiffDataplane - - RemoveDcFromCluster - - CheckStargateApisWithMultiDcCluster - - CreateMultiStargateAndDatacenter - - CreateMultiReaper - - ClusterScoped/MultiDcMultiCluster + # - CreateMultiDatacenterCluster + # - CreateMixedMultiDataCenterCluster + # - AddDcToClusterDiffDataplane + # - RemoveDcFromCluster + # - CheckStargateApisWithMultiDcCluster + # - CreateMultiStargateAndDatacenter + # - CreateMultiReaper + # - ClusterScoped/MultiDcMultiCluster - CreateMultiMedusaJob - - MultiDcAuthOnOff + # - MultiDcAuthOnOff # TODO: these e2e tests started breaking after new client certificates were added. Needs fixing. #- MultiDcEncryptionWithStargate - - MultiDcEncryptionWithReaper - - StopAndRestartDc - - CreateMultiDatacenterDseCluster - - PerNodeConfig/InitialTokens + # - MultiDcEncryptionWithReaper + # - StopAndRestartDc + # - CreateMultiDatacenterDseCluster + # - PerNodeConfig/InitialTokens fail-fast: false name: ${{ matrix.e2e_test }} env: diff --git a/controllers/medusa/medusarestorejob_controller.go b/controllers/medusa/medusarestorejob_controller.go index 2f00fc931..4bac087de 100644 --- a/controllers/medusa/medusarestorejob_controller.go +++ b/controllers/medusa/medusarestorejob_controller.go @@ -176,7 +176,7 @@ func (r *MedusaRestoreJobReconciler) Reconcile(ctx context.Context, req ctrl.Req return ctrl.Result{RequeueAfter: r.DefaultDelay}, err } - request.Log.Info("The restore operation is complete") + request.Log.Info("The restore operation is complete for DC", "CassandraDatacenter", request.Datacenter.Name) err = medusa.RefreshSecrets(request.Datacenter, ctx, r.Client, request.Log, r.DefaultDelay) if err != nil { diff --git a/pkg/medusa/refresh_secrets.go b/pkg/medusa/refresh_secrets.go index f9d3629b5..6bd612aa4 100644 --- a/pkg/medusa/refresh_secrets.go +++ b/pkg/medusa/refresh_secrets.go @@ -15,7 +15,7 @@ import ( ) func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, client client.Client, logger logr.Logger, requeueDelay time.Duration) error { - logger.Info(fmt.Sprintf("Restore complete for DC %#v, Refreshing secrets", dc.ObjectMeta)) + println(fmt.Sprintf("Restore complete for DC %#v, Refreshing secrets", dc.ObjectMeta)) userSecrets := []string{} for _, user := range dc.Spec.Users { userSecrets = append(userSecrets, user.SecretName) @@ -25,7 +25,7 @@ func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, clie } else { userSecrets = append(userSecrets, dc.Spec.SuperuserSecretName) } - logger.Info(fmt.Sprintf("refreshing user secrets for %v", userSecrets)) + println(fmt.Sprintf("refreshing user secrets for %v", userSecrets)) // Both Reaper and medusa secrets go into the userSecrets, so they don't need special handling. for _, i := range userSecrets { secret := &corev1.Secret{} From 9a218fb75ace46ce70d63e9ca9ffbc587e1decd3 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Fri, 15 Dec 2023 09:24:08 +1100 Subject: [PATCH 17/26] Error handling, restore old logging style. --- pkg/medusa/refresh_secrets.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pkg/medusa/refresh_secrets.go b/pkg/medusa/refresh_secrets.go index 6bd612aa4..a1b057066 100644 --- a/pkg/medusa/refresh_secrets.go +++ b/pkg/medusa/refresh_secrets.go @@ -2,6 +2,7 @@ package medusa import ( "context" + "errors" "fmt" "time" @@ -15,7 +16,7 @@ import ( ) func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, client client.Client, logger logr.Logger, requeueDelay time.Duration) error { - println(fmt.Sprintf("Restore complete for DC %#v, Refreshing secrets", dc.ObjectMeta)) + logger.Info(fmt.Sprintf("Restore complete for DC %#v, Refreshing secrets", dc.ObjectMeta)) userSecrets := []string{} for _, user := range dc.Spec.Users { userSecrets = append(userSecrets, user.SecretName) @@ -25,7 +26,7 @@ func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, clie } else { userSecrets = append(userSecrets, dc.Spec.SuperuserSecretName) } - println(fmt.Sprintf("refreshing user secrets for %v", userSecrets)) + logger.Info(fmt.Sprintf("refreshing user secrets for %v", userSecrets)) // Both Reaper and medusa secrets go into the userSecrets, so they don't need special handling. for _, i := range userSecrets { secret := &corev1.Secret{} @@ -38,7 +39,9 @@ func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, clie secret.ObjectMeta.Annotations = make(map[string]string) } secret.ObjectMeta.Annotations[k8ssandraapi.RefreshAnnotation] = time.Now().String() - reconciliation.ReconcileObject(ctx, client, requeueDelay, *secret) + if err := reconciliation.ReconcileObject(ctx, client, requeueDelay, *secret); err != nil { + return errors.New(err.GetError().Error()) + } } return nil From f7019cb4a18595c21511a4438efc28fda9cd6128 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Fri, 15 Dec 2023 14:29:54 +1100 Subject: [PATCH 18/26] Print version and commit information at startup. --- Makefile | 7 ++++++- main.go | 11 +++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a60056b06..20338dd27 100644 --- a/Makefile +++ b/Makefile @@ -167,8 +167,13 @@ endif ##@ Build +DATE ?= $(shell date) +VERSION ?= $(VERSION) +COMMIT ?= $(shell git rev-parse --short HEAD) + + build: generate fmt vet ## Build manager binary. - go build -o bin/manager main.go + go build --ldflags "-X \"main.version=$(VERSION)\" -X \"main.date=$(DATE)\" -X \"main.commit=$(COMMIT)\"" -o bin/manager main.go run: manifests generate fmt vet ## Run a controller from your host. go run ./main.go diff --git a/main.go b/main.go index a03db0003..0fdd08cec 100644 --- a/main.go +++ b/main.go @@ -67,6 +67,15 @@ import ( // +kubebuilder:scaffold:imports ) +var ( + version = "dev" + commit = "n/a" + date = "n/a" + versionMessage = "#######################" + + fmt.Sprintf("#### version %s commit %s date %s ####", version, commit, date) + + "#######################" +) + var ( scheme = runtime.NewScheme() setupLog = ctrl.Log.WithName("setup") @@ -114,6 +123,8 @@ func main() { setupLog.Info("watch namespace configured", "namespace", watchNamespace) } + setupLog.Info(versionMessage) + options := ctrl.Options{ Scheme: scheme, MetricsBindAddress: metricsAddr, From d4c5188c18ae6f3ae4c1629b1718ded68aaba390 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Fri, 15 Dec 2023 18:14:27 +1100 Subject: [PATCH 19/26] Reinstate other tests. --- .../kind_multicluster_e2e_tests.yaml | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/kind_multicluster_e2e_tests.yaml b/.github/workflows/kind_multicluster_e2e_tests.yaml index 6f3c5b846..3f0d10741 100644 --- a/.github/workflows/kind_multicluster_e2e_tests.yaml +++ b/.github/workflows/kind_multicluster_e2e_tests.yaml @@ -57,22 +57,22 @@ jobs: strategy: matrix: e2e_test: - # - CreateMultiDatacenterCluster - # - CreateMixedMultiDataCenterCluster - # - AddDcToClusterDiffDataplane - # - RemoveDcFromCluster - # - CheckStargateApisWithMultiDcCluster - # - CreateMultiStargateAndDatacenter - # - CreateMultiReaper - # - ClusterScoped/MultiDcMultiCluster + - CreateMultiDatacenterCluster + - CreateMixedMultiDataCenterCluster + - AddDcToClusterDiffDataplane + - RemoveDcFromCluster + - CheckStargateApisWithMultiDcCluster + - CreateMultiStargateAndDatacenter + - CreateMultiReaper + - ClusterScoped/MultiDcMultiCluster - CreateMultiMedusaJob - # - MultiDcAuthOnOff + - MultiDcAuthOnOff # TODO: these e2e tests started breaking after new client certificates were added. Needs fixing. #- MultiDcEncryptionWithStargate - # - MultiDcEncryptionWithReaper - # - StopAndRestartDc - # - CreateMultiDatacenterDseCluster - # - PerNodeConfig/InitialTokens + - MultiDcEncryptionWithReaper + - StopAndRestartDc + - CreateMultiDatacenterDseCluster + - PerNodeConfig/InitialTokens fail-fast: false name: ${{ matrix.e2e_test }} env: From d823aa4b8417f491d3f790b96245647243f00d9d Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Thu, 4 Jan 2024 13:33:39 +1100 Subject: [PATCH 20/26] Fix infinite requeue problem. --- .../medusa/medusarestorejob_controller.go | 11 +++--- pkg/medusa/refresh_secrets.go | 34 +++++++++++++++---- pkg/medusa/refresh_secrets_test.go | 7 ++-- 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/controllers/medusa/medusarestorejob_controller.go b/controllers/medusa/medusarestorejob_controller.go index 4bac087de..73a127ef4 100644 --- a/controllers/medusa/medusarestorejob_controller.go +++ b/controllers/medusa/medusarestorejob_controller.go @@ -178,11 +178,12 @@ func (r *MedusaRestoreJobReconciler) Reconcile(ctx context.Context, req ctrl.Req request.Log.Info("The restore operation is complete for DC", "CassandraDatacenter", request.Datacenter.Name) - err = medusa.RefreshSecrets(request.Datacenter, ctx, r.Client, request.Log, r.DefaultDelay) - if err != nil { - request.Log.Error(err, "Failed to refresh Cassandra users in the DB") - // Not going to bother applying updates here since we hit an error. - return ctrl.Result{RequeueAfter: r.DefaultDelay}, err + recRes := medusa.RefreshSecrets(request.Datacenter, ctx, r.Client, request.Log, r.DefaultDelay) + switch { + case recRes.IsError(): + return ctrl.Result{RequeueAfter: r.DefaultDelay}, recRes.GetError() + case recRes.IsRequeue(): + return ctrl.Result{RequeueAfter: r.DefaultDelay}, nil } return ctrl.Result{}, nil diff --git a/pkg/medusa/refresh_secrets.go b/pkg/medusa/refresh_secrets.go index a1b057066..3d8ac8550 100644 --- a/pkg/medusa/refresh_secrets.go +++ b/pkg/medusa/refresh_secrets.go @@ -2,7 +2,6 @@ package medusa import ( "context" - "errors" "fmt" "time" @@ -10,12 +9,17 @@ import ( cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" k8ssandraapi "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" "github.com/k8ssandra/k8ssandra-operator/pkg/reconciliation" + "github.com/k8ssandra/k8ssandra-operator/pkg/result" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" ) -func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, client client.Client, logger logr.Logger, requeueDelay time.Duration) error { +const ( + maxRetries = 5 +) + +func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, client client.Client, logger logr.Logger, requeueDelay time.Duration) result.ReconcileResult { logger.Info(fmt.Sprintf("Restore complete for DC %#v, Refreshing secrets", dc.ObjectMeta)) userSecrets := []string{} for _, user := range dc.Spec.Users { @@ -33,16 +37,32 @@ func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, clie err := client.Get(ctx, types.NamespacedName{Name: i, Namespace: dc.Namespace}, secret) if err != nil { logger.Error(err, fmt.Sprintf("Failed to get secret %s", i)) - return err + return result.Error(err) } if secret.ObjectMeta.Annotations == nil { secret.ObjectMeta.Annotations = make(map[string]string) } secret.ObjectMeta.Annotations[k8ssandraapi.RefreshAnnotation] = time.Now().String() - if err := reconciliation.ReconcileObject(ctx, client, requeueDelay, *secret); err != nil { - return errors.New(err.GetError().Error()) + // We need to do our own retries here instead of delegating it back up to the reconciler, because of + // the nature (time based) of the annotation we're adding. Otherwise we never complete because the + // object on the server never matches the desired object with the new time. + retries := 0 + InnerRetryLoop: + for retries <= maxRetries { + recRes := reconciliation.ReconcileObject(ctx, client, requeueDelay, *secret) + switch { + case recRes.IsError(): + return recRes + case recRes.IsRequeue(): + retries++ + continue + case recRes.IsDone(): + break InnerRetryLoop + } + } + if retries == maxRetries { + return result.Error(fmt.Errorf("failed to refresh secret %s after %d retries", i, maxRetries)) } } - return nil - + return result.Done() } diff --git a/pkg/medusa/refresh_secrets_test.go b/pkg/medusa/refresh_secrets_test.go index 908535d83..fca842c65 100644 --- a/pkg/medusa/refresh_secrets_test.go +++ b/pkg/medusa/refresh_secrets_test.go @@ -28,7 +28,8 @@ func TestRefreshSecrets_defaultSUSecret(t *testing.T) { for _, i := range secrets { assert.NoError(t, fakeClient.Create(context.Background(), &i)) } - assert.NoError(t, RefreshSecrets(&cassDC, context.Background(), fakeClient, logr.Logger{}, 0)) + recRes := RefreshSecrets(&cassDC, context.Background(), fakeClient, logr.Logger{}, 0) + assert.True(t, recRes.IsDone()) suSecret := &corev1.Secret{} assert.NoError(t, fakeClient.Get(context.Background(), types.NamespacedName{Name: "test-cluster-superuser", Namespace: "test"}, suSecret)) _, exists := suSecret.ObjectMeta.Annotations["k8ssandra.io/refresh"] @@ -55,7 +56,9 @@ func TestRefreshSecrets_customSecrets(t *testing.T) { for _, i := range secrets { assert.NoError(t, fakeClient.Create(context.Background(), &i)) } - assert.NoError(t, RefreshSecrets(&cassDC, context.Background(), fakeClient, logr.Logger{}, 0)) + + recRes := RefreshSecrets(&cassDC, context.Background(), fakeClient, logr.Logger{}, 0) + assert.True(t, recRes.IsDone()) suSecret := &corev1.Secret{} assert.NoError(t, fakeClient.Get(context.Background(), types.NamespacedName{Name: "cass-custom-superuser", Namespace: "test"}, suSecret)) _, exists := suSecret.ObjectMeta.Annotations["k8ssandra.io/refresh"] From 824732d82a2b3d0c79f251eb71d5c3ffbf2702c0 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Thu, 4 Jan 2024 14:23:45 +1100 Subject: [PATCH 21/26] Longer timeout for restore e2e test. --- test/e2e/medusa_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/medusa_test.go b/test/e2e/medusa_test.go index 1effe168f..1dfd4e92b 100644 --- a/test/e2e/medusa_test.go +++ b/test/e2e/medusa_test.go @@ -211,7 +211,7 @@ func verifyRestoreJobFinished(t *testing.T, ctx context.Context, f *framework.E2 } return !restore.Status.FinishTime.IsZero() - }, polling.medusaRestoreDone.timeout, polling.medusaRestoreDone.interval, "restore didn't finish within timeout") + }, polling.medusaRestoreDone.timeout*100, polling.medusaRestoreDone.interval, "restore didn't finish within timeout") require.Eventually(func() bool { dc := &cassdcapi.CassandraDatacenter{} From c98f5f2632032af7b1ec22fadcb3c152b3aa63dd Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Thu, 4 Jan 2024 15:20:02 +1100 Subject: [PATCH 22/26] Ensure we always get most recent version of secret even after failure. --- pkg/medusa/refresh_secrets.go | 19 ++++++++++--------- test/e2e/medusa_test.go | 3 --- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/pkg/medusa/refresh_secrets.go b/pkg/medusa/refresh_secrets.go index 3d8ac8550..2399e8143 100644 --- a/pkg/medusa/refresh_secrets.go +++ b/pkg/medusa/refresh_secrets.go @@ -32,23 +32,24 @@ func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, clie } logger.Info(fmt.Sprintf("refreshing user secrets for %v", userSecrets)) // Both Reaper and medusa secrets go into the userSecrets, so they don't need special handling. + timestamp := time.Now().String() for _, i := range userSecrets { secret := &corev1.Secret{} - err := client.Get(ctx, types.NamespacedName{Name: i, Namespace: dc.Namespace}, secret) - if err != nil { - logger.Error(err, fmt.Sprintf("Failed to get secret %s", i)) - return result.Error(err) - } - if secret.ObjectMeta.Annotations == nil { - secret.ObjectMeta.Annotations = make(map[string]string) - } - secret.ObjectMeta.Annotations[k8ssandraapi.RefreshAnnotation] = time.Now().String() // We need to do our own retries here instead of delegating it back up to the reconciler, because of // the nature (time based) of the annotation we're adding. Otherwise we never complete because the // object on the server never matches the desired object with the new time. retries := 0 InnerRetryLoop: for retries <= maxRetries { + err := client.Get(ctx, types.NamespacedName{Name: i, Namespace: dc.Namespace}, secret) + if err != nil { + logger.Error(err, fmt.Sprintf("Failed to get secret %s", i)) + return result.Error(err) + } + if secret.ObjectMeta.Annotations == nil { + secret.ObjectMeta.Annotations = make(map[string]string) + } + secret.ObjectMeta.Annotations[k8ssandraapi.RefreshAnnotation] = timestamp recRes := reconciliation.ReconcileObject(ctx, client, requeueDelay, *secret) switch { case recRes.IsError(): diff --git a/test/e2e/medusa_test.go b/test/e2e/medusa_test.go index 1dfd4e92b..c9a33b71c 100644 --- a/test/e2e/medusa_test.go +++ b/test/e2e/medusa_test.go @@ -231,9 +231,6 @@ func verifyRestoreJobFinished(t *testing.T, ctx context.Context, f *framework.E2 return false } _, exists := secret.Annotations[k8ssandraapi.RefreshAnnotation] - if !exists { - t.Logf("%#v", *secret) - } return exists }, polling.medusaRestoreDone.timeout, polling.medusaRestoreDone.interval, "superuser secret wasn't updated with refresh annotation") From a9e707fc00d394ded3afcaea96da1316e3574b14 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Thu, 4 Jan 2024 17:52:37 +1100 Subject: [PATCH 23/26] Back out changes to e2e test timeout so we can get logs again. --- test/e2e/medusa_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/medusa_test.go b/test/e2e/medusa_test.go index c9a33b71c..f40919179 100644 --- a/test/e2e/medusa_test.go +++ b/test/e2e/medusa_test.go @@ -211,7 +211,7 @@ func verifyRestoreJobFinished(t *testing.T, ctx context.Context, f *framework.E2 } return !restore.Status.FinishTime.IsZero() - }, polling.medusaRestoreDone.timeout*100, polling.medusaRestoreDone.interval, "restore didn't finish within timeout") + }, polling.medusaRestoreDone.timeout, polling.medusaRestoreDone.interval, "restore didn't finish within timeout") require.Eventually(func() bool { dc := &cassdcapi.CassandraDatacenter{} From b28e8d1731d710e7e4786412469cb14e0c84033a Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Fri, 5 Jan 2024 15:07:30 +1100 Subject: [PATCH 24/26] Fix up build variables in makefile. --- Makefile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 20338dd27..e140e0966 100644 --- a/Makefile +++ b/Makefile @@ -168,12 +168,10 @@ endif ##@ Build DATE ?= $(shell date) -VERSION ?= $(VERSION) COMMIT ?= $(shell git rev-parse --short HEAD) - build: generate fmt vet ## Build manager binary. - go build --ldflags "-X \"main.version=$(VERSION)\" -X \"main.date=$(DATE)\" -X \"main.commit=$(COMMIT)\"" -o bin/manager main.go + go build --ldflags "-X \"main.version=${VERSION}\" -X \"main.date=${DATE}\" -X \"main.commit=${COMMIT}\"" -o bin/manager main.go run: manifests generate fmt vet ## Run a controller from your host. go run ./main.go From 2ba393eeccf6a461e000ff83059c77cabccb4652 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Mon, 8 Jan 2024 13:57:25 +1100 Subject: [PATCH 25/26] Fix requeue handling and switch to making timestamp based on finishTime of restoreJob. --- .../medusa/medusarestorejob_controller.go | 2 +- pkg/medusa/refresh_secrets.go | 54 +++++++++---------- pkg/medusa/refresh_secrets_test.go | 5 +- 3 files changed, 28 insertions(+), 33 deletions(-) diff --git a/controllers/medusa/medusarestorejob_controller.go b/controllers/medusa/medusarestorejob_controller.go index 73a127ef4..2a9dbdf7f 100644 --- a/controllers/medusa/medusarestorejob_controller.go +++ b/controllers/medusa/medusarestorejob_controller.go @@ -178,7 +178,7 @@ func (r *MedusaRestoreJobReconciler) Reconcile(ctx context.Context, req ctrl.Req request.Log.Info("The restore operation is complete for DC", "CassandraDatacenter", request.Datacenter.Name) - recRes := medusa.RefreshSecrets(request.Datacenter, ctx, r.Client, request.Log, r.DefaultDelay) + recRes := medusa.RefreshSecrets(request.Datacenter, ctx, r.Client, request.Log, r.DefaultDelay, request.RestoreJob.Status.FinishTime) switch { case recRes.IsError(): return ctrl.Result{RequeueAfter: r.DefaultDelay}, recRes.GetError() diff --git a/pkg/medusa/refresh_secrets.go b/pkg/medusa/refresh_secrets.go index 2399e8143..659b24aee 100644 --- a/pkg/medusa/refresh_secrets.go +++ b/pkg/medusa/refresh_secrets.go @@ -11,15 +11,12 @@ import ( "github.com/k8ssandra/k8ssandra-operator/pkg/reconciliation" "github.com/k8ssandra/k8ssandra-operator/pkg/result" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" ) -const ( - maxRetries = 5 -) - -func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, client client.Client, logger logr.Logger, requeueDelay time.Duration) result.ReconcileResult { +func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, client client.Client, logger logr.Logger, requeueDelay time.Duration, restoreTimestamp metav1.Time) result.ReconcileResult { logger.Info(fmt.Sprintf("Restore complete for DC %#v, Refreshing secrets", dc.ObjectMeta)) userSecrets := []string{} for _, user := range dc.Spec.Users { @@ -32,37 +29,34 @@ func RefreshSecrets(dc *cassdcapi.CassandraDatacenter, ctx context.Context, clie } logger.Info(fmt.Sprintf("refreshing user secrets for %v", userSecrets)) // Both Reaper and medusa secrets go into the userSecrets, so they don't need special handling. - timestamp := time.Now().String() + requeues := 0 for _, i := range userSecrets { secret := &corev1.Secret{} // We need to do our own retries here instead of delegating it back up to the reconciler, because of // the nature (time based) of the annotation we're adding. Otherwise we never complete because the // object on the server never matches the desired object with the new time. - retries := 0 - InnerRetryLoop: - for retries <= maxRetries { - err := client.Get(ctx, types.NamespacedName{Name: i, Namespace: dc.Namespace}, secret) - if err != nil { - logger.Error(err, fmt.Sprintf("Failed to get secret %s", i)) - return result.Error(err) - } - if secret.ObjectMeta.Annotations == nil { - secret.ObjectMeta.Annotations = make(map[string]string) - } - secret.ObjectMeta.Annotations[k8ssandraapi.RefreshAnnotation] = timestamp - recRes := reconciliation.ReconcileObject(ctx, client, requeueDelay, *secret) - switch { - case recRes.IsError(): - return recRes - case recRes.IsRequeue(): - retries++ - continue - case recRes.IsDone(): - break InnerRetryLoop - } + err := client.Get(ctx, types.NamespacedName{Name: i, Namespace: dc.Namespace}, secret) + + if err != nil { + logger.Error(err, fmt.Sprintf("Failed to get secret %s", i)) + return result.Error(err) + } + if secret.ObjectMeta.Annotations == nil { + secret.ObjectMeta.Annotations = make(map[string]string) + } + secret.ObjectMeta.Annotations[k8ssandraapi.RefreshAnnotation] = restoreTimestamp.String() + recRes := reconciliation.ReconcileObject(ctx, client, requeueDelay, *secret) + switch { + case recRes.IsError(): + return recRes + case recRes.IsRequeue(): + requeues++ + continue + case recRes.IsDone(): + continue } - if retries == maxRetries { - return result.Error(fmt.Errorf("failed to refresh secret %s after %d retries", i, maxRetries)) + if requeues > 0 { + return result.RequeueSoon(requeueDelay) } } return result.Done() diff --git a/pkg/medusa/refresh_secrets_test.go b/pkg/medusa/refresh_secrets_test.go index fca842c65..643064cc1 100644 --- a/pkg/medusa/refresh_secrets_test.go +++ b/pkg/medusa/refresh_secrets_test.go @@ -28,7 +28,8 @@ func TestRefreshSecrets_defaultSUSecret(t *testing.T) { for _, i := range secrets { assert.NoError(t, fakeClient.Create(context.Background(), &i)) } - recRes := RefreshSecrets(&cassDC, context.Background(), fakeClient, logr.Logger{}, 0) + + recRes := RefreshSecrets(&cassDC, context.Background(), fakeClient, logr.Logger{}, 0, metav1.Now()) assert.True(t, recRes.IsDone()) suSecret := &corev1.Secret{} assert.NoError(t, fakeClient.Get(context.Background(), types.NamespacedName{Name: "test-cluster-superuser", Namespace: "test"}, suSecret)) @@ -57,7 +58,7 @@ func TestRefreshSecrets_customSecrets(t *testing.T) { assert.NoError(t, fakeClient.Create(context.Background(), &i)) } - recRes := RefreshSecrets(&cassDC, context.Background(), fakeClient, logr.Logger{}, 0) + recRes := RefreshSecrets(&cassDC, context.Background(), fakeClient, logr.Logger{}, 0, metav1.Now()) assert.True(t, recRes.IsDone()) suSecret := &corev1.Secret{} assert.NoError(t, fakeClient.Get(context.Background(), types.NamespacedName{Name: "cass-custom-superuser", Namespace: "test"}, suSecret)) From 905a9128c705f57b094a130e44ed366fa0bffef4 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Mon, 8 Jan 2024 17:11:07 +1100 Subject: [PATCH 26/26] Maybe let's use StartTime instead of finishTime to ensure that we don't endlessly loop. --- controllers/medusa/medusarestorejob_controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controllers/medusa/medusarestorejob_controller.go b/controllers/medusa/medusarestorejob_controller.go index 2a9dbdf7f..c5f194ba8 100644 --- a/controllers/medusa/medusarestorejob_controller.go +++ b/controllers/medusa/medusarestorejob_controller.go @@ -178,7 +178,7 @@ func (r *MedusaRestoreJobReconciler) Reconcile(ctx context.Context, req ctrl.Req request.Log.Info("The restore operation is complete for DC", "CassandraDatacenter", request.Datacenter.Name) - recRes := medusa.RefreshSecrets(request.Datacenter, ctx, r.Client, request.Log, r.DefaultDelay, request.RestoreJob.Status.FinishTime) + recRes := medusa.RefreshSecrets(request.Datacenter, ctx, r.Client, request.Log, r.DefaultDelay, request.RestoreJob.Status.StartTime) switch { case recRes.IsError(): return ctrl.Result{RequeueAfter: r.DefaultDelay}, recRes.GetError()