Skip to content

Commit

Permalink
Disallow relocation execution when a cluster is unreachable
Browse files Browse the repository at this point in the history
Added a check that stops relocation reconciliation when any of the clusters
in the DRPolicy is unreachable. This prevents misclassifying cluster state
after hub recovery, which could otherwise lead to undesired results and
inconsistencies.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2304182

Signed-off-by: Benamar Mekhissi <[email protected]>
  • Loading branch information
Benamar Mekhissi authored and raghavendra-talur committed Oct 7, 2024
1 parent b9a39d2 commit 791fef0
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 19 deletions.
8 changes: 8 additions & 0 deletions internal/controller/drplacementcontrol.go
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,14 @@ func (d *DRPCInstance) RunRelocate() (bool, error) {

const done = true

if d.reconciler.numClustersQueriedSuccessfully != len(d.drPolicy.Spec.DRClusters) {
d.log.Info("Can't progress with relocation -- Not all clusters are reachable",
"numClustersQueriedSuccessfully", d.reconciler.numClustersQueriedSuccessfully,
"NumOfClusters", len(d.drPolicy.Spec.DRClusters))

return !done, nil
}

preferredCluster := d.instance.Spec.PreferredCluster
preferredClusterNamespace := d.instance.Spec.PreferredCluster

Expand Down
37 changes: 20 additions & 17 deletions internal/controller/drplacementcontrol_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,16 @@ type ProgressCallback func(string, string)
// DRPlacementControlReconciler reconciles a DRPlacementControl object
type DRPlacementControlReconciler struct {
client.Client
APIReader client.Reader
Log logr.Logger
MCVGetter rmnutil.ManagedClusterViewGetter
Scheme *runtime.Scheme
Callback ProgressCallback
eventRecorder *rmnutil.EventReporter
savedInstanceStatus rmn.DRPlacementControlStatus
ObjStoreGetter ObjectStoreGetter
RateLimiter *workqueue.TypedRateLimiter[reconcile.Request]
APIReader client.Reader
Log logr.Logger
MCVGetter rmnutil.ManagedClusterViewGetter
Scheme *runtime.Scheme
Callback ProgressCallback
eventRecorder *rmnutil.EventReporter
savedInstanceStatus rmn.DRPlacementControlStatus
ObjStoreGetter ObjectStoreGetter
RateLimiter *workqueue.TypedRateLimiter[reconcile.Request]
numClustersQueriedSuccessfully int
}

// SetupWithManager sets up the controller with the Manager.
Expand Down Expand Up @@ -373,11 +374,13 @@ func (r *DRPlacementControlReconciler) createDRPCInstance(
return nil, err
}

vrgs, _, _, err := getVRGsFromManagedClusters(r.MCVGetter, drpc, drClusters, vrgNamespace, log)
vrgs, cqs, _, err := getVRGsFromManagedClusters(r.MCVGetter, drpc, drClusters, vrgNamespace, log)
if err != nil {
return nil, err
}

r.numClustersQueriedSuccessfully = cqs

d := &DRPCInstance{
reconciler: r,
ctx: ctx,
Expand Down Expand Up @@ -1097,7 +1100,7 @@ func getVRGsFromManagedClusters(
annotations[DRPCNameAnnotation] = drpc.Name
annotations[DRPCNamespaceAnnotation] = drpc.Namespace

var clustersQueriedSuccessfully int
var numClustersQueriedSuccessfully int

var failedCluster string

Expand All @@ -1109,7 +1112,7 @@ func getVRGsFromManagedClusters(
// Only NotFound error is accepted
if errors.IsNotFound(err) {
log.Info(fmt.Sprintf("VRG not found on %q", drCluster.Name))
clustersQueriedSuccessfully++
numClustersQueriedSuccessfully++

continue
}
Expand All @@ -1121,7 +1124,7 @@ func getVRGsFromManagedClusters(
continue
}

clustersQueriedSuccessfully++
numClustersQueriedSuccessfully++

if rmnutil.ResourceIsDeleted(drCluster) {
log.Info("Skipping VRG on deleted drcluster", "drcluster", drCluster.Name, "vrg", vrg.Name)
Expand All @@ -1135,15 +1138,15 @@ func getVRGsFromManagedClusters(
}

// We are done if we successfully queried all drClusters
if clustersQueriedSuccessfully == len(drClusters) {
return vrgs, clustersQueriedSuccessfully, "", nil
if numClustersQueriedSuccessfully == len(drClusters) {
return vrgs, numClustersQueriedSuccessfully, "", nil
}

if clustersQueriedSuccessfully == 0 {
if numClustersQueriedSuccessfully == 0 {
return vrgs, 0, "", fmt.Errorf("failed to retrieve VRGs from clusters")
}

return vrgs, clustersQueriedSuccessfully, failedCluster, nil
return vrgs, numClustersQueriedSuccessfully, failedCluster, nil
}

func (r *DRPlacementControlReconciler) deleteClonedPlacementRule(ctx context.Context,
Expand Down
4 changes: 2 additions & 2 deletions internal/controller/drplacementcontrol_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2479,8 +2479,8 @@ var _ = Describe("DRPlacementControl Reconciler", func() {
clearFakeUserPlacementRuleStatus(UserPlacementRuleName, DefaultDRPCNamespace)
clearDRPCStatus()
expectedAction := rmn.ActionRelocate
expectedPhase := rmn.Relocated
exptectedPorgression := rmn.ProgressionCleaningUp
expectedPhase := rmn.DRState("")
exptectedPorgression := rmn.ProgressionStatus("")
verifyDRPCStateAndProgression(expectedAction, expectedPhase, exptectedPorgression)

// User intervention is required (simulate user intervention)
Expand Down

0 comments on commit 791fef0

Please sign in to comment.