From 54c065173546c14c6760e361880ad239823118dc Mon Sep 17 00:00:00 2001 From: Elena Gershkovich Date: Tue, 5 Mar 2024 13:40:46 +0200 Subject: [PATCH] Add VolumeGroupReplication support Signed-off-by: Elena Gershkovich --- config/dr-cluster/rbac/role.yaml | 20 + config/rbac/role.yaml | 20 + ...hift.io_volumegroupreplicationclasses.yaml | 76 ++++ ...ift.io_volumegroupreplicationcontents.yaml | 176 ++++++++ ....openshift.io_volumegroupreplications.yaml | 257 +++++++++++ .../volumereplicationgroup_controller.go | 54 ++- internal/controller/vrg_volrep.go | 423 +++++++++++++----- internal/controller/vrg_volrep_test.go | 409 ++++++++++++++++- test/addons/rook-pool/storage-class.yaml | 2 + 9 files changed, 1308 insertions(+), 129 deletions(-) create mode 100644 hack/test/replication.storage.openshift.io_volumegroupreplicationclasses.yaml create mode 100644 hack/test/replication.storage.openshift.io_volumegroupreplicationcontents.yaml create mode 100644 hack/test/replication.storage.openshift.io_volumegroupreplications.yaml diff --git a/config/dr-cluster/rbac/role.yaml b/config/dr-cluster/rbac/role.yaml index 47ef42d6ae..c294be8032 100644 --- a/config/dr-cluster/rbac/role.yaml +++ b/config/dr-cluster/rbac/role.yaml @@ -235,6 +235,26 @@ rules: - get - list - watch +- apiGroups: + - replication.storage.openshift.io + resources: + - volumegroupreplicationclasses + verbs: + - get + - list + - watch +- apiGroups: + - replication.storage.openshift.io + resources: + - volumegroupreplications + verbs: + - create + - update + - delete + - get + - list + - watch + - patch - apiGroups: - storage.k8s.io resources: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 6646480582..eb0de6094f 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -460,6 +460,26 @@ rules: - get - patch - update +- apiGroups: + - replication.storage.openshift.io + resources: + - volumegroupreplicationclasses + verbs: + - get + - list + - watch +- apiGroups: + - replication.storage.openshift.io + resources: + - volumegroupreplications + verbs: + - create + - delete + - get + - list + - patch + - update + - watch - apiGroups: - replication.storage.openshift.io resources: diff --git a/hack/test/replication.storage.openshift.io_volumegroupreplicationclasses.yaml b/hack/test/replication.storage.openshift.io_volumegroupreplicationclasses.yaml new file mode 100644 index 0000000000..b7e6b1bf4e --- /dev/null +++ b/hack/test/replication.storage.openshift.io_volumegroupreplicationclasses.yaml @@ -0,0 +1,76 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.15.0 + name: volumegroupreplicationclasses.replication.storage.openshift.io +spec: + group: replication.storage.openshift.io + names: + kind: VolumeGroupReplicationClass + listKind: VolumeGroupReplicationClassList + plural: volumegroupreplicationclasses + singular: volumegroupreplicationclass + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: VolumeGroupReplicationClass is the Schema for the volumegroupreplicationclasses + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + VolumeGroupReplicationClassSpec specifies parameters that an underlying storage system uses + when creating a volumegroup replica. A specific VolumeGroupReplicationClass is used by specifying + its name in a VolumeGroupReplication object. + properties: + parameters: + additionalProperties: + type: string + description: |- + Parameters is a key-value map with storage provisioner specific configurations for + creating volume group replicas + type: object + x-kubernetes-validations: + - message: parameters are immutable + rule: self == oldSelf + provisioner: + description: Provisioner is the name of storage provisioner + type: string + x-kubernetes-validations: + - message: provisioner is immutable + rule: self == oldSelf + required: + - provisioner + type: object + x-kubernetes-validations: + - message: parameters are immutable + rule: has(self.parameters) == has(oldSelf.parameters) + status: + description: VolumeGroupReplicationClassStatus defines the observed state + of VolumeGroupReplicationClass + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/hack/test/replication.storage.openshift.io_volumegroupreplicationcontents.yaml b/hack/test/replication.storage.openshift.io_volumegroupreplicationcontents.yaml new file mode 100644 index 0000000000..be67a9e331 --- /dev/null +++ b/hack/test/replication.storage.openshift.io_volumegroupreplicationcontents.yaml @@ -0,0 +1,176 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.15.0 + name: volumegroupreplicationcontents.replication.storage.openshift.io +spec: + group: replication.storage.openshift.io + names: + kind: VolumeGroupReplicationContent + listKind: VolumeGroupReplicationContentList + plural: volumegroupreplicationcontents + singular: volumegroupreplicationcontent + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: VolumeGroupReplicationContent is the Schema for the volumegroupreplicationcontents + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: VolumeGroupReplicationContentSpec defines the desired state + of VolumeGroupReplicationContent + properties: + provisioner: + description: |- + provisioner is the name of the CSI driver used to create the physical + volume group on + the underlying storage system. + This MUST be the same as the name returned by the CSI GetPluginName() call for + that driver. + Required. + type: string + source: + description: |- + Source specifies whether the snapshot is (or should be) dynamically provisioned + or already exists, and just requires a Kubernetes object representation. + This field is immutable after creation. + Required. + properties: + volumeHandles: + description: |- + VolumeHandles is a list of volume handles on the backend to be grouped + and replicated. + items: + type: string + type: array + required: + - volumeHandles + type: object + volumeGroupReplicationClassName: + description: |- + VolumeGroupReplicationClassName is the name of the VolumeGroupReplicationClass from + which this group replication was (or will be) created. + type: string + volumeGroupReplicationHandle: + description: |- + VolumeGroupReplicationHandle is a unique id returned by the CSI driver + to identify the VolumeGroupReplication on the storage system. + type: string + volumeGroupReplicationRef: + description: |- + VolumeGroupreplicationRef specifies the VolumeGroupReplication object to which this + VolumeGroupReplicationContent object is bound. + VolumeGroupReplication.Spec.VolumeGroupReplicationContentName field must reference to + this VolumeGroupReplicationContent's name for the bidirectional binding to be valid. + For a pre-existing VolumeGroupReplicationContent object, name and namespace of the + VolumeGroupReplication object MUST be provided for binding to happen. + This field is immutable after creation. + Required. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + TODO: this design is not final and this field is subject to change in the future. + type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic + x-kubernetes-validations: + - message: both volumeGroupReplicationRef.name and volumeGroupReplicationRef.namespace + must be set + rule: has(self.name) && has(self.__namespace__) + - message: volumeGroupReplicationRef is immutable + rule: self == oldSelf + required: + - provisioner + - source + - volumeGroupReplicationHandle + - volumeGroupReplicationRef + type: object + status: + description: VolumeGroupReplicationContentStatus defines the status of + VolumeGroupReplicationContent + properties: + persistentVolumeRefList: + description: |- + PersistentVolumeRefList is the list of of PV for the group replication + The maximum number of allowed PV in the group is 100. + items: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + TODO: Add other useful fields. apiVersion, kind, uid? + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. + type: string + type: object + x-kubernetes-map-type: atomic + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/hack/test/replication.storage.openshift.io_volumegroupreplications.yaml b/hack/test/replication.storage.openshift.io_volumegroupreplications.yaml new file mode 100644 index 0000000000..d8f5757cb2 --- /dev/null +++ b/hack/test/replication.storage.openshift.io_volumegroupreplications.yaml @@ -0,0 +1,257 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.15.0 + name: volumegroupreplications.replication.storage.openshift.io +spec: + group: replication.storage.openshift.io + names: + kind: VolumeGroupReplication + listKind: VolumeGroupReplicationList + plural: volumegroupreplications + singular: volumegroupreplication + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: VolumeGroupReplication is the Schema for the volumegroupreplications + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: VolumeGroupReplicationSpec defines the desired state of VolumeGroupReplication + properties: + autoResync: + default: false + description: |- + AutoResync represents the group to be auto resynced when + ReplicationState is "secondary" + type: boolean + replicationState: + description: |- + ReplicationState represents the replication operation to be performed on the group. + Supported operations are "primary", "secondary" and "resync" + enum: + - primary + - secondary + - resync + type: string + source: + description: |- + Source specifies where a group replications will be created from. + This field is immutable after creation. + Required. + properties: + selector: + description: |- + Selector is a label query over persistent volume claims that are to be + grouped together for replication. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + x-kubernetes-validations: + - message: selector is immutable + rule: self == oldSelf + type: object + x-kubernetes-validations: + - message: source is immutable + rule: self == oldSelf + volumeGroupReplicationClassName: + description: volumeGroupReplicationClassName is the volumeGroupReplicationClass + name for this VolumeGroupReplication resource + type: string + x-kubernetes-validations: + - message: volumeGroupReplicationClassName is immutable + rule: self == oldSelf + volumeGroupReplicationContentName: + description: Name of the VolumeGroupReplicationContent object created + for this volumeGroupReplication + type: string + x-kubernetes-validations: + - message: volumeGroupReplicationContentName is immutable + rule: self == oldSelf + volumeReplicationClassName: + description: volumeReplicationClassName is the volumeReplicationClass + name for VolumeReplication object + type: string + x-kubernetes-validations: + - message: volumReplicationClassName is immutable + rule: self == oldSelf + volumeReplicationName: + description: Name of the VolumeReplication object created for this + volumeGroupReplication + type: string + x-kubernetes-validations: + - message: volumeReplicationName is immutable + rule: self == oldSelf + required: + - autoResync + - replicationState + - source + - volumeGroupReplicationClassName + - volumeReplicationClassName + type: object + status: + description: VolumeGroupReplicationStatus defines the observed state of + VolumeGroupReplication + properties: + conditions: + description: Conditions are the list of conditions and their status. + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions. For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + lastCompletionTime: + format: date-time + type: string + lastStartTime: + format: date-time + type: string + lastSyncBytes: + format: int64 + type: integer + lastSyncDuration: + type: string + lastSyncTime: + format: date-time + type: string + message: + type: string + observedGeneration: + description: observedGeneration is the last generation change the + operator has dealt with + format: int64 + type: integer + state: + description: State captures the latest state of the replication operation. + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/internal/controller/volumereplicationgroup_controller.go b/internal/controller/volumereplicationgroup_controller.go index 73d7726958..cbb870c56d 100644 --- a/internal/controller/volumereplicationgroup_controller.go +++ b/internal/controller/volumereplicationgroup_controller.go @@ -98,7 +98,8 @@ func (r *VolumeReplicationGroupReconciler) SetupWithManager( builder.WithPredicates(rmnutil.CreateOrDeleteOrResourceVersionUpdatePredicate{}), ). Watches(&corev1.ConfigMap{}, handler.EnqueueRequestsFromMapFunc(r.configMapFun)). - Owns(&volrep.VolumeReplication{}) + Owns(&volrep.VolumeReplication{}). + Owns(&volrep.VolumeGroupReplication{}) if !ramenConfig.VolSync.Disabled { r.Log.Info("VolSync enabled; adding owns and watches") @@ -363,6 +364,8 @@ func filterPVC(reader client.Reader, pvc *corev1.PersistentVolumeClaim, log logr // +kubebuilder:rbac:groups=ramendr.openshift.io,resources=volumereplicationgroups/finalizers,verbs=update // +kubebuilder:rbac:groups=replication.storage.openshift.io,resources=volumereplications,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=replication.storage.openshift.io,resources=volumereplicationclasses,verbs=get;list;watch +// +kubebuilder:rbac:groups=replication.storage.openshift.io,resources=volumegroupreplications,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=replication.storage.openshift.io,resources=volumegroupreplicationclasses,verbs=get;list;watch // +kubebuilder:rbac:groups=storage.k8s.io,resources=storageclasses,verbs=get;list;watch;create;update // +kubebuilder:rbac:groups=storage.k8s.io,resources=volumeattachments,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch @@ -404,6 +407,7 @@ func (r *VolumeReplicationGroupReconciler) Reconcile(ctx context.Context, req ct volRepPVCs: []corev1.PersistentVolumeClaim{}, volSyncPVCs: []corev1.PersistentVolumeClaim{}, replClassList: &volrep.VolumeReplicationClassList{}, + grpReplClassList: &volrep.VolumeGroupReplicationClassList{}, namespacedName: req.NamespacedName.String(), objectStorers: make(map[string]cachedObjectStorer), storageClassCache: make(map[string]*storagev1.StorageClass), @@ -474,6 +478,7 @@ type VRGInstance struct { volRepPVCs []corev1.PersistentVolumeClaim volSyncPVCs []corev1.PersistentVolumeClaim replClassList *volrep.VolumeReplicationClassList + grpReplClassList *volrep.VolumeGroupReplicationClassList storageClassCache map[string]*storagev1.StorageClass vrgObjectProtected *metav1.Condition kubeObjectsProtected *metav1.Condition @@ -689,7 +694,7 @@ func (v *VRGInstance) updatePVCList() error { return nil } - if len(v.replClassList.Items) == 0 { + if len(v.replClassList.Items) == 0 && len(v.grpReplClassList.Items) == 0 { v.volSyncPVCs = make([]corev1.PersistentVolumeClaim, len(pvcList.Items)) numCopied := copy(v.volSyncPVCs, pvcList.Items) v.log.Info("No VolumeReplicationClass available. Using all PVCs with VolSync", "pvcCount", numCopied) @@ -770,17 +775,28 @@ func (v *VRGInstance) updateReplicationClassList() error { client.MatchingLabels(labelSelector.MatchLabels), } - if err := v.reconciler.List(v.ctx, v.replClassList, listOptions...); err != nil { - v.log.Error(err, "Failed to list Replication Classes", - "labeled", labels.Set(labelSelector.MatchLabels)) + if !rmnutil.IsCGEnabled(v.instance.GetAnnotations()) { + if err := v.reconciler.List(v.ctx, v.replClassList, listOptions...); err != nil { + v.log.Error(err, "Failed to list Replication Classes", + "labeled", labels.Set(labelSelector.MatchLabels)) + + return fmt.Errorf("failed to list Replication Classes, %w", err) + } + + v.log.Info("Number of Replication Classes", "count", len(v.replClassList.Items)) + } else { + if err := v.reconciler.List(v.ctx, v.grpReplClassList, listOptions...); err != nil { + v.log.Error(err, "Failed to list Group Replication Classes", + "labeled", labels.Set(labelSelector.MatchLabels)) + + return fmt.Errorf("failed to list Group Replication Classes, %w", err) + } - return fmt.Errorf("failed to list Replication Classes, %w", err) + v.log.Info("Number of Group Replication Classes", "count", len(v.grpReplClassList.Items)) } v.vrcUpdated = true - v.log.Info("Number of Replication Classes", "count", len(v.replClassList.Items)) - return nil } @@ -800,6 +816,7 @@ func (v *VRGInstance) separatePVCsUsingVRGStatus(pvcList *corev1.PersistentVolum } } +//nolint:gocognit,cyclop func (v *VRGInstance) separatePVCsUsingStorageClassProvisioner(pvcList *corev1.PersistentVolumeClaimList) error { for idx := range pvcList.Items { pvc := &pvcList.Items[idx] @@ -818,12 +835,23 @@ func (v *VRGInstance) separatePVCsUsingStorageClassProvisioner(pvcList *corev1.P replicationClassMatchFound := false - for _, replicationClass := range v.replClassList.Items { - if storageClass.Provisioner == replicationClass.Spec.Provisioner { - v.volRepPVCs = append(v.volRepPVCs, *pvc) - replicationClassMatchFound = true + if rmnutil.IsCGEnabled(v.instance.GetAnnotations()) { + for _, replicationClass := range v.grpReplClassList.Items { + if storageClass.Provisioner == replicationClass.Spec.Provisioner { + v.volRepPVCs = append(v.volRepPVCs, *pvc) + replicationClassMatchFound = true - break + break + } + } + } else { + for _, replicationClass := range v.replClassList.Items { + if storageClass.Provisioner == replicationClass.Spec.Provisioner { + v.volRepPVCs = append(v.volRepPVCs, *pvc) + replicationClassMatchFound = true + + break + } } } diff --git a/internal/controller/vrg_volrep.go b/internal/controller/vrg_volrep.go index 0220561623..5d5c85639d 100644 --- a/internal/controller/vrg_volrep.go +++ b/internal/controller/vrg_volrep.go @@ -18,6 +18,7 @@ import ( storagev1 "k8s.io/api/storage/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -66,7 +67,7 @@ func (v *VRGInstance) reconcileVolRepsAsPrimary() { } // If VR did not reach primary state, it is fine to still upload the PV and continue processing - requeueResult, _, err := v.processVRAsPrimary(pvcNamespacedName, log) + requeueResult, _, err := v.processVRAsPrimary(pvcNamespacedName, pvc, log) if requeueResult { v.requeue() } @@ -158,7 +159,7 @@ func (v *VRGInstance) reconcileVRAsSecondary(pvc *corev1.PersistentVolumeClaim, pvcNamespacedName := types.NamespacedName{Name: pvc.Name, Namespace: pvc.Namespace} - requeueResult, ready, err := v.processVRAsSecondary(pvcNamespacedName, log) + requeueResult, ready, err := v.processVRAsSecondary(pvcNamespacedName, pvc, log) if err != nil { log.Info("Failure in getting or creating VolumeReplication resource for PersistentVolumeClaim", "errorValue", err) @@ -257,7 +258,7 @@ func (v *VRGInstance) updateProtectedPVCs(pvc *corev1.PersistentVolumeClaim) err func setPVCStorageIdentifiers( protectedPVC *ramendrv1alpha1.ProtectedPVC, storageClass *storagev1.StorageClass, - volumeReplicationClass *volrep.VolumeReplicationClass, + volumeReplicationClass client.Object, ) { protectedPVC.StorageIdentifiers.StorageProvisioner = storageClass.Provisioner @@ -325,6 +326,7 @@ func (v *VRGInstance) preparePVCForVRProtection(pvc *corev1.PersistentVolumeClai return v.protectPVC(pvc, log), !skip } +//nolint:funlen,cyclop func (v *VRGInstance) protectPVC(pvc *corev1.PersistentVolumeClaim, log logr.Logger) bool { const requeue = true @@ -377,6 +379,17 @@ func (v *VRGInstance) protectPVC(pvc *corev1.PersistentVolumeClaim, log logr.Log return requeue } + + if rmnutil.IsCGEnabled(v.instance.GetAnnotations()) { + if err := v.addConsistencyGroupLabel(pvc); err != nil { + log.Info("Requeuing, as adding label for consistency group failed", "errorValue", err) + + msg := "Failed to add label for consistency group to PVC" + v.updatePVCDataReadyCondition(pvc.Namespace, pvc.Name, VRGConditionReasonError, msg) + + return requeue + } + } } return !requeue @@ -460,6 +473,8 @@ func (v *VRGInstance) preparePVCForVRDeletion(pvc *corev1.PersistentVolumeClaim, delete(pvc.Annotations, pvcVRAnnotationProtectedKey) delete(pvc.Annotations, pvcVRAnnotationArchivedKey) + delete(pvc.Labels, ConsistencyGroupLabel) + log1 := log.WithValues("owner removed", ownerRemoved, "finalizer removed", finalizerRemoved) if err := v.reconciler.Update(v.ctx, pvc); err != nil { @@ -845,7 +860,7 @@ func (v *VRGInstance) reconcileVRForDeletion(pvc *corev1.PersistentVolumeClaim, return !requeue } } else { - requeueResult, ready, err := v.processVRAsPrimary(pvcNamespacedName, log) + requeueResult, ready, err := v.processVRAsPrimary(pvcNamespacedName, pvc, log) switch { case err != nil: log.Info("Requeuing due to failure in getting or creating VolumeReplication resource for PersistentVolumeClaim", @@ -867,7 +882,7 @@ func (v *VRGInstance) undoPVCFinalizersAndPVRetention(pvc *corev1.PersistentVolu pvcNamespacedName := types.NamespacedName{Name: pvc.Name, Namespace: pvc.Namespace} - if err := v.deleteVR(pvcNamespacedName, log); err != nil { + if err := v.deleteVR(pvcNamespacedName, pvc, log); err != nil { log.Info("Requeuing due to failure in finalizing VolumeReplication resource for PersistentVolumeClaim", "errorValue", err) @@ -904,10 +919,11 @@ func (v *VRGInstance) reconcileMissingVR(pvc *corev1.PersistentVolumeClaim, log return !vrMissing, !requeue } - volRep := &volrep.VolumeReplication{} + var volRep client.Object + vrNamespacedName := types.NamespacedName{Name: pvc.Name, Namespace: pvc.Namespace} - err := v.reconciler.Get(v.ctx, vrNamespacedName, volRep) + err := v.getVolumeReplication(pvc, vrNamespacedName, &volRep) if err == nil { if rmnutil.ResourceIsDeleted(volRep) { log.Info("Requeuing due to processing a VR under deletion") @@ -936,6 +952,69 @@ func (v *VRGInstance) reconcileMissingVR(pvc *corev1.PersistentVolumeClaim, log return vrMissing, !requeue } +func (v *VRGInstance) getVolumeReplication(pvc *corev1.PersistentVolumeClaim, + vrNamespacedName types.NamespacedName, volRep *client.Object, +) error { + cg, ok := pvc.GetLabels()[ConsistencyGroupLabel] + if ok && rmnutil.IsCGEnabled(v.instance.GetAnnotations()) { + vrNamespacedName.Name = cg + v.instance.Name + + *volRep = &volrep.VolumeGroupReplication{} + } else { + *volRep = &volrep.VolumeReplication{} + } + + return v.reconciler.Get(v.ctx, vrNamespacedName, *volRep) +} + +func (v *VRGInstance) createVolumeReplication(vrNamespacedName types.NamespacedName, + volumeReplicationClass client.Object, state volrep.ReplicationState, +) client.Object { + volRep := &volrep.VolumeReplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: vrNamespacedName.Name, + Namespace: vrNamespacedName.Namespace, + Labels: rmnutil.OwnerLabels(v.instance), + }, + Spec: volrep.VolumeReplicationSpec{ + DataSource: corev1.TypedLocalObjectReference{ + Kind: "PersistentVolumeClaim", + Name: vrNamespacedName.Name, + APIGroup: new(string), + }, + ReplicationState: state, + VolumeReplicationClass: volumeReplicationClass.GetName(), + AutoResync: v.autoResync(state), + }, + } + + return volRep +} + +func (v *VRGInstance) createVolumeGroupReplication(storageID string, vrNamespacedName types.NamespacedName, + volumeReplicationClass client.Object, state volrep.ReplicationState, +) client.Object { + selector := metav1.AddLabelToSelector(&v.recipeElements.PvcSelector.LabelSelector, + ConsistencyGroupLabel, storageID) + + volRep := &volrep.VolumeGroupReplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: vrNamespacedName.Name, + Namespace: vrNamespacedName.Namespace, + Labels: rmnutil.OwnerLabels(v.instance), + }, + Spec: volrep.VolumeGroupReplicationSpec{ + ReplicationState: state, + VolumeGroupReplicationClassName: volumeReplicationClass.GetName(), + Source: volrep.VolumeGroupReplicationSource{ + Selector: selector, + }, + }, + } + + return volRep +} + func (v *VRGInstance) deleteClusterDataInS3Stores(log logr.Logger) error { log.Info("Delete cluster data in", "s3Profiles", v.instance.Spec.S3Profiles) @@ -1009,9 +1088,11 @@ func (v *VRGInstance) s3StoreDo(do func(ObjectStorer) error, msg, s3ProfileName // - a boolean indicating if a reconcile requeue is required // - a boolean indicating if VR is already at the desired state // - any errors during processing -func (v *VRGInstance) processVRAsPrimary(vrNamespacedName types.NamespacedName, log logr.Logger) (bool, bool, error) { +func (v *VRGInstance) processVRAsPrimary(vrNamespacedName types.NamespacedName, + pvc *corev1.PersistentVolumeClaim, log logr.Logger, +) (bool, bool, error) { if v.instance.Spec.Async != nil { - return v.createOrUpdateVR(vrNamespacedName, volrep.Primary, log) + return v.createOrUpdateVR(vrNamespacedName, pvc, volrep.Primary, log) } // TODO: createOrUpdateVR does two things. It modifies the VR and also @@ -1041,9 +1122,11 @@ func (v *VRGInstance) processVRAsPrimary(vrNamespacedName types.NamespacedName, // - a boolean indicating if a reconcile requeue is required // - a boolean indicating if VR is already at the desired state // - any errors during processing -func (v *VRGInstance) processVRAsSecondary(vrNamespacedName types.NamespacedName, log logr.Logger) (bool, bool, error) { +func (v *VRGInstance) processVRAsSecondary(vrNamespacedName types.NamespacedName, + pvc *corev1.PersistentVolumeClaim, log logr.Logger, +) (bool, bool, error) { if v.instance.Spec.Async != nil { - return v.createOrUpdateVR(vrNamespacedName, volrep.Secondary, log) + return v.createOrUpdateVR(vrNamespacedName, pvc, volrep.Secondary, log) } // TODO: createOrUpdateVR does two things. It modifies the VR and also @@ -1080,13 +1163,13 @@ func (v *VRGInstance) processVRAsSecondary(vrNamespacedName types.NamespacedName // - a boolean indicating if VR is already at the desired state // - any errors during processing func (v *VRGInstance) createOrUpdateVR(vrNamespacedName types.NamespacedName, - state volrep.ReplicationState, log logr.Logger, + pvc *corev1.PersistentVolumeClaim, state volrep.ReplicationState, log logr.Logger, ) (bool, bool, error) { const requeue = true - volRep := &volrep.VolumeReplication{} + var volRep client.Object - err := v.reconciler.Get(v.ctx, vrNamespacedName, volRep) + err := v.getVolumeReplication(pvc, vrNamespacedName, &volRep) if err != nil { if !k8serrors.IsNotFound(err) { log.Error(err, "Failed to get VolumeReplication resource", "resource", vrNamespacedName) @@ -1104,7 +1187,7 @@ func (v *VRGInstance) createOrUpdateVR(vrNamespacedName types.NamespacedName, } // Create VR for PVC - if err = v.createVR(vrNamespacedName, state); err != nil { + if err = v.createVR(vrNamespacedName, pvc, state); err != nil { log.Error(err, "Failed to create VolumeReplication resource", "resource", vrNamespacedName) rmnutil.ReportIfNotPresent(v.reconciler.eventRecorder, v.instance, corev1.EventTypeWarning, rmnutil.EventReasonVRCreateFailed, err.Error()) @@ -1124,7 +1207,7 @@ func (v *VRGInstance) createOrUpdateVR(vrNamespacedName types.NamespacedName, return !requeue, false, nil } - return v.updateVR(volRep, state, log) + return v.updateVR(pvc, volRep, state, log) } func (v *VRGInstance) autoResync(state volrep.ReplicationState) bool { @@ -1143,20 +1226,40 @@ func (v *VRGInstance) autoResync(state volrep.ReplicationState) bool { // - a boolean indicating if a reconcile requeue is required // - a boolean indicating if VR is already at the desired state // - any errors during the process of updating the resource -func (v *VRGInstance) updateVR(volRep *volrep.VolumeReplication, +func (v *VRGInstance) updateVR(pvc *corev1.PersistentVolumeClaim, volRep client.Object, state volrep.ReplicationState, log logr.Logger, ) (bool, bool, error) { const requeue = true - // If state is already as desired, check the status - if volRep.Spec.ReplicationState == state && volRep.Spec.AutoResync == v.autoResync(state) { - log.Info("VolumeReplication and VolumeReplicationGroup state and autoresync match. Proceeding to status check") + _, ok := pvc.GetLabels()[ConsistencyGroupLabel] + if ok && rmnutil.IsCGEnabled(v.instance.GetAnnotations()) { + log.Info("Update VolumeGroupReplication for PVC %s/%s", pvc.GetNamespace(), pvc.GetName()) - return !requeue, v.checkVRStatus(volRep), nil - } + if volRep.(*volrep.VolumeGroupReplication).Spec.ReplicationState == state && + volRep.(*volrep.VolumeGroupReplication).Spec.AutoResync == v.autoResync(state) { + log.Info("VolumeGroupReplication and VolumeReplicationGroup state match. Proceeding to status check") - volRep.Spec.ReplicationState = state - volRep.Spec.AutoResync = v.autoResync(state) + return !requeue, + v.checkVRStatus(pvc, volRep, &volRep.(*volrep.VolumeGroupReplication).Status.VolumeReplicationStatus), + nil + } + + volRep.(*volrep.VolumeGroupReplication).Spec.ReplicationState = state + volRep.(*volrep.VolumeGroupReplication).Spec.AutoResync = v.autoResync(state) + } else { + log.Info("Update VolumeReplication for PVC %s/%s", pvc.GetNamespace(), pvc.GetName()) + + // If state is already as desired, check the status + if volRep.(*volrep.VolumeReplication).Spec.ReplicationState == state && + volRep.(*volrep.VolumeReplication).Spec.AutoResync == v.autoResync(state) { + log.Info("VolumeReplication and VolumeReplicationGroup state and autoresync match. Proceeding to status check") + + return !requeue, v.checkVRStatus(pvc, volRep, &volRep.(*volrep.VolumeReplication).Status), nil + } + + volRep.(*volrep.VolumeReplication).Spec.ReplicationState = state + volRep.(*volrep.VolumeReplication).Spec.AutoResync = v.autoResync(state) + } if err := v.reconciler.Update(v.ctx, volRep); err != nil { log.Error(err, "Failed to update VolumeReplication resource", @@ -1166,7 +1269,7 @@ func (v *VRGInstance) updateVR(volRep *volrep.VolumeReplication, rmnutil.EventReasonVRUpdateFailed, err.Error()) msg := "Failed to update VolumeReplication resource" - v.updatePVCDataReadyCondition(volRep.Namespace, volRep.Name, VRGConditionReasonError, msg) + v.updatePVCDataReadyCondition(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonError, msg) return requeue, false, fmt.Errorf("failed to update VolumeReplication resource"+ " (%s/%s) as %s, belonging to VolumeReplicationGroup (%s/%s), %w", @@ -1178,35 +1281,34 @@ func (v *VRGInstance) updateVR(volRep *volrep.VolumeReplication, volRep.GetName(), volRep.GetNamespace(), state)) // Just updated the state of the VolRep. Mark it as progressing. msg := "Updated VolumeReplication resource for PVC" - v.updatePVCDataReadyCondition(volRep.Namespace, volRep.Name, VRGConditionReasonProgressing, msg) + v.updatePVCDataReadyCondition(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonProgressing, msg) return !requeue, false, nil } // createVR creates a VolumeReplication CR with a PVC as its data source. -func (v *VRGInstance) createVR(vrNamespacedName types.NamespacedName, state volrep.ReplicationState) error { +func (v *VRGInstance) createVR(vrNamespacedName types.NamespacedName, + pvc *corev1.PersistentVolumeClaim, state volrep.ReplicationState, +) error { volumeReplicationClass, err := v.selectVolumeReplicationClass(vrNamespacedName) if err != nil { return fmt.Errorf("failed to find the appropriate VolumeReplicationClass (%s) %w", v.instance.Name, err) } - volRep := &volrep.VolumeReplication{ - ObjectMeta: metav1.ObjectMeta{ - Name: vrNamespacedName.Name, - Namespace: vrNamespacedName.Namespace, - Labels: rmnutil.OwnerLabels(v.instance), - }, - Spec: volrep.VolumeReplicationSpec{ - DataSource: corev1.TypedLocalObjectReference{ - Kind: "PersistentVolumeClaim", - Name: vrNamespacedName.Name, - APIGroup: new(string), - }, - ReplicationState: state, - VolumeReplicationClass: volumeReplicationClass.GetName(), - AutoResync: v.autoResync(state), - }, + var volRep client.Object + + cg, ok := pvc.GetLabels()[ConsistencyGroupLabel] + if ok && rmnutil.IsCGEnabled(v.instance.GetAnnotations()) { + v.log.Info("Create VolumeGroupReplication for PVC %s/%s", pvc.GetNamespace(), pvc.GetName()) + + vrNamespacedName.Name = cg + v.instance.Name + + volRep = v.createVolumeGroupReplication(cg, vrNamespacedName, volumeReplicationClass, state) + } else { + v.log.Info("Create VolumeReplication for PVC %s/%s", pvc.GetNamespace(), pvc.GetName()) + + volRep = v.createVolumeReplication(vrNamespacedName, volumeReplicationClass, state) } if !vrgInAdminNamespace(v.instance, v.ramenConfig) { @@ -1234,16 +1336,18 @@ func (v *VRGInstance) createVR(vrNamespacedName types.NamespacedName, state volr // VolumeReplicationGroup has the same name as pvc. But in future if it changes // functions to be changed would be processVRAsPrimary(), processVRAsSecondary() // to either receive pvc NamespacedName or pvc itself as an additional argument. +// +//nolint:funlen,cyclop,gocognit,nestif,gocyclo func (v *VRGInstance) selectVolumeReplicationClass( namespacedName types.NamespacedName, -) (*volrep.VolumeReplicationClass, error) { +) (client.Object, error) { if err := v.updateReplicationClassList(); err != nil { v.log.Error(err, "Failed to get VolumeReplicationClass list") return nil, fmt.Errorf("failed to get VolumeReplicationClass list") } - if len(v.replClassList.Items) == 0 { + if len(v.replClassList.Items) == 0 && len(v.grpReplClassList.Items) == 0 { v.log.Info("No VolumeReplicationClass available") return nil, fmt.Errorf("no VolumeReplicationClass available") @@ -1258,20 +1362,37 @@ func (v *VRGInstance) selectVolumeReplicationClass( namespacedName, err) } - matchingReplicationClassList := []*volrep.VolumeReplicationClass{} + matchingReplicationClassList := []client.Object{} - for index := range v.replClassList.Items { - replicationClass := &v.replClassList.Items[index] - schedulingInterval, found := replicationClass.Spec.Parameters["schedulingInterval"] + if !rmnutil.IsCGEnabled(v.instance.GetAnnotations()) { + for index := range v.replClassList.Items { + replicationClass := &v.replClassList.Items[index] + schedulingInterval, found := replicationClass.Spec.Parameters["schedulingInterval"] - if storageClass.Provisioner != replicationClass.Spec.Provisioner || !found { - // skip this replication class if provisioner does not match or if schedule not found - continue + if storageClass.Provisioner != replicationClass.Spec.Provisioner || !found { + // skip this replication class if provisioner does not match or if schedule not found + continue + } + + // ReplicationClass that matches both VRG schedule and pvc provisioner + if schedulingInterval == v.instance.Spec.Async.SchedulingInterval { + matchingReplicationClassList = append(matchingReplicationClassList, replicationClass) + } } + } else { + for index := range v.grpReplClassList.Items { + replicationClass := &v.grpReplClassList.Items[index] + schedulingInterval, found := replicationClass.Spec.Parameters["schedulingInterval"] - // ReplicationClass that matches both VRG schedule and pvc provisioner - if schedulingInterval == v.instance.Spec.Async.SchedulingInterval { - matchingReplicationClassList = append(matchingReplicationClassList, replicationClass) + if storageClass.Provisioner != replicationClass.Spec.Provisioner || !found { + // skip this replication class if provisioner does not match or if schedule not found + continue + } + + // ReplicationClass that matches both VRG schedule and pvc provisioner + if schedulingInterval == v.instance.Spec.Async.SchedulingInterval { + matchingReplicationClassList = append(matchingReplicationClassList, replicationClass) + } } } @@ -1294,11 +1415,11 @@ func (v *VRGInstance) selectVolumeReplicationClass( // filterDefaultVRC filters the VRC list to return VRCs with default annotation // if the list contains more than one VRC. func (v *VRGInstance) filterDefaultVRC( - replicationClassList []*volrep.VolumeReplicationClass, -) (*volrep.VolumeReplicationClass, error) { + replicationClassList []client.Object, +) (client.Object, error) { v.log.Info("Found multiple matching VolumeReplicationClasses, filtering with default annotation") - filteredVRCs := []*volrep.VolumeReplicationClass{} + filteredVRCs := []client.Object{} for index := range replicationClassList { if replicationClassList[index].GetAnnotations()[defaultVRCAnnotationKey] == "true" { @@ -1310,8 +1431,8 @@ func (v *VRGInstance) filterDefaultVRC( switch len(filteredVRCs) { case 0: - v.log.Info(fmt.Sprintf("Multiple VolumeReplicationClass found, with no default annotation (%s/%s)", - replicationClassList[0].Spec.Provisioner, v.instance.Spec.Async.SchedulingInterval)) + v.log.Info(fmt.Sprintf("Multiple VolumeReplicationClass found, with no default annotation (%s)", + defaultVRCAnnotationKey)) return nil, fmt.Errorf("multiple VolumeReplicationClass found, with no default annotation, %s", defaultVRCAnnotationKey) @@ -1373,30 +1494,32 @@ func (v *VRGInstance) getStorageClass(namespacedName types.NamespacedName) (*sto // checkVRStatus checks if the VolumeReplication resource has the desired status for the // current generation and returns true if so, false otherwise -func (v *VRGInstance) checkVRStatus(volRep *volrep.VolumeReplication) bool { +func (v *VRGInstance) checkVRStatus(pvc *corev1.PersistentVolumeClaim, volRep client.Object, + status *volrep.VolumeReplicationStatus, +) bool { // When the generation in the status is updated, VRG would get a reconcile // as it owns VolumeReplication resource. - if volRep.GetGeneration() != volRep.Status.ObservedGeneration { + if volRep.GetGeneration() != status.ObservedGeneration { v.log.Info(fmt.Sprintf("Generation mismatch in status for VolumeReplication resource (%s/%s)", volRep.GetName(), volRep.GetNamespace())) msg := "VolumeReplication generation not updated in status" - v.updatePVCDataReadyCondition(volRep.Namespace, volRep.Name, VRGConditionReasonProgressing, msg) + v.updatePVCDataReadyCondition(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonProgressing, msg) return false } switch { case v.instance.Spec.ReplicationState == ramendrv1alpha1.Primary: - return v.validateVRStatus(volRep, ramendrv1alpha1.Primary) + return v.validateVRStatus(pvc, volRep, ramendrv1alpha1.Primary, status) case v.instance.Spec.ReplicationState == ramendrv1alpha1.Secondary: - return v.validateVRStatus(volRep, ramendrv1alpha1.Secondary) + return v.validateVRStatus(pvc, volRep, ramendrv1alpha1.Secondary, status) default: v.log.Info(fmt.Sprintf("invalid Replication State %s for VolumeReplicationGroup (%s:%s)", string(v.instance.Spec.ReplicationState), v.instance.Name, v.instance.Namespace)) msg := "VolumeReplicationGroup state invalid" - v.updatePVCDataReadyCondition(volRep.Namespace, volRep.Name, VRGConditionReasonError, msg) + v.updatePVCDataReadyCondition(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonError, msg) return false } @@ -1407,7 +1530,9 @@ func (v *VRGInstance) checkVRStatus(volRep *volrep.VolumeReplication) bool { // - When replication state is Primary, only Completed condition is checked. // - When replication state is Secondary, all 3 conditions for Completed/Degraded/Resyncing is // checked and ensured healthy. -func (v *VRGInstance) validateVRStatus(volRep *volrep.VolumeReplication, state ramendrv1alpha1.ReplicationState) bool { +func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep client.Object, + state ramendrv1alpha1.ReplicationState, status *volrep.VolumeReplicationStatus, +) bool { var ( stateString string action string @@ -1423,13 +1548,13 @@ func (v *VRGInstance) validateVRStatus(volRep *volrep.VolumeReplication, state r } // it should be completed - conditionMet, msg := isVRConditionMet(volRep, volrep.ConditionCompleted, metav1.ConditionTrue) + conditionMet, msg := isVRConditionMet(volRep, status, volrep.ConditionCompleted, metav1.ConditionTrue) if !conditionMet { defaultMsg := fmt.Sprintf("VolumeReplication resource for pvc not %s to %s", action, stateString) - v.updatePVCDataReadyConditionHelper(volRep.Namespace, volRep.Name, VRGConditionReasonError, msg, + v.updatePVCDataReadyConditionHelper(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonError, msg, defaultMsg) - v.updatePVCDataProtectedConditionHelper(volRep.Namespace, volRep.Name, VRGConditionReasonError, msg, + v.updatePVCDataProtectedConditionHelper(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonError, msg, defaultMsg) v.log.Info(fmt.Sprintf("%s (VolRep: %s/%s)", defaultMsg, volRep.GetName(), volRep.GetNamespace())) @@ -1439,15 +1564,15 @@ func (v *VRGInstance) validateVRStatus(volRep *volrep.VolumeReplication, state r // if primary, all checks are completed if state == ramendrv1alpha1.Secondary { - return v.validateAdditionalVRStatusForSecondary(volRep) + return v.validateAdditionalVRStatusForSecondary(pvc, volRep, status) } msg = "PVC in the VolumeReplicationGroup is ready for use" - v.updatePVCDataReadyCondition(volRep.Namespace, volRep.Name, VRGConditionReasonReady, msg) - v.updatePVCDataProtectedCondition(volRep.Namespace, volRep.Name, VRGConditionReasonReady, msg) - v.updatePVCLastSyncTime(volRep.Namespace, volRep.Name, volRep.Status.LastSyncTime) - v.updatePVCLastSyncDuration(volRep.Namespace, volRep.Name, volRep.Status.LastSyncDuration) - v.updatePVCLastSyncBytes(volRep.Namespace, volRep.Name, volRep.Status.LastSyncBytes) + v.updatePVCDataReadyCondition(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonReady, msg) + v.updatePVCDataProtectedCondition(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonReady, msg) + v.updatePVCLastSyncTime(pvc.GetNamespace(), pvc.GetName(), status.LastSyncTime) + v.updatePVCLastSyncDuration(pvc.GetNamespace(), pvc.GetName(), status.LastSyncDuration) + v.updatePVCLastSyncBytes(pvc.GetNamespace(), pvc.GetName(), status.LastSyncBytes) v.log.Info(fmt.Sprintf("VolumeReplication resource %s/%s is ready for use", volRep.GetName(), volRep.GetNamespace())) @@ -1471,22 +1596,24 @@ func (v *VRGInstance) validateVRStatus(volRep *volrep.VolumeReplication, state r // With 2nd condition being met, // ProtectedPVC.Conditions[DataReady] = True // ProtectedPVC.Conditions[DataProtected] = True -func (v *VRGInstance) validateAdditionalVRStatusForSecondary(volRep *volrep.VolumeReplication) bool { - v.updatePVCLastSyncTime(volRep.Namespace, volRep.Name, nil) - v.updatePVCLastSyncDuration(volRep.Namespace, volRep.Name, nil) - v.updatePVCLastSyncBytes(volRep.Namespace, volRep.Name, nil) - - conditionMet, _ := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionTrue) +func (v *VRGInstance) validateAdditionalVRStatusForSecondary(pvc *corev1.PersistentVolumeClaim, volRep client.Object, + status *volrep.VolumeReplicationStatus, +) bool { + v.updatePVCLastSyncTime(pvc.GetNamespace(), pvc.GetName(), nil) + v.updatePVCLastSyncDuration(pvc.GetNamespace(), pvc.GetName(), nil) + v.updatePVCLastSyncBytes(pvc.GetNamespace(), pvc.GetName(), nil) + + conditionMet, _ := isVRConditionMet(volRep, status, volrep.ConditionResyncing, metav1.ConditionTrue) if !conditionMet { - return v.checkResyncCompletionAsSecondary(volRep) + return v.checkResyncCompletionAsSecondary(pvc, volRep, status) } - conditionMet, msg := isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionTrue) + conditionMet, msg := isVRConditionMet(volRep, status, volrep.ConditionDegraded, metav1.ConditionTrue) if !conditionMet { - v.updatePVCDataProtectedConditionHelper(volRep.Namespace, volRep.Name, VRGConditionReasonError, msg, + v.updatePVCDataProtectedConditionHelper(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonError, msg, "VolumeReplication resource for pvc is not in Degraded condition while resyncing") - v.updatePVCDataReadyConditionHelper(volRep.Namespace, volRep.Name, VRGConditionReasonError, msg, + v.updatePVCDataReadyConditionHelper(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonError, msg, "VolumeReplication resource for pvc is not in Degraded condition while resyncing") v.log.Info(fmt.Sprintf("VolumeReplication resource is not in degraded condition while"+ @@ -1496,8 +1623,8 @@ func (v *VRGInstance) validateAdditionalVRStatusForSecondary(volRep *volrep.Volu } msg = "VolumeReplication resource for the pvc is syncing as Secondary" - v.updatePVCDataReadyCondition(volRep.Namespace, volRep.Name, VRGConditionReasonReplicating, msg) - v.updatePVCDataProtectedCondition(volRep.Namespace, volRep.Name, VRGConditionReasonReplicating, msg) + v.updatePVCDataReadyCondition(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonReplicating, msg) + v.updatePVCDataProtectedCondition(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonReplicating, msg) v.log.Info(fmt.Sprintf("VolumeReplication resource for the pvc is syncing as Secondary (%s/%s)", volRep.GetName(), volRep.GetNamespace())) @@ -1506,14 +1633,16 @@ func (v *VRGInstance) validateAdditionalVRStatusForSecondary(volRep *volrep.Volu } // checkResyncCompletionAsSecondary returns true if resync status is complete as secondary, false otherwise -func (v *VRGInstance) checkResyncCompletionAsSecondary(volRep *volrep.VolumeReplication) bool { - conditionMet, msg := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionFalse) +func (v *VRGInstance) checkResyncCompletionAsSecondary(pvc *corev1.PersistentVolumeClaim, volRep client.Object, + status *volrep.VolumeReplicationStatus, +) bool { + conditionMet, msg := isVRConditionMet(volRep, status, volrep.ConditionResyncing, metav1.ConditionFalse) if !conditionMet { defaultMsg := "VolumeReplication resource for pvc not syncing as Secondary" - v.updatePVCDataReadyConditionHelper(volRep.Namespace, volRep.Name, VRGConditionReasonError, msg, + v.updatePVCDataReadyConditionHelper(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonError, msg, defaultMsg) - v.updatePVCDataProtectedConditionHelper(volRep.Namespace, volRep.Name, VRGConditionReasonError, msg, + v.updatePVCDataProtectedConditionHelper(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonError, msg, defaultMsg) v.log.Info(fmt.Sprintf("%s (VolRep: %s/%s)", defaultMsg, volRep.GetName(), volRep.GetNamespace())) @@ -1521,13 +1650,13 @@ func (v *VRGInstance) checkResyncCompletionAsSecondary(volRep *volrep.VolumeRepl return false } - conditionMet, msg = isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionFalse) + conditionMet, msg = isVRConditionMet(volRep, status, volrep.ConditionDegraded, metav1.ConditionFalse) if !conditionMet { defaultMsg := "VolumeReplication resource for pvc is not syncing and is degraded as Secondary" - v.updatePVCDataReadyConditionHelper(volRep.Namespace, volRep.Name, VRGConditionReasonError, msg, + v.updatePVCDataReadyConditionHelper(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonError, msg, defaultMsg) - v.updatePVCDataProtectedConditionHelper(volRep.Namespace, volRep.Name, VRGConditionReasonError, msg, + v.updatePVCDataProtectedConditionHelper(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonError, msg, defaultMsg) v.log.Info(fmt.Sprintf("%s (VolRep: %s/%s)", defaultMsg, volRep.GetName(), volRep.GetNamespace())) @@ -1536,8 +1665,8 @@ func (v *VRGInstance) checkResyncCompletionAsSecondary(volRep *volrep.VolumeRepl } msg = "VolumeReplication resource for the pvc as Secondary is in sync with Primary" - v.updatePVCDataReadyCondition(volRep.Namespace, volRep.Name, VRGConditionReasonReplicated, msg) - v.updatePVCDataProtectedCondition(volRep.Namespace, volRep.Name, VRGConditionReasonDataProtected, msg) + v.updatePVCDataReadyCondition(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonReplicated, msg) + v.updatePVCDataProtectedCondition(pvc.GetNamespace(), pvc.GetName(), VRGConditionReasonDataProtected, msg) v.log.Info(fmt.Sprintf("data sync completed as both degraded and resyncing are false for"+ " secondary VolRep (%s/%s)", volRep.GetName(), volRep.GetNamespace())) @@ -1545,11 +1674,11 @@ func (v *VRGInstance) checkResyncCompletionAsSecondary(volRep *volrep.VolumeRepl return true } -func isVRConditionMet(volRep *volrep.VolumeReplication, +func isVRConditionMet(volRep client.Object, status *volrep.VolumeReplicationStatus, conditionType string, desiredStatus metav1.ConditionStatus, ) (bool, string) { - volRepCondition := findCondition(volRep.Status.Conditions, conditionType) + volRepCondition := findCondition(status.Conditions, conditionType) if volRepCondition == nil { msg := fmt.Sprintf("Failed to get the %s condition from status of VolumeReplication resource.", conditionType) @@ -1751,9 +1880,9 @@ func setPVCClusterDataProtectedCondition(protectedPVC *ramendrv1alpha1.Protected // ensureVRDeletedFromAPIServer adds an additional step to ensure that we wait for volumereplication deletion // from API server before moving ahead with vrg finalizer removal. -func (v *VRGInstance) ensureVRDeletedFromAPIServer(vrNamespacedName types.NamespacedName, log logr.Logger) error { - volRep := &volrep.VolumeReplication{} - +func (v *VRGInstance) ensureVRDeletedFromAPIServer(vrNamespacedName types.NamespacedName, + volRep client.Object, log logr.Logger, +) error { err := v.reconciler.APIReader.Get(v.ctx, vrNamespacedName, volRep) if err == nil { log.Info("Found VolumeReplication resource pending delete", "vr", volRep) @@ -1773,15 +1902,39 @@ func (v *VRGInstance) ensureVRDeletedFromAPIServer(vrNamespacedName types.Namesp } // deleteVR deletes a VolumeReplication instance if found -func (v *VRGInstance) deleteVR(vrNamespacedName types.NamespacedName, log logr.Logger) error { - cr := &volrep.VolumeReplication{ - ObjectMeta: metav1.ObjectMeta{ - Name: vrNamespacedName.Name, - Namespace: vrNamespacedName.Namespace, - }, +func (v *VRGInstance) deleteVR(vrNamespacedName types.NamespacedName, + pvc *corev1.PersistentVolumeClaim, log logr.Logger, +) error { + var cr client.Object + + var err error + + cg, ok := pvc.GetLabels()[ConsistencyGroupLabel] + if ok && rmnutil.IsCGEnabled(v.instance.GetAnnotations()) { + log.Info("Delete VolumeGroupReplication for PVC %s/%s", pvc.GetNamespace(), pvc.GetName()) + + vrNamespacedName.Name = cg + v.instance.Name + + cr, err = v.reconcileVolumeGroupReplicationForDeletion(vrNamespacedName, pvc, log) + if err != nil { + return err + } + + if cr == nil { + return nil + } + } else { + log.Info("Delete VolumeReplication for PVC %s/%s", pvc.GetNamespace(), pvc.GetName()) + + cr = &volrep.VolumeReplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: vrNamespacedName.Name, + Namespace: vrNamespacedName.Namespace, + }, + } } - err := v.reconciler.Delete(v.ctx, cr) + err = v.reconciler.Delete(v.ctx, cr) if err != nil { if !k8serrors.IsNotFound(err) { log.Error(err, "Failed to delete VolumeReplication resource") @@ -1793,7 +1946,57 @@ func (v *VRGInstance) deleteVR(vrNamespacedName types.NamespacedName, log logr.L return nil } - return v.ensureVRDeletedFromAPIServer(vrNamespacedName, log) + return v.ensureVRDeletedFromAPIServer(vrNamespacedName, cr, log) +} + +func (v *VRGInstance) reconcileVolumeGroupReplicationForDeletion(vrNamespacedName types.NamespacedName, + pvc *corev1.PersistentVolumeClaim, log logr.Logger, +) (client.Object, error) { + volRep := &volrep.VolumeGroupReplication{} + + err := v.reconciler.Get(v.ctx, vrNamespacedName, volRep) + if err != nil { + if !k8serrors.IsNotFound(err) { + log.Error(err, "Failed to get VolumeGroupReplication resource") + + return nil, fmt.Errorf("failed to get VolumeGroupReplication resource (%s/%s), %w", + vrNamespacedName.Namespace, vrNamespacedName.Name, err) + } + + return nil, nil + } + + pvcLabelSelector := volRep.Spec.Source.Selector + + // Found VGR, if there is only 1 PVC protected by it, we can delete + pvcList, err := rmnutil.ListPVCsByPVCSelector(v.ctx, v.reconciler.Client, v.log, + *pvcLabelSelector, + []string{vrNamespacedName.Namespace}, + v.instance.Spec.VolSync.Disabled, + ) + if err != nil { + return nil, err + } + + if len(pvcList.Items) > 1 { + log.Error(err, "VolumeGroupReplication resource is in use and cannot be deleted yet") + + return nil, nil + } + + selector, err := metav1.LabelSelectorAsSelector(pvcLabelSelector) + if err != nil { + return nil, err + } + + labelMatch := selector.Matches(labels.Set(pvc.GetLabels())) + if !labelMatch { + log.Info(fmt.Sprintf("PVC %s does not match VolumeGroupReplication label selector %v", pvc.Name, selector)) + + return nil, fmt.Errorf("PVC %s does not match VolumeGroupReplication label selector %v", pvc.Name, selector) + } + + return volRep, nil } func (v *VRGInstance) addProtectedAnnotationForPVC(pvc *corev1.PersistentVolumeClaim, log logr.Logger) error { diff --git a/internal/controller/vrg_volrep_test.go b/internal/controller/vrg_volrep_test.go index 73e4e3e207..4da206588f 100644 --- a/internal/controller/vrg_volrep_test.go +++ b/internal/controller/vrg_volrep_test.go @@ -577,6 +577,7 @@ var _ = Describe("VolumeReplicationGroupVolRepController", func() { vrgVRDeleteEnsureTestCase.promoteVolReps() vrgVRDeleteEnsureTestCase.verifyVRGStatusExpectation(true, vrgController.VRGConditionReasonReady) }) + //nolint:dupl It("ensures orderly cleanup post VolumeReplication deletion", func() { By("Protecting the VolumeReplication resources from deletion") vrgVRDeleteEnsureTestCase.protectDeletionOfVolReps() @@ -614,6 +615,149 @@ var _ = Describe("VolumeReplicationGroupVolRepController", func() { }) }) + // Test VRG finalizer removal during deletion is deferred till VGR is deleted + var vrgVGRDeleteEnsureTestCase *vrgTest + Context("in primary state", func() { + createTestTemplate := &template{ + ClaimBindInfo: corev1.ClaimBound, + VolumeBindInfo: corev1.VolumeBound, + schedulingInterval: "1h", + storageClassName: "manual", + replicationClassName: "test-replicationclass", + vrcProvisioner: "manual.storage.com", + scProvisioner: "manual.storage.com", + replicationClassLabels: map[string]string{"protection": "ramen"}, + } + It("sets up PVCs, PVs and VRGs (with s3 stores that fail uploads)", func() { + createTestTemplate.s3Profiles = []string{s3Profiles[vrgS3ProfileNumber].S3ProfileName} + vrgVGRDeleteEnsureTestCase = newVRGTestCaseCreate(1, createTestTemplate, true, false) + vrgVGRDeleteEnsureTestCase.repGroup = true + vrgVGRDeleteEnsureTestCase.VRGTestCaseStart() + }) + It("waits for VRG to create a VGR for all PVCs", func() { + expectedVRCount := 1 + vrgVGRDeleteEnsureTestCase.waitForVGRCountToMatch(expectedVRCount) + }) + It("waits for VRG status to match", func() { + vrgVGRDeleteEnsureTestCase.promoteVolGroupReps() + vrgVGRDeleteEnsureTestCase.verifyVRGStatusExpectation(true, vrgController.VRGConditionReasonReady) + }) + //nolint:dupl + It("ensures orderly cleanup post VolumeGroupReplication deletion", func() { + By("Protecting the VolumeGroupReplication resources from deletion") + vrgVGRDeleteEnsureTestCase.protectDeletionOfVolGroupReps() + + By("Starting the VRG deletion process") + vrgVGRDeleteEnsureTestCase.cleanupPVCs(pvcProtectedVerify, vrAndPvcDeletionTimestampsRecentVerify) + vrg := vrgVGRDeleteEnsureTestCase.getVRG() + Expect(k8sClient.Delete(context.TODO(), vrg)).To(Succeed()) + + By("Ensuring VRG is not deleted till VGR is present") + Consistently(apiReader.Get, vrgtimeout, vrginterval). + WithArguments(context.TODO(), vrgVGRDeleteEnsureTestCase.vrgNamespacedName(), vrg). + Should(Succeed(), "while waiting for VRG %v to remain undeleted", + vrgVGRDeleteEnsureTestCase.vrgNamespacedName()) + + By("Un-protecting the VolumeReplication resources to ensure their deletion") + vrgVGRDeleteEnsureTestCase.unprotectDeletionOfVolGroupReps() + + By("Ensuring VRG is deleted eventually as a result") + var i int + Eventually(func() error { + i++ + + return apiReader.Get(context.TODO(), vrgVGRDeleteEnsureTestCase.vrgNamespacedName(), vrg) + }, vrgtimeout*2, vrginterval). + Should(MatchError(errors.NewNotFound(schema.GroupResource{ + Group: ramendrv1alpha1.GroupVersion.Group, + Resource: "volumereplicationgroups", + }, vrgVGRDeleteEnsureTestCase.vrgName)), + "polled %d times for VRG to be garbage collected\n"+format.Object(*vrg, 1), i) + + vrgVGRDeleteEnsureTestCase.cleanupNamespace() + vrgVGRDeleteEnsureTestCase.cleanupSC() + vrgVGRDeleteEnsureTestCase.cleanupVGRC() + }) + }) + + // Try the simple case of creating VRG, PVC, PV and + // check whether VolGroupRep resources are created or not + var vrgCreateVGRTestCase *vrgTest + Context("in primary state", func() { + createTestTemplate := &template{ + ClaimBindInfo: corev1.ClaimBound, + VolumeBindInfo: corev1.VolumeBound, + schedulingInterval: "1h", + storageClassName: "manual", + replicationClassName: "test-replicationclass", + vrcProvisioner: "manual.storage.com", + scProvisioner: "manual.storage.com", + replicationClassLabels: map[string]string{"protection": "ramen"}, + } + It("sets up PVCs, PVs and VRGs", func() { + createTestTemplate.s3Profiles = []string{s3Profiles[vrgS3ProfileNumber].S3ProfileName} + vrgCreateVGRTestCase = newVRGTestCaseCreate(3, createTestTemplate, true, false) + vrgCreateVGRTestCase.repGroup = true + vrgCreateVGRTestCase.VRGTestCaseStart() + }) + It("waits for VRG to create a VGR for all PVCs", func() { + expectedVGRCount := 1 + vrgCreateVGRTestCase.waitForVGRCountToMatch(expectedVGRCount) + }) + It("waits for VRG status to match", func() { + vrgCreateVGRTestCase.promoteVolGroupReps() + vrgCreateVGRTestCase.verifyVRGStatusExpectation(true, vrgController.VRGConditionReasonReady) + }) + It("cleans up after testing", func() { + vrgCreateVGRTestCase.cleanupProtected() + }) + }) + + // Creates VRG. PVCs and PV are created with Status.Phase + // set to pending and VolGroupRep should not be created until + // all the PVCs and PVs are bound. So, these tests then + // change the Status.Phase of PVCs and PVs to bound state, + // and then checks whether VolGroupRep + // resource have been created or not. + var vrgPVCnotBoundVGRTestCase *vrgTest + Context("in primary state", func() { + createTestTemplate := &template{ + ClaimBindInfo: corev1.ClaimPending, + VolumeBindInfo: corev1.VolumePending, + schedulingInterval: "1h", + storageClassName: "manual", + replicationClassName: "test-replicationclass", + vrcProvisioner: "manual.storage.com", + scProvisioner: "manual.storage.com", + replicationClassLabels: map[string]string{"protection": "ramen"}, + } + It("sets up PVCs, PVs and VRGs", func() { + createTestTemplate.s3Profiles = []string{s3Profiles[vrgS3ProfileNumber].S3ProfileName} + vrgPVCnotBoundVGRTestCase = newVRGTestCaseCreate(3, createTestTemplate, false, false) + vrgPVCnotBoundVGRTestCase.repGroup = true + vrgPVCnotBoundVGRTestCase.VRGTestCaseStart() + }) + It("expect no VR to be created as PVC not bound", func() { + expectedVGRCount := 0 + vrgPVCnotBoundVGRTestCase.waitForVGRCountToMatch(expectedVGRCount) + }) + It("bind each pv to corresponding pvc", func() { + vrgPVCnotBoundVGRTestCase.bindPVAndPVC() + vrgPVCnotBoundVGRTestCase.verifyPVCBindingToPV(true) + }) + It("waits for VRG to create one VGR resource for all PVCs", func() { + expectedVGRCount := 1 + vrgPVCnotBoundVGRTestCase.waitForVGRCountToMatch(expectedVGRCount) + }) + It("waits for VRG status to match", func() { + vrgPVCnotBoundVGRTestCase.promoteVolGroupReps() + vrgPVCnotBoundVGRTestCase.verifyVRGStatusExpectation(true, vrgController.VRGConditionReasonReady) + }) + It("cleans up after testing", func() { + vrgPVCnotBoundVGRTestCase.cleanupProtected() + }) + }) + // Try the simple case of creating VRG, PVC, PV and // check whether VolRep resources are created or not var vrgTestCases []*vrgTest @@ -1149,6 +1293,7 @@ type vrgTest struct { skipCreationPVandPVC bool checkBind bool vrgFirst bool + repGroup bool template *template } @@ -1214,7 +1359,12 @@ func (v *vrgTest) VRGTestCaseStart() { By("Creating namespace " + v.namespace) v.createNamespace() v.createSC(v.template) - v.createVRC(v.template) + + if v.repGroup { + v.createVGRC(v.template) + } else { + v.createVRC(v.template) + } if v.vrgFirst { v.createVRG() @@ -1490,11 +1640,14 @@ func (v *vrgTest) bindPVAndPVC() { i := i // capture i for use in closure // Bind PVC - pvc := getPVC(v.pvcNames[i]) - pvc.Status.Phase = corev1.ClaimBound - err = k8sClient.Status().Update(context.TODO(), pvc) - Expect(err).To(BeNil(), - "failed to update status of PVC %s", v.pvcNames[i]) + retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + pvc := getPVC(v.pvcNames[i]) + pvc.Status.Phase = corev1.ClaimBound + + return k8sClient.Status().Update(context.TODO(), pvc) + }) + + Expect(retryErr).NotTo(HaveOccurred()) } } @@ -1526,6 +1679,15 @@ func (v *vrgTest) createVRG() { S3Profiles: v.template.s3Profiles, }, } + + if v.repGroup { + if vrg.ObjectMeta.Annotations == nil { + vrg.ObjectMeta.Annotations = map[string]string{} + } + + vrg.ObjectMeta.Annotations[util.IsCGEnabledAnnotation] = "true" + } + err := k8sClient.Create(context.TODO(), vrg) expectedErr := errors.NewAlreadyExists( schema.GroupResource{ @@ -1605,6 +1767,45 @@ func (v *vrgTest) createVRC(testTemplate *template) { } } +func (v *vrgTest) createVGRC(testTemplate *template) { + defaultAnnotations := map[string]string{} + defaultAnnotations["replication.storage.openshift.io/is-default-class"] = "true" + + By("creating VGRC " + v.replicationClass) + + parameters := make(map[string]string) + + if testTemplate.schedulingInterval != "" { + parameters["schedulingInterval"] = testTemplate.schedulingInterval + } + + vrc := &volrep.VolumeGroupReplicationClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: v.replicationClass, + Namespace: v.namespace, + Annotations: defaultAnnotations, + }, + Spec: volrep.VolumeGroupReplicationClassSpec{ + Provisioner: testTemplate.vrcProvisioner, + Parameters: parameters, + }, + } + + if len(testTemplate.replicationClassLabels) > 0 { + vrc.ObjectMeta.Labels = testTemplate.replicationClassLabels + } + + err := k8sClient.Create(context.TODO(), vrc) + if err != nil { + if errors.IsAlreadyExists(err) { + err = k8sClient.Get(context.TODO(), types.NamespacedName{Name: v.replicationClass}, vrc) + } + } + + Expect(err).NotTo(HaveOccurred(), + "failed to create/get VolumeGroupReplicationClass %s/%s", v.replicationClass, v.vrgName) +} + func (v *vrgTest) createSC(testTemplate *template) { By("creating StorageClass " + v.storageClass) @@ -1615,6 +1816,9 @@ func (v *vrgTest) createSC(testTemplate *template) { sc := &storagev1.StorageClass{ ObjectMeta: metav1.ObjectMeta{ Name: v.storageClass, + Labels: map[string]string{ + vrgController.StorageIDLabel: "test-storageid", + }, }, Provisioner: testTemplate.scProvisioner, } @@ -1884,6 +2088,7 @@ func (v *vrgTest) cleanup( v.cleanupNamespace() v.cleanupSC() v.cleanupVRC() + v.cleanupVGRC() } func (v *vrgTest) cleanupPVCs( @@ -2131,6 +2336,26 @@ func (v *vrgTest) cleanupVRC() { "failed to delete replicationClass %s", v.replicationClass) } +func (v *vrgTest) cleanupVGRC() { + key := types.NamespacedName{ + Name: v.replicationClass, + Namespace: v.namespace, + } + + vgrc := &volrep.VolumeGroupReplicationClass{} + + err := k8sClient.Get(context.TODO(), key, vgrc) + if err != nil { + if errors.IsNotFound(err) { + return + } + } + + err = k8sClient.Delete(context.TODO(), vgrc) + Expect(err).To(BeNil(), + "failed to delete replicationClass %s", v.replicationClass) +} + func (v *vrgTest) cleanupNamespace() { By("deleting namespace " + v.namespace) @@ -2163,6 +2388,24 @@ func (v *vrgTest) waitForVRCountToMatch(vrCount int) { vrCount, v.vrgName, v.namespace) } +func (v *vrgTest) waitForVGRCountToMatch(vgrCount int) { + By("Waiting for VRs count to match " + v.namespace) + + Eventually(func() int { + listOptions := &client.ListOptions{ + Namespace: v.namespace, + } + volGroupRepList := &volrep.VolumeGroupReplicationList{} + err := k8sClient.List(context.TODO(), volGroupRepList, listOptions) + Expect(err).NotTo(HaveOccurred(), + "failed to get a list of VGRs in namespace %s", v.namespace) + + return len(volGroupRepList.Items) + }, timeout, interval).Should(BeNumerically("==", vgrCount), + "while waiting for VGR count of %d in VRG %s of namespace %s", + vgrCount, v.vrgName, v.namespace) +} + func (v *vrgTest) promoteVolReps() { v.promoteVolRepsAndDo(func(index, count int) { // VRG should not be ready until last VolRep is ready. @@ -2170,10 +2413,18 @@ func (v *vrgTest) promoteVolReps() { }) } +func (v *vrgTest) promoteVolGroupReps() { + v.promoteVolGroupRepsAndDo(func(index, count int) { + // VRG should not be ready until last VolRep is ready. + v.verifyVRGStatusExpectation(index == count-1, vrgController.VRGConditionReasonReady) + }) +} + func (v *vrgTest) promoteVolRepsWithoutVrgStatusCheck() { v.promoteVolRepsAndDo(func(index, count int) {}) } +//nolint:dupl func (v *vrgTest) promoteVolRepsAndDo(do func(int, int)) { By("Promoting VolumeReplication resources " + v.namespace) @@ -2230,6 +2481,65 @@ func (v *vrgTest) promoteVolRepsAndDo(do func(int, int)) { } } +// nolint: dupl +func (v *vrgTest) promoteVolGroupRepsAndDo(do func(int, int)) { + By("Promoting VolumeGroupReplication resources " + v.namespace) + + volGroupRepList := &volrep.VolumeGroupReplicationList{} + listOptions := &client.ListOptions{ + Namespace: v.namespace, + } + err := k8sClient.List(context.TODO(), volGroupRepList, listOptions) + Expect(err).NotTo(HaveOccurred(), "failed to get a list of VRs in namespace %s", v.namespace) + + for index := range volGroupRepList.Items { + volGroup := volGroupRepList.Items[index] + + volGroupRepStatus := volrep.VolumeGroupReplicationStatus{ + VolumeReplicationStatus: volrep.VolumeReplicationStatus{ + Conditions: []metav1.Condition{ + { + Type: volrep.ConditionCompleted, + Reason: volrep.Promoted, + ObservedGeneration: volGroup.Generation, + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.NewTime(time.Now()), + }, + { + Type: volrep.ConditionDegraded, + Reason: volrep.Healthy, + ObservedGeneration: volGroup.Generation, + Status: metav1.ConditionFalse, + LastTransitionTime: metav1.NewTime(time.Now()), + }, + { + Type: volrep.ConditionResyncing, + Reason: volrep.NotResyncing, + ObservedGeneration: volGroup.Generation, + Status: metav1.ConditionFalse, + LastTransitionTime: metav1.NewTime(time.Now()), + }, + }, + }, + } + volGroupRepStatus.ObservedGeneration = volGroup.Generation + volGroupRepStatus.State = volrep.PrimaryState + volGroupRepStatus.Message = "volume is marked primary" + volGroup.Status = volGroupRepStatus + + err = k8sClient.Status().Update(context.TODO(), &volGroup) + Expect(err).NotTo(HaveOccurred(), "failed to update the status of VolGroupRep %s", volGroup.Name) + + volrepKey := types.NamespacedName{ + Name: volGroup.Name, + Namespace: volGroup.Namespace, + } + v.waitForVolGroupRepPromotion(volrepKey) + + do(index, len(volGroupRepList.Items)) + } +} + func (v *vrgTest) protectDeletionOfVolReps() { By("Adding a finalizer to protect VolumeReplication resources being deleted " + v.namespace) @@ -2249,6 +2559,25 @@ func (v *vrgTest) protectDeletionOfVolReps() { } } +func (v *vrgTest) protectDeletionOfVolGroupReps() { + By("Adding a finalizer to protect VolumeGroupReplication resources being deleted " + v.namespace) + + volGroupRepList := &volrep.VolumeGroupReplicationList{} + listOptions := &client.ListOptions{ + Namespace: v.namespace, + } + err := apiReader.List(context.TODO(), volGroupRepList, listOptions) + Expect(err).NotTo(HaveOccurred(), "failed to get a list of VGRs in namespace %s", v.namespace) + + for index := range volGroupRepList.Items { + volGroupRep := volGroupRepList.Items[index] + if controllerutil.AddFinalizer(client.Object(&volGroupRep), "testDeleteProtected") { + err = k8sClient.Update(context.TODO(), &volGroupRep) + Expect(err).NotTo(HaveOccurred(), "failed to add finalizer to VolGroupRep %s", volGroupRep.Name) + } + } +} + func (v *vrgTest) unprotectDeletionOfVolReps() { By("Removing finalizer that protects VolumeReplication resources from being deleted " + v.namespace) @@ -2268,6 +2597,25 @@ func (v *vrgTest) unprotectDeletionOfVolReps() { } } +func (v *vrgTest) unprotectDeletionOfVolGroupReps() { + By("Removing finalizer that protects VolumeGroupReplication resources from being deleted " + v.namespace) + + volGroupRepList := &volrep.VolumeGroupReplicationList{} + listOptions := &client.ListOptions{ + Namespace: v.namespace, + } + err := apiReader.List(context.TODO(), volGroupRepList, listOptions) + Expect(err).NotTo(HaveOccurred(), "failed to get a list of VGRs in namespace %s", v.namespace) + + for index := range volGroupRepList.Items { + volGroupRep := volGroupRepList.Items[index] + if controllerutil.RemoveFinalizer(client.Object(&volGroupRep), "testDeleteProtected") { + err = k8sClient.Update(context.TODO(), &volGroupRep) + Expect(err).NotTo(HaveOccurred(), "failed to remove finalizer to VolGroupRep %s", volGroupRep.Name) + } + } +} + func (v *vrgTest) waitForVolRepPromotion(vrNamespacedName types.NamespacedName) { updatedVolRep := volrep.VolumeReplication{} @@ -2296,6 +2644,55 @@ func (v *vrgTest) waitForVolRepPromotion(vrNamespacedName types.NamespacedName) "while waiting for protected pvc condition %s/%s", updatedVolRep.Namespace, updatedVolRep.Name) } +func (v *vrgTest) waitForVolGroupRepPromotion(vrNamespacedName types.NamespacedName) { + updatedVolGroupRep := volrep.VolumeGroupReplication{} + + Eventually(func() bool { + err := k8sClient.Get(context.TODO(), vrNamespacedName, &updatedVolGroupRep) + + return err == nil && len(updatedVolGroupRep.Status.Conditions) == 3 + }, vrgtimeout, vrginterval).Should(BeTrue(), + "failed to wait for volRep condition type to change to 'ConditionCompleted' (%d)", + len(updatedVolGroupRep.Status.Conditions)) + + Eventually(func() bool { + vrg := v.getVRG() + + pvcLabelSelector := updatedVolGroupRep.Spec.Source.Selector + + pvcSelector, err := metav1.LabelSelectorAsSelector(pvcLabelSelector) + if err != nil { + return false + } + listOptions := []client.ListOption{ + client.MatchingLabelsSelector{ + Selector: pvcSelector, + }, + } + + pvcList := &corev1.PersistentVolumeClaimList{} + if err := k8sClient.List(context.TODO(), pvcList, listOptions...); err != nil { + return false + } + + protected := false + for idx := range pvcList.Items { + pvc := pvcList.Items[idx] + protectedPVC := vrgController.FindProtectedPVC(vrg, pvc.Namespace, pvc.Name) + if protectedPVC == nil { + continue + } + protected = v.checkProtectedPVCSuccess(vrg, protectedPVC) + if !protected { + return false + } + } + + return protected + }, vrgtimeout, vrginterval).Should(BeTrue(), + "while waiting for protected pvc condition %s/%s", updatedVolGroupRep.Namespace, updatedVolGroupRep.Name) +} + func (v *vrgTest) checkProtectedPVCSuccess(vrg *ramendrv1alpha1.VolumeReplicationGroup, protectedPVC *ramendrv1alpha1.ProtectedPVC, ) bool { diff --git a/test/addons/rook-pool/storage-class.yaml b/test/addons/rook-pool/storage-class.yaml index 36a077b7bd..79069d9653 100644 --- a/test/addons/rook-pool/storage-class.yaml +++ b/test/addons/rook-pool/storage-class.yaml @@ -6,6 +6,8 @@ apiVersion: storage.k8s.io/v1 kind: StorageClass metadata: name: rook-ceph-block + labels: + ramendr.openshift.io/storageid: rook-ceph-storage-id provisioner: rook-ceph.rbd.csi.ceph.com parameters: clusterID: rook-ceph