From c98b2594a69324c17587140ddca0d69e5a3c5190 Mon Sep 17 00:00:00 2001 From: Baptiste Girard-Carrabin Date: Wed, 7 Aug 2024 19:18:18 +0200 Subject: [PATCH] [snapshot-controller] Retry PVC finalizer removal on conflict The previous `removePVCFinalizer` function was using the PVC stored in the informer which, in cases where the PVC had been modified since, lead to conflict errors when trying to remove the PVC finalizer through an update. Now the `removePVCFinalizer` function uses the `RetryOnConflict` helper to make sure the update goes through. --- pkg/common-controller/snapshot_controller.go | 28 ++--- vendor/k8s.io/client-go/util/retry/OWNERS | 4 + vendor/k8s.io/client-go/util/retry/util.go | 105 +++++++++++++++++++ vendor/modules.txt | 1 + 4 files changed, 126 insertions(+), 12 deletions(-) create mode 100644 vendor/k8s.io/client-go/util/retry/OWNERS create mode 100644 vendor/k8s.io/client-go/util/retry/util.go diff --git a/pkg/common-controller/snapshot_controller.go b/pkg/common-controller/snapshot_controller.go index 595986c3a..0ae4b637a 100644 --- a/pkg/common-controller/snapshot_controller.go +++ b/pkg/common-controller/snapshot_controller.go @@ -30,6 +30,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/kubernetes/scheme" ref "k8s.io/client-go/tools/reference" + "k8s.io/client-go/util/retry" corev1helpers "k8s.io/component-helpers/scheduling/corev1" klog "k8s.io/klog/v2" @@ -930,19 +931,22 @@ func (ctrl *csiSnapshotCommonController) ensurePVCFinalizer(snapshot *crdv1.Volu // removePVCFinalizer removes a Finalizer for VolumeSnapshot Source PVC. func (ctrl *csiSnapshotCommonController) removePVCFinalizer(pvc *v1.PersistentVolumeClaim) error { - // Get snapshot source which is a PVC - // TODO(xyang): We get PVC from informer but it may be outdated - // Should get it from API server directly before removing finalizer - pvcClone := pvc.DeepCopy() - pvcClone.ObjectMeta.Finalizers = utils.RemoveString(pvcClone.ObjectMeta.Finalizers, utils.PVCFinalizer) - - _, err := ctrl.client.CoreV1().PersistentVolumeClaims(pvcClone.Namespace).Update(context.TODO(), pvcClone, metav1.UpdateOptions{}) - if err != nil { - return newControllerUpdateError(pvcClone.Name, err.Error()) - } + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + // Get snapshot source which is a PVC + newPvc, err := ctrl.client.CoreV1().PersistentVolumeClaims(pvc.Namespace).Get(context.TODO(), pvc.Name, metav1.GetOptions{}) + if err != nil { + return err + } + newPvc = newPvc.DeepCopy() + newPvc.ObjectMeta.Finalizers = utils.RemoveString(newPvc.ObjectMeta.Finalizers, utils.PVCFinalizer) + _, err = ctrl.client.CoreV1().PersistentVolumeClaims(newPvc.Namespace).Update(context.TODO(), newPvc, metav1.UpdateOptions{}) + if err != nil { + return newControllerUpdateError(newPvc.Name, err.Error()) + } - klog.V(5).Infof("Removed protection finalizer from persistent volume claim %s", pvc.Name) - return nil + klog.V(5).Infof("Removed protection finalizer from persistent volume claim %s", pvc.Name) + return nil + }) } // isPVCBeingUsed checks if a PVC is being used as a source to create a snapshot. diff --git a/vendor/k8s.io/client-go/util/retry/OWNERS b/vendor/k8s.io/client-go/util/retry/OWNERS new file mode 100644 index 000000000..75736b5aa --- /dev/null +++ b/vendor/k8s.io/client-go/util/retry/OWNERS @@ -0,0 +1,4 @@ +# See the OWNERS docs at https://go.k8s.io/owners + +reviewers: + - caesarxuchao diff --git a/vendor/k8s.io/client-go/util/retry/util.go b/vendor/k8s.io/client-go/util/retry/util.go new file mode 100644 index 000000000..0c6e504a6 --- /dev/null +++ b/vendor/k8s.io/client-go/util/retry/util.go @@ -0,0 +1,105 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package retry + +import ( + "time" + + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/util/wait" +) + +// DefaultRetry is the recommended retry for a conflict where multiple clients +// are making changes to the same resource. +var DefaultRetry = wait.Backoff{ + Steps: 5, + Duration: 10 * time.Millisecond, + Factor: 1.0, + Jitter: 0.1, +} + +// DefaultBackoff is the recommended backoff for a conflict where a client +// may be attempting to make an unrelated modification to a resource under +// active management by one or more controllers. +var DefaultBackoff = wait.Backoff{ + Steps: 4, + Duration: 10 * time.Millisecond, + Factor: 5.0, + Jitter: 0.1, +} + +// OnError allows the caller to retry fn in case the error returned by fn is retriable +// according to the provided function. backoff defines the maximum retries and the wait +// interval between two retries. +func OnError(backoff wait.Backoff, retriable func(error) bool, fn func() error) error { + var lastErr error + err := wait.ExponentialBackoff(backoff, func() (bool, error) { + err := fn() + switch { + case err == nil: + return true, nil + case retriable(err): + lastErr = err + return false, nil + default: + return false, err + } + }) + if err == wait.ErrWaitTimeout { + err = lastErr + } + return err +} + +// RetryOnConflict is used to make an update to a resource when you have to worry about +// conflicts caused by other code making unrelated updates to the resource at the same +// time. fn should fetch the resource to be modified, make appropriate changes to it, try +// to update it, and return (unmodified) the error from the update function. On a +// successful update, RetryOnConflict will return nil. If the update function returns a +// "Conflict" error, RetryOnConflict will wait some amount of time as described by +// backoff, and then try again. On a non-"Conflict" error, or if it retries too many times +// and gives up, RetryOnConflict will return an error to the caller. +// +// err := retry.RetryOnConflict(retry.DefaultRetry, func() error { +// // Fetch the resource here; you need to refetch it on every try, since +// // if you got a conflict on the last update attempt then you need to get +// // the current version before making your own changes. +// pod, err := c.Pods("mynamespace").Get(name, metav1.GetOptions{}) +// if err != nil { +// return err +// } +// +// // Make whatever updates to the resource are needed +// pod.Status.Phase = v1.PodFailed +// +// // Try to update +// _, err = c.Pods("mynamespace").UpdateStatus(pod) +// // You have to return err itself here (not wrapped inside another error) +// // so that RetryOnConflict can identify it correctly. +// return err +// }) +// if err != nil { +// // May be conflict if max retries were hit, or may be something unrelated +// // like permissions or a network error +// return err +// } +// ... +// +// TODO: Make Backoff an interface? +func RetryOnConflict(backoff wait.Backoff, fn func() error) error { + return OnError(backoff, errors.IsConflict, fn) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 6f275a887..bfbcdbafd 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -770,6 +770,7 @@ k8s.io/client-go/util/consistencydetector k8s.io/client-go/util/flowcontrol k8s.io/client-go/util/homedir k8s.io/client-go/util/keyutil +k8s.io/client-go/util/retry k8s.io/client-go/util/watchlist k8s.io/client-go/util/workqueue # k8s.io/component-base v0.31.0-rc.0 => k8s.io/component-base v0.31.0-rc.0