Skip to content

Commit

Permalink
Merge pull request #7377 from allenxu404/restore-finalization-impleme…
Browse files Browse the repository at this point in the history
…ntation

Add the finalization phase to the restore workflow
  • Loading branch information
reasonerjt authored Feb 29, 2024
2 parents e727d29 + 2b8bb87 commit edd0d3b
Show file tree
Hide file tree
Showing 14 changed files with 544 additions and 27 deletions.
1 change: 1 addition & 0 deletions changelogs/unreleased/7377-allenxu404
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add the finalization phase to the restore workflow
2 changes: 2 additions & 0 deletions config/crd/v1/bases/velero.io_restores.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,8 @@ spec:
- Completed
- PartiallyFailed
- Failed
- Finalizing
- FinalizingPartiallyFailed
type: string
progress:
description: Progress contains information about the restore's execution
Expand Down
2 changes: 1 addition & 1 deletion config/crd/v1/crds/crds.go

Large diffs are not rendered by default.

15 changes: 14 additions & 1 deletion pkg/apis/velero/v1/restore_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ type InitRestoreHook struct {

// RestorePhase is a string representation of the lifecycle phase
// of a Velero restore
// +kubebuilder:validation:Enum=New;FailedValidation;InProgress;WaitingForPluginOperations;WaitingForPluginOperationsPartiallyFailed;Completed;PartiallyFailed;Failed
// +kubebuilder:validation:Enum=New;FailedValidation;InProgress;WaitingForPluginOperations;WaitingForPluginOperationsPartiallyFailed;Completed;PartiallyFailed;Failed;Finalizing;FinalizingPartiallyFailed
type RestorePhase string

const (
Expand Down Expand Up @@ -277,6 +277,19 @@ const (
// ongoing. The restore is not complete yet.
RestorePhaseWaitingForPluginOperationsPartiallyFailed RestorePhase = "WaitingForPluginOperationsPartiallyFailed"

// RestorePhaseFinalizing means the restore of Kubernetes resources
// was successful and all async plugin operations are now complete,
// but the restore is awaiting the completion of wrap-up tasks before
// the restore process enters a terminal phase.
RestorePhaseFinalizing RestorePhase = "Finalizing"

// RestorePhaseFinalizingPartiallyFailed means the restore of
// Kubernetes resources has finished and all async plugin operations
// are now complete, but one or more errors occurred during restore
// or async operation processing. The restore is awaiting the completion
// of wrap-up tasks before the restore process enters a terminal phase.
RestorePhaseFinalizingPartiallyFailed RestorePhase = "FinalizingPartiallyFailed"

// RestorePhaseCompleted means the restore has run successfully
// without errors.
RestorePhaseCompleted RestorePhase = "Completed"
Expand Down
14 changes: 14 additions & 0 deletions pkg/cmd/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
controller.RestoreOperations: {},
controller.Schedule: {},
controller.ServerStatusRequest: {},
controller.RestoreFinalizer: {},
}

if s.config.restoreOnly {
Expand Down Expand Up @@ -983,6 +984,19 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
}
}

if _, ok := enabledRuntimeControllers[controller.RestoreFinalizer]; ok {
if err := controller.NewRestoreFinalizerReconciler(
s.logger,
s.namespace,
s.mgr.GetClient(),
newPluginManager,
backupStoreGetter,
s.metrics,
).SetupWithManager(s.mgr); err != nil {
s.logger.Fatal(err, "unable to create controller", "controller", controller.RestoreFinalizer)
}
}

s.logger.Info("Server starting...")

if err := s.mgr.Start(s.ctx); err != nil {
Expand Down
2 changes: 2 additions & 0 deletions pkg/controller/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const (
RestoreOperations = "restore-operations"
Schedule = "schedule"
ServerStatusRequest = "server-status-request"
RestoreFinalizer = "restore-finalizer"
)

// DisableableControllers is a list of controllers that can be disabled
Expand All @@ -48,4 +49,5 @@ var DisableableControllers = []string{
RestoreOperations,
Schedule,
ServerStatusRequest,
RestoreFinalizer,
}
10 changes: 4 additions & 6 deletions pkg/controller/restore_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -645,18 +645,16 @@ func (r *restoreReconciler) runValidatedRestore(restore *api.Restore, info backu
r.logger.Debug("Restore WaitingForPluginOperationsPartiallyFailed")
restore.Status.Phase = api.RestorePhaseWaitingForPluginOperationsPartiallyFailed
} else {
r.logger.Debug("Restore partially failed")
restore.Status.Phase = api.RestorePhasePartiallyFailed
r.metrics.RegisterRestorePartialFailure(restore.Spec.ScheduleName)
r.logger.Debug("Restore FinalizingPartiallyFailed")
restore.Status.Phase = api.RestorePhaseFinalizingPartiallyFailed
}
} else {
if inProgressOperations {
r.logger.Debug("Restore WaitingForPluginOperations")
restore.Status.Phase = api.RestorePhaseWaitingForPluginOperations
} else {
r.logger.Debug("Restore completed")
restore.Status.Phase = api.RestorePhaseCompleted
r.metrics.RegisterRestoreSuccess(restore.Spec.ScheduleName)
r.logger.Debug("Restore Finalizing")
restore.Status.Phase = api.RestorePhaseFinalizing
}
}
return nil
Expand Down
213 changes: 213 additions & 0 deletions pkg/controller/restore_finalizer_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
/*
Copyright the Velero contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
"context"

"github.com/pkg/errors"
"github.com/sirupsen/logrus"
apierrors "k8s.io/apimachinery/pkg/api/errors"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/clock"

velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/metrics"
"github.com/vmware-tanzu/velero/pkg/persistence"
"github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt"
kubeutil "github.com/vmware-tanzu/velero/pkg/util/kube"
"github.com/vmware-tanzu/velero/pkg/util/results"
)

// restoreFinalizerReconciler reconciles Restore objects that are in the
// Finalizing or FinalizingPartiallyFailed phase and moves them to their
// terminal phase.
type restoreFinalizerReconciler struct {
	// Client is the embedded Kubernetes client used to get and patch restores.
	client.Client
	// namespace is passed to fetchBackupInfoInternal when looking up the backup
	// referenced by a restore.
	namespace string
	// logger is the base logger; Reconcile derives a per-request logger from it.
	logger logrus.FieldLogger
	// newPluginManager creates the plugin manager used while obtaining the
	// backup store; Reconcile cleans up its clients when it returns.
	newPluginManager func(logger logrus.FieldLogger) clientmgmt.Manager
	// backupStoreGetter resolves the backup store for the backup's location.
	backupStoreGetter persistence.ObjectBackupStoreGetter
	// metrics records restore success / partial-failure counts.
	metrics *metrics.ServerMetrics
	// clock supplies the time written to Status.CompletionTimestamp.
	clock clock.WithTickerAndDelayedExecution
}

// NewRestoreFinalizerReconciler returns a restoreFinalizerReconciler wired with
// the given logger, namespace, Kubernetes client, plugin-manager factory,
// backup-store getter and metrics. The reconciler's clock is a real clock.
func NewRestoreFinalizerReconciler(
	logger logrus.FieldLogger,
	namespace string,
	client client.Client,
	newPluginManager func(logrus.FieldLogger) clientmgmt.Manager,
	backupStoreGetter persistence.ObjectBackupStoreGetter,
	metrics *metrics.ServerMetrics,
) *restoreFinalizerReconciler {
	reconciler := &restoreFinalizerReconciler{clock: &clock.RealClock{}}

	reconciler.Client = client
	reconciler.namespace = namespace
	reconciler.logger = logger
	reconciler.newPluginManager = newPluginManager
	reconciler.backupStoreGetter = backupStoreGetter
	reconciler.metrics = metrics

	return reconciler
}

// SetupWithManager registers this reconciler with mgr as a controller for
// Restore objects.
func (r *restoreFinalizerReconciler) SetupWithManager(mgr ctrl.Manager) error {
	restoreController := ctrl.NewControllerManagedBy(mgr).For(&velerov1api.Restore{})
	return restoreController.Complete(r)
}

// Reconcile moves a restore that is in the Finalizing or
// FinalizingPartiallyFailed phase to its terminal phase.
//
// Restores in any other phase, and restores that no longer exist, are skipped
// without error. If the referenced backup cannot be found the restore is
// marked PartiallyFailed. Otherwise the finalization tasks are executed, their
// warnings and errors are added to the restore's counters and merged into the
// stored restore results, and the restore is patched to Completed or
// PartiallyFailed. Any other failure is returned as an error.
//
// +kubebuilder:rbac:groups=velero.io,resources=restores,verbs=get;list;watch;update
// +kubebuilder:rbac:groups=velero.io,resources=restores/status,verbs=get
func (r *restoreFinalizerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	log := r.logger.WithField("restore finalizer", req.String())
	log.Debug("restoreFinalizerReconciler getting restore")

	original := &velerov1api.Restore{}
	switch getErr := r.Get(ctx, req.NamespacedName, original); {
	case getErr == nil:
		// Restore fetched; carry on below.
	case apierrors.IsNotFound(getErr):
		// The restore is gone; there is nothing left to finalize.
		log.WithError(getErr).Error("restore not found")
		return ctrl.Result{}, nil
	default:
		return ctrl.Result{}, errors.Wrapf(getErr, "error getting restore %s", req.String())
	}

	// Mutate a copy so the untouched original can serve as the patch base.
	restore := original.DeepCopy()
	log.Debugf("restore: %s", restore.Name)

	log = r.logger.WithFields(logrus.Fields{"restore": req.String()})

	// Only restores that are awaiting finalization are handled here.
	currentPhase := restore.Status.Phase
	if currentPhase != velerov1api.RestorePhaseFinalizing &&
		currentPhase != velerov1api.RestorePhaseFinalizingPartiallyFailed {
		log.Debug("Restore is not awaiting finalization, skipping")
		return ctrl.Result{}, nil
	}

	info, err := fetchBackupInfoInternal(r.Client, r.namespace, restore.Spec.BackupName)
	if err != nil {
		if !apierrors.IsNotFound(err) {
			log.WithError(err).Error("error getting backup info")
			return ctrl.Result{}, errors.Wrap(err, "error getting backup info")
		}

		// Without the backup no finalization work can run, so close the
		// restore out as partially failed.
		log.WithError(err).Error("not found backup, skip")
		if patchErr := r.finishProcessing(velerov1api.RestorePhasePartiallyFailed, restore, original); patchErr != nil {
			log.WithError(patchErr).Error("error updating restore's final status")
			return ctrl.Result{}, errors.Wrap(patchErr, "error updating restore's final status")
		}
		return ctrl.Result{}, nil
	}

	pluginManager := r.newPluginManager(r.logger)
	defer pluginManager.CleanupClients()

	backupStore, err := r.backupStoreGetter.Get(info.location, pluginManager, r.logger)
	if err != nil {
		log.WithError(err).Error("error getting backup store")
		return ctrl.Result{}, errors.Wrap(err, "error getting backup store")
	}

	warnings, errs := (&finalizerContext{log: log}).execute()

	// Tally the new warnings and errors across the Velero, cluster and
	// per-namespace buckets.
	warningCnt := len(warnings.Velero) + len(warnings.Cluster)
	for _, nsWarnings := range warnings.Namespaces {
		warningCnt += len(nsWarnings)
	}
	errCnt := len(errs.Velero) + len(errs.Cluster)
	for _, nsErrs := range errs.Namespaces {
		errCnt += len(nsErrs)
	}
	restore.Status.Warnings += warningCnt
	restore.Status.Errors += errCnt

	// Any finalization error demotes the restore to the partially-failed track.
	if !errs.IsEmpty() {
		restore.Status.Phase = velerov1api.RestorePhaseFinalizingPartiallyFailed
	}

	// Persist the new findings only when there is something to add.
	if warningCnt+errCnt > 0 {
		if err := r.updateResults(backupStore, restore, &warnings, &errs); err != nil {
			log.WithError(err).Error("error updating results")
			return ctrl.Result{}, errors.Wrap(err, "error updating results")
		}
	}

	var finalPhase velerov1api.RestorePhase
	switch restore.Status.Phase {
	case velerov1api.RestorePhaseFinalizingPartiallyFailed:
		finalPhase = velerov1api.RestorePhasePartiallyFailed
	default:
		finalPhase = velerov1api.RestorePhaseCompleted
	}
	log.Infof("Marking restore %s", finalPhase)

	if err := r.finishProcessing(finalPhase, restore, original); err != nil {
		log.WithError(err).Error("error updating restore's final status")
		return ctrl.Result{}, errors.Wrap(err, "error updating restore's final status")
	}

	return ctrl.Result{}, nil
}

// updateResults loads the restore results already stored for restore, merges
// newWarnings and newErrs into the stored "warnings" and "errors" entries, and
// writes the combined results back through putResults.
func (r *restoreFinalizerReconciler) updateResults(backupStore persistence.BackupStore, restore *velerov1api.Restore, newWarnings *results.Result, newErrs *results.Result) error {
	stored, err := backupStore.GetRestoreResults(restore.Name)
	if err != nil {
		return errors.Wrap(err, "error getting restore results")
	}

	mergedWarnings, mergedErrs := stored["warnings"], stored["errors"]
	mergedWarnings.Merge(newWarnings)
	mergedErrs.Merge(newErrs)

	combined := map[string]results.Result{
		"warnings": mergedWarnings,
		"errors":   mergedErrs,
	}

	// errors.Wrap returns nil when the wrapped error is nil, so success
	// falls straight through.
	return errors.Wrap(putResults(restore, combined, backupStore), "error putting restore results")
}

// finishProcessing sets the terminal phase and completion timestamp on restore
// and patches the change against original.
//
// restorePhase selects the terminal phase: RestorePhasePartiallyFailed is kept
// as is, and any other value is treated as RestorePhaseCompleted.
//
// The matching restore metric is registered only after the patch succeeds.
// A failed patch makes Reconcile return an error and be retried while the
// restore is still in a Finalizing phase, so registering the metric before the
// patch would count the same restore once per retry.
func (r *restoreFinalizerReconciler) finishProcessing(restorePhase velerov1api.RestorePhase, restore *velerov1api.Restore, original *velerov1api.Restore) error {
	partiallyFailed := restorePhase == velerov1api.RestorePhasePartiallyFailed

	if partiallyFailed {
		restore.Status.Phase = velerov1api.RestorePhasePartiallyFailed
	} else {
		restore.Status.Phase = velerov1api.RestorePhaseCompleted
	}
	restore.Status.CompletionTimestamp = &metav1.Time{Time: r.clock.Now()}

	if err := kubeutil.PatchResource(original, restore, r.Client); err != nil {
		return err
	}

	// The terminal phase is persisted; it is now safe to record the outcome.
	if partiallyFailed {
		r.metrics.RegisterRestorePartialFailure(restore.Spec.ScheduleName)
	} else {
		r.metrics.RegisterRestoreSuccess(restore.Spec.ScheduleName)
	}

	return nil
}

// finalizerContext holds the dependencies required by the finalization tasks
// and exposes execute(), which runs those tasks in order.
type finalizerContext struct {
	// log is the logger used while running the finalization tasks.
	log logrus.FieldLogger
}

// execute runs the finalization tasks and returns the warnings and errors they
// produced, in that order. No tasks are implemented yet, so both results are
// always empty.
func (fc *finalizerContext) execute() (results.Result, results.Result) { //nolint:unparam //temporarily ignore the lint report: result 0 is always nil (unparam)
	var (
		warnings results.Result
		errs     results.Result
	)

	// implement finalization tasks
	fc.log.Debug("Starting running execute()")

	return warnings, errs
}
Loading

0 comments on commit edd0d3b

Please sign in to comment.