From 1b346a77cfe72edb346b9b0f0a7de7babf8fd5de Mon Sep 17 00:00:00 2001 From: Tanmay Satam Date: Thu, 9 Nov 2023 09:30:23 -0500 Subject: [PATCH] Store last storageaccount reconciliation state on cluster resource; avoid unnecessary reconciles --- .../v1alpha1/cluster_types.go | 8 ++ .../v1alpha1/zz_generated.deepcopy.go | 27 +++++++ .../storageaccount_controller.go | 44 ++++++++++- .../storageaccount_controller_test.go | 77 +++++++++++++++++-- .../aro.openshift.io_clusters.yaml | 16 ++++ 5 files changed, 165 insertions(+), 7 deletions(-) diff --git a/pkg/operator/apis/aro.openshift.io/v1alpha1/cluster_types.go b/pkg/operator/apis/aro.openshift.io/v1alpha1/cluster_types.go index d8008e1a019..15051334a56 100644 --- a/pkg/operator/apis/aro.openshift.io/v1alpha1/cluster_types.go +++ b/pkg/operator/apis/aro.openshift.io/v1alpha1/cluster_types.go @@ -130,6 +130,14 @@ type ClusterStatus struct { OperatorVersion string `json:"operatorVersion,omitempty"` Conditions []operatorv1.OperatorCondition `json:"conditions,omitempty"` RedHatKeysPresent []string `json:"redHatKeysPresent,omitempty"` + StorageAccounts StorageAccountsStatus `json:"storageAccounts,omitempty"` +} + +// StorageAccountsStatus contains the parameters of the last successful storage account reconciliation +type StorageAccountsStatus struct { + LastCompletionTime metav1.Time `json:"lastCompletionTime,omitempty"` + Subnets []string `json:"subnets,omitempty"` + StorageAccounts []string `json:"storageAccounts,omitempty"` } // Cluster is the Schema for the clusters API diff --git a/pkg/operator/apis/aro.openshift.io/v1alpha1/zz_generated.deepcopy.go b/pkg/operator/apis/aro.openshift.io/v1alpha1/zz_generated.deepcopy.go index f71411d2639..bf298148d82 100644 --- a/pkg/operator/apis/aro.openshift.io/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/operator/apis/aro.openshift.io/v1alpha1/zz_generated.deepcopy.go @@ -134,6 +134,7 @@ func (in *ClusterStatus) DeepCopyInto(out *ClusterStatus) { *out = make([]string, len(*in)) copy(*out, *in) } + in.StorageAccounts.DeepCopyInto(&out.StorageAccounts) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterStatus. @@ -201,3 +202,29 @@ func (in OperatorFlags) DeepCopy() OperatorFlags { in.DeepCopyInto(out) return *out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *StorageAccountsStatus) DeepCopyInto(out *StorageAccountsStatus) { + *out = *in + in.LastCompletionTime.DeepCopyInto(&out.LastCompletionTime) + if in.Subnets != nil { + in, out := &in.Subnets, &out.Subnets + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.StorageAccounts != nil { + in, out := &in.StorageAccounts, &out.StorageAccounts + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StorageAccountsStatus. +func (in *StorageAccountsStatus) DeepCopy() *StorageAccountsStatus { + if in == nil { + return nil + } + out := new(StorageAccountsStatus) + in.DeepCopyInto(out) + return out +} diff --git a/pkg/operator/controllers/storageaccounts/storageaccount_controller.go b/pkg/operator/controllers/storageaccounts/storageaccount_controller.go index 4d3ca05c24f..ab519d4ecbf 100644 --- a/pkg/operator/controllers/storageaccounts/storageaccount_controller.go +++ b/pkg/operator/controllers/storageaccounts/storageaccount_controller.go @@ -7,6 +7,8 @@ import ( "context" "fmt" "net/http" + "reflect" + "sort" "time" "github.com/Azure/go-autorest/autorest" @@ -15,6 +17,7 @@ import ( machinev1beta1 "github.com/openshift/api/machine/v1beta1" "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" @@ -35,6 +38,9 @@ const ( ControllerName = "StorageAccounts" controllerEnabled = "aro.storageaccounts.enabled" + + // we should not attempt to perform reconciliations against the Azure API more than once within this time period + reconcileTimeout = time.Hour ) // Reconciler is the controller struct @@ -70,6 +76,12 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl. r.log.Debug("running") + reconcileTimeCutoff := metav1.NewTime(metav1.Now().Add(-1 * reconcileTimeout)) + if !instance.Status.StorageAccounts.LastCompletionTime.Before(&reconcileTimeCutoff) { + // too early to reconcile + return reconcile.Result{}, nil + } + location := instance.Spec.Location resource, err := azure.ParseResourceID(instance.Spec.ResourceID) if err != nil { @@ -102,6 +114,10 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl. return reconcile.Result{}, err } + if r.parametersMatchLastReconcile(ctx, instance, subnets, storageAccounts) { + return reconcile.Result{}, nil + } + err = manager.reconcileAccounts(ctx, subnets, storageAccounts) if err != nil { if retryAfter, ok := errIsRateLimited(err); ok { @@ -110,6 +126,11 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl. return reconcile.Result{}, err } + err = r.updateCompletionStatus(ctx, instance, subnets, storageAccounts) + if err != nil { + return reconcile.Result{}, err + } + return reconcile.Result{}, nil } @@ -147,6 +168,7 @@ func (r *Reconciler) getSubnetsToReconcile(ctx context.Context, instance *arov1a return nil, err } subnets = append(subnets, clusterSubnetsToReconcile...) + sort.Strings(subnets) return subnets, nil } @@ -176,10 +198,28 @@ func (r *Reconciler) getStorageAccounts(ctx context.Context, instance *arov1alph return nil, fmt.Errorf("azure storage field is nil in image registry config") } - return []string{ + storageAccounts := []string{ "cluster" + instance.Spec.StorageSuffix, // this is our creation, so name is deterministic rc.Spec.Storage.Azure.AccountName, - }, nil + } + sort.Strings(storageAccounts) + return storageAccounts, nil +} + +func (r *Reconciler) parametersMatchLastReconcile(ctx context.Context, instance *arov1alpha1.Cluster, subnets, storageAccounts []string) bool { + sa := instance.Status.StorageAccounts + return reflect.DeepEqual(sa.Subnets, subnets) && reflect.DeepEqual(sa.StorageAccounts, storageAccounts) +} + +func (r *Reconciler) updateCompletionStatus(ctx context.Context, instance *arov1alpha1.Cluster, subnets, storageAccounts []string) error { + updatedInstance := instance.DeepCopy() + updatedInstance.Status.StorageAccounts = arov1alpha1.StorageAccountsStatus{ + LastCompletionTime: metav1.Now(), + Subnets: subnets, + StorageAccounts: storageAccounts, + } + + return r.client.Status().Patch(ctx, updatedInstance, client.MergeFrom(instance)) } func errIsRateLimited(err error) (time.Duration, bool) { diff --git a/pkg/operator/controllers/storageaccounts/storageaccount_controller_test.go b/pkg/operator/controllers/storageaccounts/storageaccount_controller_test.go index c482a67a309..d48c6544056 100644 --- a/pkg/operator/controllers/storageaccounts/storageaccount_controller_test.go +++ b/pkg/operator/controllers/storageaccounts/storageaccount_controller_test.go @@ -19,12 +19,14 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kruntime "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/reconcile" arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1" "github.com/Azure/ARO-RP/pkg/util/azureclient" "github.com/Azure/ARO-RP/pkg/util/clusterauthorizer" + "github.com/Azure/ARO-RP/pkg/util/cmp" _ "github.com/Azure/ARO-RP/pkg/util/scheme" utilerror "github.com/Azure/ARO-RP/test/util/error" ) @@ -170,6 +172,7 @@ func TestReconcile(t *testing.T) { fakeReconcileAccounts func(ctx context.Context, subnets, storageAccounts []string) error wantRequeueAfter time.Duration wantErr string + wantStorageAccountsStatus *arov1alpha1.StorageAccountsStatus }{ { name: "no cluster object - returns error", @@ -189,10 +192,6 @@ func TestReconcile(t *testing.T) { }, wantErr: "parsing failed for invalid resource id. Invalid resource Id format", }, - { - name: "correct prerequisites - works as expected", - operatorFlag: true, - }, { name: "error during subnet checks - returns direct error", operatorFlag: true, @@ -225,8 +224,64 @@ func TestReconcile(t *testing.T) { }, wantRequeueAfter: 3600 * time.Second, }, + { + name: "correct prerequisites - works as expected", + operatorFlag: true, + wantStorageAccountsStatus: &arov1alpha1.StorageAccountsStatus{ + LastCompletionTime: metav1.Now(), + Subnets: []string{ + masterSubnetId, + workerSubnetId, + rpPeSubnetId, + rpSubnetId, + gwySubnetId, + }, + StorageAccounts: []string{ + clusterStorageAccountName, + registryStorageAccountName, + }, + }, + }, + { + name: "correct prerequisites but last reconcile was less than one hour ago - does nothing", + operatorFlag: true, + instance: func(c *arov1alpha1.Cluster) { + c.Status.StorageAccounts = arov1alpha1.StorageAccountsStatus{ + LastCompletionTime: metav1.NewTime(metav1.Now().Add(-30 * time.Minute)), + } + }, + fakeCheckClusterSubnetsToReconcile: func(ctx context.Context, clusterSubnets []string) ([]string, error) { + return nil, fmt.Errorf("should not check subnets") + }, + fakeReconcileAccounts: func(ctx context.Context, subnets, storageAccounts []string) error { + return fmt.Errorf("should not reconcile accounts") + }, + }, + { + name: "correct prerequisites but subnets to reconcile match last reconcile - does nothing", + operatorFlag: true, + instance: func(c *arov1alpha1.Cluster) { + c.Status.StorageAccounts = arov1alpha1.StorageAccountsStatus{ + Subnets: []string{ + masterSubnetId, + workerSubnetId, + rpPeSubnetId, + rpSubnetId, + gwySubnetId, + }, + StorageAccounts: []string{ + clusterStorageAccountName, + registryStorageAccountName, + }, + } + }, + fakeReconcileAccounts: func(ctx context.Context, subnets, storageAccounts []string) error { + return fmt.Errorf("should not reconcile accounts") + }, + }, } { t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() instance := getValidClusterInstance(tt.operatorFlag) if tt.instance != nil { tt.instance(instance) @@ -293,12 +348,24 @@ func TestReconcile(t *testing.T) { newManager: fakeNewManager, } - result, err := r.Reconcile(context.Background(), reconcile.Request{}) + result, err := r.Reconcile(ctx, reconcile.Request{}) utilerror.AssertErrorMessage(t, err, tt.wantErr) if result.RequeueAfter != tt.wantRequeueAfter { t.Errorf("wanted requeue after %v but got %v", tt.wantRequeueAfter, result.RequeueAfter) } + if tt.wantStorageAccountsStatus != nil { + gotInstance := &arov1alpha1.Cluster{} + if err := clientFake.Get(ctx, types.NamespacedName{Name: arov1alpha1.SingletonClusterName}, gotInstance); err != nil { + t.Fatal(err) + } + + gotStorageAccountsStatus := gotInstance.Status.StorageAccounts + + if diff := cmp.Diff(*tt.wantStorageAccountsStatus, gotStorageAccountsStatus, cmpoptsSortStringSlices, cmpopts.EquateApproxTime(time.Second)); diff != "" { + t.Errorf("wanted storageAccountsStatus %v but got %v, diff: %s", tt.wantStorageAccountsStatus, gotStorageAccountsStatus, diff) + } + } }) } } diff --git a/pkg/operator/deploy/staticresources/aro.openshift.io_clusters.yaml b/pkg/operator/deploy/staticresources/aro.openshift.io_clusters.yaml index 19963a5840b..43319cc7cd0 100644 --- a/pkg/operator/deploy/staticresources/aro.openshift.io_clusters.yaml +++ b/pkg/operator/deploy/staticresources/aro.openshift.io_clusters.yaml @@ -140,6 +140,22 @@ spec: items: type: string type: array + storageAccounts: + description: StorageAccountsStatus contains the parameters of the + last successful storage account reconciliation + properties: + lastCompletionTime: + format: date-time + type: string + storageAccounts: + items: + type: string + type: array + subnets: + items: + type: string + type: array + type: object type: object type: object served: true