Skip to content

Commit

Permalink
Store last storageaccount reconciliation state on cluster resource; a…
Browse files Browse the repository at this point in the history
…void unnecessary reconciles
  • Loading branch information
tsatam committed Nov 27, 2023
1 parent e02087c commit d7b8008
Show file tree
Hide file tree
Showing 5 changed files with 213 additions and 11 deletions.
8 changes: 8 additions & 0 deletions pkg/operator/apis/aro.openshift.io/v1alpha1/cluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,14 @@ type ClusterStatus struct {
OperatorVersion string `json:"operatorVersion,omitempty"`
Conditions []operatorv1.OperatorCondition `json:"conditions,omitempty"`
RedHatKeysPresent []string `json:"redHatKeysPresent,omitempty"`
StorageAccounts StorageAccountsStatus `json:"storageAccounts,omitempty"`
}

// StorageAccountsStatus contains the parameters of the last successful storage account reconciliation
type StorageAccountsStatus struct {
LastCompletionTime metav1.Time `json:"lastCompletionTime,omitempty"`
Subnets []string `json:"subnets,omitempty"`
StorageAccounts []string `json:"storageAccounts,omitempty"`
}

// Cluster is the Schema for the clusters API
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@ import (
"context"
"fmt"
"net/http"
"reflect"
"sort"
"time"

"github.com/Azure/go-autorest/autorest"
"github.com/Azure/go-autorest/autorest/azure"
imageregistryv1 "github.com/openshift/api/imageregistry/v1"
machinev1beta1 "github.com/openshift/api/machine/v1beta1"
"github.com/sirupsen/logrus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
Expand All @@ -34,6 +37,9 @@ const (
ControllerName = "StorageAccounts"

controllerEnabled = "aro.storageaccounts.enabled"

// we should not attempt to perform reconciliations against the Azure API more than once within this time period
reconcileTimeout = time.Hour
)

// Reconciler is the controller struct
Expand Down Expand Up @@ -69,6 +75,12 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.

r.log.Debug("running")

// ensure we only reconcile after the last completed reconcile + timeout duration
reconcileTimeCutoff := instance.Status.StorageAccounts.LastCompletionTime.Add(reconcileTimeout)
if diff := reconcileTimeCutoff.Sub(time.Now()); diff > 0 {
return reconcile.Result{RequeueAfter: diff}, nil
}

location := instance.Spec.Location
resource, err := azure.ParseResourceID(instance.Spec.ResourceID)
if err != nil {
Expand Down Expand Up @@ -101,6 +113,10 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.
return reconcile.Result{}, err
}

if r.parametersMatchLastReconcile(ctx, instance, subnets, storageAccounts) {
return reconcile.Result{}, nil
}

err = manager.reconcileAccounts(ctx, subnets, storageAccounts)
if err != nil {
if retryAfter, ok := errIsRateLimited(err); ok {
Expand All @@ -109,6 +125,11 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.
return reconcile.Result{}, err
}

err = r.updateCompletionStatus(ctx, instance, subnets, storageAccounts)
if err != nil {
return reconcile.Result{}, err
}

return reconcile.Result{}, nil
}

Expand Down Expand Up @@ -146,6 +167,7 @@ func (r *Reconciler) getSubnetsToReconcile(ctx context.Context, instance *arov1a
return nil, err
}
subnets = append(subnets, clusterSubnetsToReconcile...)
sort.Strings(subnets)

return subnets, nil
}
Expand Down Expand Up @@ -175,10 +197,28 @@ func (r *Reconciler) getStorageAccountNames(ctx context.Context, instance *arov1
return nil, fmt.Errorf("azure storage field is nil in image registry config")
}

return []string{
storageAccounts := []string{
"cluster" + instance.Spec.StorageSuffix, // this is our creation, so name is deterministic
rc.Spec.Storage.Azure.AccountName,
}, nil
}
sort.Strings(storageAccounts)
return storageAccounts, nil
}

func (r *Reconciler) parametersMatchLastReconcile(ctx context.Context, instance *arov1alpha1.Cluster, subnets, storageAccounts []string) bool {
sa := instance.Status.StorageAccounts
return reflect.DeepEqual(sa.Subnets, subnets) && reflect.DeepEqual(sa.StorageAccounts, storageAccounts)
}

func (r *Reconciler) updateCompletionStatus(ctx context.Context, instance *arov1alpha1.Cluster, subnets, storageAccounts []string) error {
updatedInstance := instance.DeepCopy()
updatedInstance.Status.StorageAccounts = arov1alpha1.StorageAccountsStatus{
LastCompletionTime: metav1.Now(),
Subnets: subnets,
StorageAccounts: storageAccounts,
}

return r.client.Status().Patch(ctx, updatedInstance, client.MergeFrom(instance))
}

func errIsRateLimited(err error) (time.Duration, bool) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kruntime "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1"
"github.com/Azure/ARO-RP/pkg/util/azureclient"
"github.com/Azure/ARO-RP/pkg/util/clusterauthorizer"
"github.com/Azure/ARO-RP/pkg/util/cmp"
_ "github.com/Azure/ARO-RP/pkg/util/scheme"
utilerror "github.com/Azure/ARO-RP/test/util/error"
)
Expand All @@ -45,6 +47,8 @@ var (
subnetNameWorker = "worker"
subnetNameMaster = "master"

numWorkers int32 = 3

storageSuffix = "random-suffix"
clusterStorageAccountName = "cluster" + storageSuffix
registryStorageAccountName = "image-registry-account"
Expand Down Expand Up @@ -81,6 +85,7 @@ func TestReconcile(t *testing.T) {
fakeReconcileAccounts func(ctx context.Context, subnets, storageAccounts []string) error
wantRequeueAfter time.Duration
wantErr string
wantStorageAccountsStatus *arov1alpha1.StorageAccountsStatus
}{
{
name: "no cluster object - returns error",
Expand All @@ -100,10 +105,6 @@ func TestReconcile(t *testing.T) {
},
wantErr: "parsing failed for invalid resource id. Invalid resource Id format",
},
{
name: "correct prerequisites - works as expected",
operatorFlag: true,
},
{
name: "error during subnet checks - returns direct error",
operatorFlag: true,
Expand Down Expand Up @@ -136,8 +137,65 @@ func TestReconcile(t *testing.T) {
},
wantRequeueAfter: 3600 * time.Second,
},
{
name: "correct prerequisites - works as expected",
operatorFlag: true,
wantStorageAccountsStatus: &arov1alpha1.StorageAccountsStatus{
LastCompletionTime: metav1.Now(),
Subnets: []string{
masterSubnetId,
workerSubnetId,
rpPeSubnetId,
rpSubnetId,
gwySubnetId,
},
StorageAccounts: []string{
clusterStorageAccountName,
registryStorageAccountName,
},
},
},
{
name: "correct prerequisites but last reconcile was less than one hour ago - does nothing",
operatorFlag: true,
instance: func(c *arov1alpha1.Cluster) {
c.Status.StorageAccounts = arov1alpha1.StorageAccountsStatus{
LastCompletionTime: metav1.NewTime(metav1.Now().Add(-30 * time.Minute)),
}
},
fakeCheckClusterSubnetsToReconcile: func(ctx context.Context, clusterSubnets []string) ([]string, error) {
return nil, fmt.Errorf("should not check subnets")
},
fakeReconcileAccounts: func(ctx context.Context, subnets, storageAccounts []string) error {
return fmt.Errorf("should not reconcile accounts")
},
wantRequeueAfter: time.Duration(30 * time.Minute),
},
{
name: "correct prerequisites but subnets to reconcile match last reconcile - does nothing",
operatorFlag: true,
instance: func(c *arov1alpha1.Cluster) {
c.Status.StorageAccounts = arov1alpha1.StorageAccountsStatus{
Subnets: []string{
masterSubnetId,
workerSubnetId,
rpPeSubnetId,
rpSubnetId,
gwySubnetId,
},
StorageAccounts: []string{
clusterStorageAccountName,
registryStorageAccountName,
},
}
},
fakeReconcileAccounts: func(ctx context.Context, subnets, storageAccounts []string) error {
return fmt.Errorf("should not reconcile accounts")
},
},
} {
t.Run(tt.name, func(t *testing.T) {
ctx := context.Background()
instance := getValidClusterInstance(tt.operatorFlag)
if tt.instance != nil {
tt.instance(instance)
Expand Down Expand Up @@ -172,7 +230,8 @@ func TestReconcile(t *testing.T) {
WithObjects(instance).
WithObjects(rc).
WithObjects(azCredSecret).
WithLists(getValidMachines()).
WithLists(getMasterMachines()).
WithObjects(getWorkerMachineSet()).
Build()

fakeNewManager := func(
Expand Down Expand Up @@ -204,11 +263,23 @@ func TestReconcile(t *testing.T) {
newManager: fakeNewManager,
}

result, err := r.Reconcile(context.Background(), reconcile.Request{})
result, err := r.Reconcile(ctx, reconcile.Request{})

utilerror.AssertErrorMessage(t, err, tt.wantErr)
if result.RequeueAfter != tt.wantRequeueAfter {
t.Errorf("wanted requeue after %v but got %v", tt.wantRequeueAfter, result.RequeueAfter)
if ok, diff := durationIsCloseTo(tt.wantRequeueAfter, result.RequeueAfter, time.Second); !ok {
t.Errorf("wanted requeue after %v but got %v, diff %s", tt.wantRequeueAfter, result.RequeueAfter, diff)
}
if tt.wantStorageAccountsStatus != nil {
gotInstance := &arov1alpha1.Cluster{}
if err := clientFake.Get(ctx, types.NamespacedName{Name: arov1alpha1.SingletonClusterName}, gotInstance); err != nil {
t.Fatal(err)
}

gotStorageAccountsStatus := gotInstance.Status.StorageAccounts

if diff := cmp.Diff(*tt.wantStorageAccountsStatus, gotStorageAccountsStatus, cmpoptsSortStringSlices, cmpopts.EquateApproxTime(time.Second)); diff != "" {
t.Errorf("wanted storageAccountsStatus %v but got %v, diff: %s", tt.wantStorageAccountsStatus, gotStorageAccountsStatus, diff)
}
}
})
}
Expand All @@ -233,7 +304,7 @@ func getValidClusterInstance(operatorFlag bool) *arov1alpha1.Cluster {
}
}

func getValidMachines() *machinev1beta1.MachineList {
func getMasterMachines() *machinev1beta1.MachineList {
list := &machinev1beta1.MachineList{
Items: []machinev1beta1.Machine{},
}
Expand Down Expand Up @@ -284,6 +355,38 @@ func getValidMachines() *machinev1beta1.MachineList {
return list
}

func getWorkerMachineSet() *machinev1beta1.MachineSet {
return &machinev1beta1.MachineSet{
ObjectMeta: metav1.ObjectMeta{
Name: "worker",
Namespace: "openshift-machine-api",
Labels: map[string]string{
"machine.openshift.io/cluster-api-machine-role": "worker",
},
},
Spec: machinev1beta1.MachineSetSpec{
Replicas: &numWorkers,
Template: machinev1beta1.MachineTemplateSpec{
ObjectMeta: machinev1beta1.ObjectMeta{
Labels: map[string]string{
"machine.openshift.io/cluster-api-machine-role": "worker",
},
},
Spec: machinev1beta1.MachineSpec{
ProviderSpec: machinev1beta1.ProviderSpec{
Value: &kruntime.RawExtension{
Raw: []byte(fmt.Sprintf(
`{"resourceGroup":"%s","networkResourceGroup":"%s","vnet":"%s","subnet":"%s"}`,
managedResourceGroupName, vnetResourceGroup, vnetName, subnetNameWorker,
)),
},
},
},
},
},
}
}

type fakeManager struct {
fakeCheckClusterSubnetsToReconcile func(ctx context.Context, clusterSubnets []string) ([]string, error)
fakeReconcileAccounts func(ctx context.Context, subnets, storageAccounts []string) error
Expand All @@ -302,3 +405,11 @@ func (f *fakeManager) reconcileAccounts(ctx context.Context, subnets, storageAcc
}
return f.fakeReconcileAccounts(ctx, subnets, storageAccounts)
}

func durationIsCloseTo(a, b, margin time.Duration) (bool, time.Duration) {
diff := a - b
if diff < 0 {
diff = -diff
}
return diff <= margin, diff
}
16 changes: 16 additions & 0 deletions pkg/operator/deploy/staticresources/aro.openshift.io_clusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,22 @@ spec:
items:
type: string
type: array
storageAccounts:
description: StorageAccountsStatus contains the parameters of the
last successful storage account reconciliation
properties:
lastCompletionTime:
format: date-time
type: string
storageAccounts:
items:
type: string
type: array
subnets:
items:
type: string
type: array
type: object
type: object
type: object
served: true
Expand Down

0 comments on commit d7b8008

Please sign in to comment.