From 66c113a541699e8926d248b43b0265eb4395362e Mon Sep 17 00:00:00 2001 From: Mohammed Safwan Aslam Kazi <76790986+safwank97@users.noreply.github.com> Date: Tue, 17 Oct 2023 18:26:31 -0400 Subject: [PATCH] Propagate errors of ARO MachineHealthCheckController to ARO Operator (#3177) * Propagate errors of ARO MachineHealthCheckController to ARO Operator --- .../machinehealthcheck_controller.go | 40 +++++--- .../machinehealthcheck_controller_test.go | 95 ++++++++++++++++--- 2 files changed, 109 insertions(+), 26 deletions(-) diff --git a/pkg/operator/controllers/machinehealthcheck/machinehealthcheck_controller.go b/pkg/operator/controllers/machinehealthcheck/machinehealthcheck_controller.go index 809aeb0b9a7..7db456217f0 100644 --- a/pkg/operator/controllers/machinehealthcheck/machinehealthcheck_controller.go +++ b/pkg/operator/controllers/machinehealthcheck/machinehealthcheck_controller.go @@ -13,7 +13,6 @@ import ( monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "github.com/sirupsen/logrus" kruntime "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" @@ -22,6 +21,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1" + "github.com/Azure/ARO-RP/pkg/operator/controllers/base" "github.com/Azure/ARO-RP/pkg/util/dynamichelper" ) @@ -38,44 +38,55 @@ const ( ) type Reconciler struct { - log *logrus.Entry + base.AROController dh dynamichelper.Interface - - client client.Client } func NewReconciler(log *logrus.Entry, client client.Client, dh dynamichelper.Interface) *Reconciler { return &Reconciler{ - log: log, - dh: dh, - client: client, + AROController: base.AROController{ + Log: log, + Client: client, + Name: ControllerName, + }, + dh: dh, } } +// Reconcile watches MachineHealthCheck objects, and if any changes, +// reconciles the associated ARO MachineHealthCheck object func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) { - instance := &arov1alpha1.Cluster{} - err := r.client.Get(ctx, types.NamespacedName{Name: arov1alpha1.SingletonClusterName}, instance) + instance, err := r.GetCluster(ctx) + if err != nil { return reconcile.Result{}, err } if !instance.Spec.OperatorFlags.GetSimpleBoolean(enabled) { - r.log.Debug("controller is disabled") + r.Log.Debug("controller is disabled") return reconcile.Result{}, nil } - r.log.Debug("running") + r.Log.Debug("running") if !instance.Spec.OperatorFlags.GetSimpleBoolean(managed) { err := r.dh.EnsureDeleted(ctx, "MachineHealthCheck", "openshift-machine-api", "aro-machinehealthcheck") if err != nil { + r.Log.Error(err) + r.SetDegraded(ctx, err) + return reconcile.Result{RequeueAfter: time.Hour}, err } err = r.dh.EnsureDeleted(ctx, "PrometheusRule", "openshift-machine-api", "mhc-remediation-alert") if err != nil { + r.Log.Error(err) + r.SetDegraded(ctx, err) + return reconcile.Result{RequeueAfter: time.Hour}, err } + + r.ClearConditions(ctx) return reconcile.Result{}, nil } @@ -84,6 +95,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl. for _, asset := range [][]byte{machinehealthcheckYaml, mhcremediationalertYaml} { resource, _, err := scheme.Codecs.UniversalDeserializer().Decode(asset, nil, nil) if err != nil { + r.Log.Error(err) + r.SetDegraded(ctx, err) + return reconcile.Result{}, err } @@ -105,9 +119,13 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl. // create/update the MHC CR err = r.dh.Ensure(ctx, resources...) if err != nil { + r.Log.Error(err) + r.SetDegraded(ctx, err) + return reconcile.Result{}, err } + r.ClearConditions(ctx) return reconcile.Result{}, nil } diff --git a/pkg/operator/controllers/machinehealthcheck/machinehealthcheck_controller_test.go b/pkg/operator/controllers/machinehealthcheck/machinehealthcheck_controller_test.go index d25953cd03a..a9ef0dd5a46 100644 --- a/pkg/operator/controllers/machinehealthcheck/machinehealthcheck_controller_test.go +++ b/pkg/operator/controllers/machinehealthcheck/machinehealthcheck_controller_test.go @@ -11,6 +11,7 @@ import ( "time" "github.com/golang/mock/gomock" + operatorv1 "github.com/openshift/api/operator/v1" "github.com/sirupsen/logrus" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctrl "sigs.k8s.io/controller-runtime" @@ -19,24 +20,34 @@ import ( arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1" mock_dynamichelper "github.com/Azure/ARO-RP/pkg/util/mocks/dynamichelper" _ "github.com/Azure/ARO-RP/pkg/util/scheme" + utilconditions "github.com/Azure/ARO-RP/test/util/conditions" utilerror "github.com/Azure/ARO-RP/test/util/error" ) // Test reconcile function -func TestReconciler(t *testing.T) { +func TestMachineHealthCheckReconciler(t *testing.T) { + transitionTime := metav1.Time{Time: time.Now()} + defaultAvailable := utilconditions.ControllerDefaultAvailable(ControllerName) + defaultProgressing := utilconditions.ControllerDefaultProgressing(ControllerName) + defaultDegraded := utilconditions.ControllerDefaultDegraded(ControllerName) + + defaultConditions := []operatorv1.OperatorCondition{defaultAvailable, defaultProgressing, defaultDegraded} + type test struct { name string instance *arov1alpha1.Cluster mocks func(mdh *mock_dynamichelper.MockInterface) + wantConditions []operatorv1.OperatorCondition wantErr string wantRequeueAfter time.Duration } for _, tt := range []*test{ { - name: "Failure to get instance", - mocks: func(mdh *mock_dynamichelper.MockInterface) {}, - wantErr: `clusters.aro.openshift.io "cluster" not found`, + name: "Failure to get instance", + mocks: func(mdh *mock_dynamichelper.MockInterface) {}, + wantConditions: defaultConditions, + wantErr: `clusters.aro.openshift.io "cluster" not found`, }, { name: "Enabled Feature Flag is false", @@ -49,12 +60,16 @@ func TestReconciler(t *testing.T) { enabled: strconv.FormatBool(false), }, }, + Status: arov1alpha1.ClusterStatus{ + Conditions: defaultConditions, + }, }, mocks: func(mdh *mock_dynamichelper.MockInterface) { mdh.EXPECT().EnsureDeleted(gomock.Any(), "MachineHealthCheck", "openshift-machine-api", "aro-machinehealthcheck").Times(0) mdh.EXPECT().Ensure(gomock.Any(), gomock.Any()).Return(nil).Times(0) }, - wantErr: "", + wantConditions: defaultConditions, + wantErr: "", }, { name: "Managed Feature Flag is false: ensure mhc and its alert are deleted", @@ -68,12 +83,16 @@ func TestReconciler(t *testing.T) { managed: strconv.FormatBool(false), }, }, + Status: arov1alpha1.ClusterStatus{ + Conditions: defaultConditions, + }, }, mocks: func(mdh *mock_dynamichelper.MockInterface) { mdh.EXPECT().EnsureDeleted(gomock.Any(), "MachineHealthCheck", "openshift-machine-api", "aro-machinehealthcheck").Times(1) mdh.EXPECT().EnsureDeleted(gomock.Any(), "PrometheusRule", "openshift-machine-api", "mhc-remediation-alert").Times(1) }, - wantErr: "", + wantConditions: defaultConditions, + wantErr: "", }, { name: "Managed Feature Flag is false: mhc fails to delete, an error is returned", @@ -87,11 +106,24 @@ func TestReconciler(t *testing.T) { managed: strconv.FormatBool(false), }, }, + Status: arov1alpha1.ClusterStatus{ + Conditions: defaultConditions, + }, }, mocks: func(mdh *mock_dynamichelper.MockInterface) { mdh.EXPECT().EnsureDeleted(gomock.Any(), "MachineHealthCheck", "openshift-machine-api", "aro-machinehealthcheck").Return(errors.New("Could not delete mhc")) }, - wantErr: "Could not delete mhc", + wantErr: "Could not delete mhc", + wantConditions: []operatorv1.OperatorCondition{ + defaultAvailable, + defaultProgressing, + { + Type: ControllerName + "Controller" + operatorv1.OperatorStatusTypeDegraded, + Status: operatorv1.ConditionTrue, + LastTransitionTime: transitionTime, + Message: "Could not delete mhc", + }, + }, wantRequeueAfter: time.Hour, }, { @@ -106,12 +138,25 @@ func TestReconciler(t *testing.T) { managed: strconv.FormatBool(false), }, }, + Status: arov1alpha1.ClusterStatus{ + Conditions: defaultConditions, + }, }, mocks: func(mdh *mock_dynamichelper.MockInterface) { mdh.EXPECT().EnsureDeleted(gomock.Any(), "MachineHealthCheck", "openshift-machine-api", "aro-machinehealthcheck").Times(1) mdh.EXPECT().EnsureDeleted(gomock.Any(), "PrometheusRule", "openshift-machine-api", "mhc-remediation-alert").Return(errors.New("Could not delete mhc alert")) }, - wantErr: "Could not delete mhc alert", + wantErr: "Could not delete mhc alert", + wantConditions: []operatorv1.OperatorCondition{ + defaultAvailable, + defaultProgressing, + { + Type: ControllerName + "Controller" + operatorv1.OperatorStatusTypeDegraded, + Status: operatorv1.ConditionTrue, + LastTransitionTime: transitionTime, + Message: "Could not delete mhc alert", + }, + }, wantRequeueAfter: time.Hour, }, { @@ -130,7 +175,8 @@ func TestReconciler(t *testing.T) { mocks: func(mdh *mock_dynamichelper.MockInterface) { mdh.EXPECT().Ensure(gomock.Any(), gomock.Any()).Return(nil).Times(1) }, - wantErr: "", + wantConditions: defaultConditions, + wantErr: "", }, { name: "When ensuring resources fails, an error is returned", @@ -144,11 +190,24 @@ func TestReconciler(t *testing.T) { managed: strconv.FormatBool(true), }, }, + Status: arov1alpha1.ClusterStatus{ + Conditions: defaultConditions, + }, }, mocks: func(mdh *mock_dynamichelper.MockInterface) { mdh.EXPECT().Ensure(gomock.Any(), gomock.Any()).Return(errors.New("failed to ensure")) }, wantErr: "failed to ensure", + wantConditions: []operatorv1.OperatorCondition{ + defaultAvailable, + defaultProgressing, + { + Type: ControllerName + "Controller" + operatorv1.OperatorStatusTypeDegraded, + Status: operatorv1.ConditionTrue, + LastTransitionTime: transitionTime, + Message: "failed to ensure", + }, + }, }, } { t.Run(tt.name, func(t *testing.T) { @@ -165,18 +224,24 @@ func TestReconciler(t *testing.T) { } ctx := context.Background() - r := &Reconciler{ - log: logrus.NewEntry(logrus.StandardLogger()), - dh: mdh, - client: clientBuilder.Build(), - } + + r := NewReconciler( + logrus.NewEntry(logrus.StandardLogger()), + clientBuilder.Build(), + mdh, + ) + request := ctrl.Request{} request.Name = "cluster" result, err := r.Reconcile(ctx, request) if tt.wantRequeueAfter != result.RequeueAfter { - t.Errorf("Wanted to requeue after %v but was set to %v", tt.wantRequeueAfter, result.RequeueAfter) + t.Errorf("wanted to requeue after %v but was set to %v", tt.wantRequeueAfter, result.RequeueAfter) + } + + if tt.instance != nil { + utilconditions.AssertControllerConditions(t, ctx, r.AROController.Client, tt.wantConditions) } utilerror.AssertErrorMessage(t, err, tt.wantErr)