Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Propagate errors of ARO MachineHealthCheckController to ARO Operator #3177

Merged
merged 7 commits into from
Oct 17, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"github.com/sirupsen/logrus"
kruntime "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
Expand All @@ -22,6 +21,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"

arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1"
"github.com/Azure/ARO-RP/pkg/operator/controllers/base"
"github.com/Azure/ARO-RP/pkg/util/dynamichelper"
)

Expand All @@ -38,44 +38,59 @@ const (
)

type Reconciler struct {
log *logrus.Entry
base.AROController

dh dynamichelper.Interface

client client.Client
}

func NewReconciler(log *logrus.Entry, client client.Client, dh dynamichelper.Interface) *Reconciler {
return &Reconciler{
log: log,
dh: dh,
client: client,
AROController: base.AROController{
Log: log,
Client: client,
Name: ControllerName,
},
dh: dh,
}
}

// Reconcile watches MachineHealthCheck objects, and if any changes,
// reconciles the associated ARO MachineHealthCheck object
func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) {
instance := &arov1alpha1.Cluster{}
err := r.client.Get(ctx, types.NamespacedName{Name: arov1alpha1.SingletonClusterName}, instance)
instance, err := r.GetCluster(ctx)

if err != nil {
return reconcile.Result{}, err
}

if !instance.Spec.OperatorFlags.GetSimpleBoolean(enabled) {
r.log.Debug("controller is disabled")
r.Log.Debug("controller is disabled")
return reconcile.Result{}, nil
}

r.log.Debug("running")
r.Log.Debug("running")
if !instance.Spec.OperatorFlags.GetSimpleBoolean(managed) {
r.SetProgressing(ctx, "Not MHC Managed for cluster maintenance purpose.")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Leaving a comment here based on our discussion yesterday, it seems like Progressing isn't necessary here. We should remove the SetProgresing and ClearProgressing calls.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree here. Progressing is not required and will solve a few of the other requested changes on this PR by removing it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed those calls.


err := r.dh.EnsureDeleted(ctx, "MachineHealthCheck", "openshift-machine-api", "aro-machinehealthcheck")
if err != nil {
r.Log.Error(err)
r.SetDegraded(ctx, err)
r.ClearProgressing(ctx)
cadenmarchese marked this conversation as resolved.
Show resolved Hide resolved

return reconcile.Result{RequeueAfter: time.Hour}, err
}

err = r.dh.EnsureDeleted(ctx, "PrometheusRule", "openshift-machine-api", "mhc-remediation-alert")
if err != nil {
r.Log.Error(err)
r.SetDegraded(ctx, err)
r.ClearProgressing(ctx)
cadenmarchese marked this conversation as resolved.
Show resolved Hide resolved

return reconcile.Result{RequeueAfter: time.Hour}, err
}

safwank97 marked this conversation as resolved.
Show resolved Hide resolved
r.ClearConditions(ctx)
return reconcile.Result{}, nil
}

Expand All @@ -84,6 +99,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.
for _, asset := range [][]byte{machinehealthcheckYaml, mhcremediationalertYaml} {
resource, _, err := scheme.Codecs.UniversalDeserializer().Decode(asset, nil, nil)
if err != nil {
r.Log.Error(err)
r.SetDegraded(ctx, err)

return reconcile.Result{}, err
}

Expand All @@ -105,9 +123,13 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.
// create/update the MHC CR
err = r.dh.Ensure(ctx, resources...)
if err != nil {
r.Log.Error(err)
r.SetDegraded(ctx, err)

return reconcile.Result{}, err
}

r.ClearConditions(ctx)
return reconcile.Result{}, nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"time"

"github.com/golang/mock/gomock"
operatorv1 "github.com/openshift/api/operator/v1"
"github.com/sirupsen/logrus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
ctrl "sigs.k8s.io/controller-runtime"
Expand All @@ -19,24 +20,34 @@ import (
arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1"
mock_dynamichelper "github.com/Azure/ARO-RP/pkg/util/mocks/dynamichelper"
_ "github.com/Azure/ARO-RP/pkg/util/scheme"
utilconditions "github.com/Azure/ARO-RP/test/util/conditions"
utilerror "github.com/Azure/ARO-RP/test/util/error"
)

// Test reconcile function
func TestReconciler(t *testing.T) {
func TestMachineHealthCheckReconciler(t *testing.T) {
transitionTime := metav1.Time{Time: time.Now()}
defaultAvailable := utilconditions.ControllerDefaultAvailable(ControllerName)
defaultProgressing := utilconditions.ControllerDefaultProgressing(ControllerName)
defaultDegraded := utilconditions.ControllerDefaultDegraded(ControllerName)

defaultConditions := []operatorv1.OperatorCondition{defaultAvailable, defaultProgressing, defaultDegraded}

type test struct {
name string
instance *arov1alpha1.Cluster
mocks func(mdh *mock_dynamichelper.MockInterface)
wantConditions []operatorv1.OperatorCondition
wantErr string
wantRequeueAfter time.Duration
}

for _, tt := range []*test{
{
name: "Failure to get instance",
mocks: func(mdh *mock_dynamichelper.MockInterface) {},
wantErr: `clusters.aro.openshift.io "cluster" not found`,
name: "Failure to get instance",
mocks: func(mdh *mock_dynamichelper.MockInterface) {},
wantConditions: defaultConditions,
wantErr: `clusters.aro.openshift.io "cluster" not found`,
},
{
name: "Enabled Feature Flag is false",
Expand All @@ -49,12 +60,16 @@ func TestReconciler(t *testing.T) {
enabled: strconv.FormatBool(false),
},
},
Status: arov1alpha1.ClusterStatus{
Conditions: defaultConditions,
},
},
mocks: func(mdh *mock_dynamichelper.MockInterface) {
mdh.EXPECT().EnsureDeleted(gomock.Any(), "MachineHealthCheck", "openshift-machine-api", "aro-machinehealthcheck").Times(0)
mdh.EXPECT().Ensure(gomock.Any(), gomock.Any()).Return(nil).Times(0)
},
wantErr: "",
wantConditions: defaultConditions,
wantErr: "",
},
{
name: "Managed Feature Flag is false: ensure mhc and its alert are deleted",
Expand All @@ -68,12 +83,16 @@ func TestReconciler(t *testing.T) {
managed: strconv.FormatBool(false),
},
},
Status: arov1alpha1.ClusterStatus{
Conditions: defaultConditions,
},
},
mocks: func(mdh *mock_dynamichelper.MockInterface) {
mdh.EXPECT().EnsureDeleted(gomock.Any(), "MachineHealthCheck", "openshift-machine-api", "aro-machinehealthcheck").Times(1)
mdh.EXPECT().EnsureDeleted(gomock.Any(), "PrometheusRule", "openshift-machine-api", "mhc-remediation-alert").Times(1)
},
wantErr: "",
wantConditions: defaultConditions,
wantErr: "",
},
{
name: "Managed Feature Flag is false: mhc fails to delete, an error is returned",
Expand All @@ -87,11 +106,24 @@ func TestReconciler(t *testing.T) {
managed: strconv.FormatBool(false),
},
},
Status: arov1alpha1.ClusterStatus{
Conditions: defaultConditions,
},
},
mocks: func(mdh *mock_dynamichelper.MockInterface) {
mdh.EXPECT().EnsureDeleted(gomock.Any(), "MachineHealthCheck", "openshift-machine-api", "aro-machinehealthcheck").Return(errors.New("Could not delete mhc"))
},
wantErr: "Could not delete mhc",
wantErr: "Could not delete mhc",
wantConditions: []operatorv1.OperatorCondition{
defaultAvailable,
defaultProgressing,
{
Type: ControllerName + "Controller" + operatorv1.OperatorStatusTypeDegraded,
Status: operatorv1.ConditionTrue,
LastTransitionTime: transitionTime,
Message: "Could not delete mhc",
},
},
wantRequeueAfter: time.Hour,
},
{
Expand All @@ -106,12 +138,25 @@ func TestReconciler(t *testing.T) {
managed: strconv.FormatBool(false),
},
},
Status: arov1alpha1.ClusterStatus{
Conditions: defaultConditions,
},
},
mocks: func(mdh *mock_dynamichelper.MockInterface) {
mdh.EXPECT().EnsureDeleted(gomock.Any(), "MachineHealthCheck", "openshift-machine-api", "aro-machinehealthcheck").Times(1)
mdh.EXPECT().EnsureDeleted(gomock.Any(), "PrometheusRule", "openshift-machine-api", "mhc-remediation-alert").Return(errors.New("Could not delete mhc alert"))
},
wantErr: "Could not delete mhc alert",
wantErr: "Could not delete mhc alert",
wantConditions: []operatorv1.OperatorCondition{
defaultAvailable,
defaultProgressing,
{
Type: ControllerName + "Controller" + operatorv1.OperatorStatusTypeDegraded,
Status: operatorv1.ConditionTrue,
LastTransitionTime: transitionTime,
Message: "Could not delete mhc alert",
},
},
wantRequeueAfter: time.Hour,
},
{
Expand All @@ -130,7 +175,8 @@ func TestReconciler(t *testing.T) {
mocks: func(mdh *mock_dynamichelper.MockInterface) {
mdh.EXPECT().Ensure(gomock.Any(), gomock.Any()).Return(nil).Times(1)
},
wantErr: "",
wantConditions: defaultConditions,
wantErr: "",
},
{
name: "When ensuring resources fails, an error is returned",
Expand All @@ -144,11 +190,24 @@ func TestReconciler(t *testing.T) {
managed: strconv.FormatBool(true),
},
},
Status: arov1alpha1.ClusterStatus{
Conditions: defaultConditions,
},
},
mocks: func(mdh *mock_dynamichelper.MockInterface) {
mdh.EXPECT().Ensure(gomock.Any(), gomock.Any()).Return(errors.New("failed to ensure"))
},
wantErr: "failed to ensure",
wantConditions: []operatorv1.OperatorCondition{
defaultAvailable,
defaultProgressing,
{
Type: ControllerName + "Controller" + operatorv1.OperatorStatusTypeDegraded,
Status: operatorv1.ConditionTrue,
LastTransitionTime: transitionTime,
Message: "failed to ensure",
},
},
},
} {
t.Run(tt.name, func(t *testing.T) {
Expand All @@ -165,18 +224,24 @@ func TestReconciler(t *testing.T) {
}

ctx := context.Background()
r := &Reconciler{
log: logrus.NewEntry(logrus.StandardLogger()),
dh: mdh,
client: clientBuilder.Build(),
}

r := NewReconciler(
logrus.NewEntry(logrus.StandardLogger()),
clientBuilder.Build(),
mdh,
)

request := ctrl.Request{}
request.Name = "cluster"

result, err := r.Reconcile(ctx, request)

if tt.wantRequeueAfter != result.RequeueAfter {
t.Errorf("Wanted to requeue after %v but was set to %v", tt.wantRequeueAfter, result.RequeueAfter)
t.Errorf("wanted to requeue after %v but was set to %v", tt.wantRequeueAfter, result.RequeueAfter)
}

if tt.instance != nil {
utilconditions.AssertControllerConditions(t, ctx, r.AROController.Client, tt.wantConditions)
}

utilerror.AssertErrorMessage(t, err, tt.wantErr)
Expand Down