Skip to content

Commit

Permalink
Only perform machineconfig reconciliation during OpenShift upgrades (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
hawkowl authored Sep 10, 2024
1 parent 98fe23c commit bd9af03
Show file tree
Hide file tree
Showing 16 changed files with 1,332 additions and 105 deletions.
9 changes: 6 additions & 3 deletions cmd/aro/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import (
"github.com/Azure/ARO-RP/pkg/operator/controllers/storageaccounts"
"github.com/Azure/ARO-RP/pkg/operator/controllers/subnets"
"github.com/Azure/ARO-RP/pkg/operator/controllers/workaround"
"github.com/Azure/ARO-RP/pkg/util/clienthelper"
"github.com/Azure/ARO-RP/pkg/util/dynamichelper"
utillog "github.com/Azure/ARO-RP/pkg/util/log"
// +kubebuilder:scaffold:imports
Expand Down Expand Up @@ -94,6 +95,8 @@ func operator(ctx context.Context, log *logrus.Entry) error {
return err
}

ch := clienthelper.NewWithClient(log, client)

if role == pkgoperator.RoleMaster {
if err = (genevalogging.NewReconciler(
log.WithField("controller", genevalogging.ControllerName),
Expand Down Expand Up @@ -137,17 +140,17 @@ func operator(ctx context.Context, log *logrus.Entry) error {
}
if err = (dnsmasq.NewClusterReconciler(
log.WithField("controller", dnsmasq.ClusterControllerName),
client, dh)).SetupWithManager(mgr); err != nil {
client, ch)).SetupWithManager(mgr); err != nil {
return fmt.Errorf("unable to create controller %s: %v", dnsmasq.ClusterControllerName, err)
}
if err = (dnsmasq.NewMachineConfigReconciler(
log.WithField("controller", dnsmasq.MachineConfigControllerName),
client, dh)).SetupWithManager(mgr); err != nil {
client, ch)).SetupWithManager(mgr); err != nil {
return fmt.Errorf("unable to create controller %s: %v", dnsmasq.MachineConfigControllerName, err)
}
if err = (dnsmasq.NewMachineConfigPoolReconciler(
log.WithField("controller", dnsmasq.MachineConfigPoolControllerName),
client, dh)).SetupWithManager(mgr); err != nil {
client, ch)).SetupWithManager(mgr); err != nil {
return fmt.Errorf("unable to create controller %s: %v", dnsmasq.MachineConfigPoolControllerName, err)
}
if err = (node.NewReconciler(
Expand Down
16 changes: 16 additions & 0 deletions pkg/cluster/arooperator.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,22 @@ func (m *manager) syncClusterObject(ctx context.Context) error {
return err
}

func (m *manager) enableOperatorReconciliation(ctx context.Context) error {
err := m.aroOperatorDeployer.SetForceReconcile(ctx, true)
if err != nil {
m.log.Error(fmt.Errorf("cannot ensureAROOperator.SetForceReconcile: %w", err))
}
return err
}

func (m *manager) disableOperatorReconciliation(ctx context.Context) error {
err := m.aroOperatorDeployer.SetForceReconcile(ctx, false)
if err != nil {
m.log.Error(fmt.Errorf("cannot ensureAROOperator.SetForceReconcile: %w", err))
}
return err
}

func (m *manager) aroDeploymentReady(ctx context.Context) (bool, error) {
if !m.isIngressProfileAvailable() {
// If the ingress profile is not available, ARO operator update/deploy will fail.
Expand Down
2 changes: 2 additions & 0 deletions pkg/cluster/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ func (m *manager) bootstrap() []steps.Step {
steps.Action(m.initializeOperatorDeployer), // depends on kube clients
steps.Condition(m.apiServersReady, 30*time.Minute, true),
steps.Action(m.installAROOperator),
steps.Action(m.enableOperatorReconciliation),
steps.Action(m.incrInstallPhase),
)

Expand Down Expand Up @@ -393,6 +394,7 @@ func (m *manager) Install(ctx context.Context) error {
steps.Action(m.configureIngressCertificate),
steps.Condition(m.ingressControllerReady, 30*time.Minute, true),
steps.Action(m.configureDefaultStorageClass),
steps.Action(m.disableOperatorReconciliation),
steps.Action(m.finishInstallation),
},
}
Expand Down
32 changes: 32 additions & 0 deletions pkg/operator/controllers/base/aro_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,19 @@ package base

import (
"context"
"fmt"

configv1 "github.com/openshift/api/config/v1"
operatorv1 "github.com/openshift/api/operator/v1"
"github.com/openshift/library-go/pkg/operator/v1helpers"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/Azure/ARO-RP/pkg/operator"
arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1"
"github.com/Azure/ARO-RP/pkg/util/version"
)

type AROController struct {
Expand Down Expand Up @@ -103,3 +107,31 @@ func (c *AROController) defaultDegraded() *operatorv1.OperatorCondition {
func (c *AROController) conditionName(conditionType string) string {
return c.Name + "Controller" + conditionType
}

func (c *AROController) IsClusterUpgrading(ctx context.Context) (bool, error) {
cv := &configv1.ClusterVersion{}
err := c.Client.Get(ctx, types.NamespacedName{Name: "version"}, cv)
if err != nil {
err = fmt.Errorf("error getting the ClusterVersion: %w", err)
c.Log.Error(err)
return false, err
}

return version.IsClusterUpgrading(cv), nil
}

func (c *AROController) AllowRebootCausingReconciliation(ctx context.Context, instance *arov1alpha1.Cluster) (bool, error) {
// If reconciliation is forced, perform it
if instance.Spec.OperatorFlags.GetSimpleBoolean(operator.ForceReconciliation) {
c.Log.Debugf("allowing reconciliation of %s because reconciliation forced", c.Name)
return true, nil
}

// Allow the reconciliation if the cluster is upgrading
isClusterUpgrading, err := c.IsClusterUpgrading(ctx)
if err != nil {
return false, err
}

return isClusterUpgrading, nil
}
52 changes: 44 additions & 8 deletions pkg/operator/controllers/dnsmasq/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,26 @@ package dnsmasq

import (
"context"
"fmt"

configv1 "github.com/openshift/api/config/v1"
mcv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
"github.com/sirupsen/logrus"
kerrors "k8s.io/apimachinery/pkg/api/errors"
kruntime "k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"

"github.com/Azure/ARO-RP/pkg/operator"
arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1"
"github.com/Azure/ARO-RP/pkg/operator/controllers/base"
"github.com/Azure/ARO-RP/pkg/operator/predicates"
"github.com/Azure/ARO-RP/pkg/util/clienthelper"
"github.com/Azure/ARO-RP/pkg/util/dynamichelper"
)

Expand All @@ -28,22 +34,22 @@ const (

type ClusterReconciler struct {
base.AROController
dh dynamichelper.Interface
ch clienthelper.Interface
}

func NewClusterReconciler(log *logrus.Entry, client client.Client, dh dynamichelper.Interface) *ClusterReconciler {
func NewClusterReconciler(log *logrus.Entry, client client.Client, ch clienthelper.Interface) *ClusterReconciler {
return &ClusterReconciler{
AROController: base.AROController{
Log: log,
Client: client,
Name: ClusterControllerName,
},
dh: dh,
ch: ch,
}
}

// Reconcile watches the ARO object, and if it changes, reconciles all the
// 99-%s-aro-dns machineconfigs
// Reconcile watches the ARO object and ClusterVersion, and if they change,
// reconciles all the 99-%s-aro-dns machineconfigs
func (r *ClusterReconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) {
instance, err := r.GetCluster(ctx)
if err != nil {
Expand All @@ -60,6 +66,13 @@ func (r *ClusterReconciler) Reconcile(ctx context.Context, request ctrl.Request)
r.Log.Debug("restartDnsmasq is enabled")
}

allowReconcile, err := r.AllowRebootCausingReconciliation(ctx, instance)
if err != nil {
r.Log.Error(err)
r.SetDegraded(ctx, err)
return reconcile.Result{}, err
}

r.Log.Debug("running")
mcps := &mcv1.MachineConfigPoolList{}
err = r.Client.List(ctx, mcps)
Expand All @@ -69,7 +82,7 @@ func (r *ClusterReconciler) Reconcile(ctx context.Context, request ctrl.Request)
return reconcile.Result{}, err
}

err = reconcileMachineConfigs(ctx, instance, r.dh, restartDnsmasq, mcps.Items...)
err = reconcileMachineConfigs(ctx, instance, r.ch, r.Client, allowReconcile, restartDnsmasq, mcps.Items...)
if err != nil {
r.Log.Error(err)
r.SetDegraded(ctx, err)
Expand All @@ -82,13 +95,22 @@ func (r *ClusterReconciler) Reconcile(ctx context.Context, request ctrl.Request)

// SetupWithManager setup our mananger
func (r *ClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
clusterVersionPredicate := predicate.NewPredicateFuncs(func(o client.Object) bool {
return o.GetName() == "version"
})

return ctrl.NewControllerManagedBy(mgr).
For(&arov1alpha1.Cluster{}, builder.WithPredicates(predicate.And(predicates.AROCluster, predicate.GenerationChangedPredicate{}))).
Named(ClusterControllerName).
Watches(
&source.Kind{Type: &configv1.ClusterVersion{}},
&handler.EnqueueRequestForObject{},
builder.WithPredicates(clusterVersionPredicate),
).
Complete(r)
}

func reconcileMachineConfigs(ctx context.Context, instance *arov1alpha1.Cluster, dh dynamichelper.Interface, restartDnsmasq bool, mcps ...mcv1.MachineConfigPool) error {
func reconcileMachineConfigs(ctx context.Context, instance *arov1alpha1.Cluster, ch clienthelper.Interface, c client.Client, allowReconcile bool, restartDnsmasq bool, mcps ...mcv1.MachineConfigPool) error {
var resources []kruntime.Object
for _, mcp := range mcps {
resource, err := dnsmasqMachineConfig(instance.Spec.Domain, instance.Spec.APIIntIP, instance.Spec.IngressIP, mcp.Name, instance.Spec.GatewayDomains, instance.Spec.GatewayPrivateEndpointIP, restartDnsmasq)
Expand All @@ -109,5 +131,19 @@ func reconcileMachineConfigs(ctx context.Context, instance *arov1alpha1.Cluster,
return err
}

return dh.Ensure(ctx, resources...)
// If we are allowed to reconcile the resources, then we run Ensure to
// create or update. If we are not allowed to reconcile, we do not want to
// perform any updates, but we do want to perform initial configuration.
if allowReconcile {
return ch.Ensure(ctx, resources...)
} else {
for _, i := range resources {
err := c.Create(ctx, i.(client.Object))
// Since we are only creating, ignore AlreadyExists
if err != nil && !kerrors.IsAlreadyExists(err) {
return fmt.Errorf("error creating client object: %w", err)
}
}
}
return nil
}
Loading

0 comments on commit bd9af03

Please sign in to comment.