4 changes: 2 additions & 2 deletions pkg/controllers/capiinstaller/capi_installer_controller.go
@@ -286,7 +286,7 @@ func (r *CapiInstallerController) setAvailableCondition(ctx context.Context, log

log.V(2).Info("CAPI Installer Controller is Available")

if err := r.SyncStatus(ctx, co, conds); err != nil {
if err := r.SyncStatus(ctx, controllerName, co, conds); err != nil {
return fmt.Errorf("failed to sync status: %w", err)
}

@@ -311,7 +311,7 @@ func (r *CapiInstallerController) setDegradedCondition(ctx context.Context, log

log.Info("CAPI Installer Controller is Degraded")

if err := r.SyncStatus(ctx, co, conds); err != nil {
if err := r.SyncStatus(ctx, controllerName, co, conds); err != nil {
return fmt.Errorf("failed to sync status: %w", err)
}

5 changes: 5 additions & 0 deletions pkg/controllers/clusteroperator/clusteroperator_controller.go
@@ -54,6 +54,11 @@ func (r *ClusterOperatorController) Reconcile(ctx context.Context, req ctrl.Requ
} else {
// TODO: wrap this into status aggregation logic to make these conditions conform
// to the meaningful aggregation of all the other controllers' ones.
//
// TODO: set a time period so that if one of the controllers' conditions has been degraded=true (reduced QoS)
// for an extended period of time (e.g. 30 mins), we degrade the top level by setting the overall operator degraded=true.
// For any controller available=false condition instead (e.g. when the CAPI components are failing to run),
// we should immediately set the overall operator available=false.
if err := r.ClusterOperatorStatusClient.SetStatusAvailable(ctx, ""); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to set conditions for %q ClusterObject: %w", controllers.ClusterOperatorName, err)
}
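The TODO in the hunk above sketches an aggregation rule: a controller-level available=false should make the whole operator unavailable immediately, while a controller-level degraded=true should only bubble up to the operator after it has persisted for some window (for example 30 minutes). Below is a minimal, self-contained sketch of that rule; the types, names, and threshold are assumptions for illustration, not part of this PR.

// Sketch of the aggregation rule described in the TODO above. The condition
// types, the 30 minute threshold, and all names here are assumptions for
// illustration; none of this is the repository's actual implementation.
package main

import (
	"fmt"
	"strings"
	"time"
)

// controllerCondition is a stand-in for a per-controller ClusterOperator condition.
type controllerCondition struct {
	Type               string // e.g. "CoreClusterControllerAvailable" or "CoreClusterControllerDegraded"
	Status             string // "True" or "False"
	LastTransitionTime time.Time
}

// aggregate computes the proposed top-level pair: any controller Available=False
// makes the operator unavailable immediately, while a controller Degraded=True
// only degrades the operator once it has persisted longer than gracePeriod.
func aggregate(conds []controllerCondition, now time.Time, gracePeriod time.Duration) (available, degraded bool) {
	available = true

	for _, c := range conds {
		switch {
		case strings.HasSuffix(c.Type, "Available") && c.Status == "False":
			available = false
		case strings.HasSuffix(c.Type, "Degraded") && c.Status == "True" && now.Sub(c.LastTransitionTime) > gracePeriod:
			degraded = true
		}
	}

	return available, degraded
}

func main() {
	conds := []controllerCondition{
		{Type: "CoreClusterControllerAvailable", Status: "True", LastTransitionTime: time.Now()},
		{Type: "CoreClusterControllerDegraded", Status: "True", LastTransitionTime: time.Now().Add(-45 * time.Minute)},
	}

	available, degraded := aggregate(conds, time.Now(), 30*time.Minute)
	fmt.Printf("available=%t degraded=%t\n", available, degraded) // available=true degraded=true
}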
62 changes: 53 additions & 9 deletions pkg/controllers/corecluster/corecluster_controller.go
@@ -46,13 +46,22 @@ const (
capiInfraClusterAPIVersionV1Beta1 = "infrastructure.cluster.x-k8s.io/v1beta1"
capiInfraClusterAPIVersionV1Beta2 = "infrastructure.cluster.x-k8s.io/v1beta2"
clusterOperatorName = "cluster-api"

// CoreClusterControllerAvailableCondition is the condition type that indicates the CoreCluster controller is available.
CoreClusterControllerAvailableCondition = "CoreClusterControllerAvailable"

// CoreClusterControllerDegradedCondition is the condition type that indicates the CoreCluster controller is degraded.
CoreClusterControllerDegradedCondition = "CoreClusterControllerDegraded"
)

var (
errPlatformStatusShouldNotBeNil = errors.New("infrastructure platformStatus should not be nil")
errUnsupportedPlatformType = errors.New("unsupported platform type")
errOpenshiftInfraShouldNotBeNil = errors.New("infrastructure object should not be nil")
errOpenshiftInfrastructureNameShouldNotBeEmpty = errors.New("infrastructure object's infrastructureName should not be empty")

//nolint:gochecknoglobals
expectedConditionTypes = []string{CoreClusterControllerAvailableCondition, CoreClusterControllerDegradedCondition}
)

// CoreClusterController reconciles a Cluster object.
@@ -81,38 +90,73 @@ func (r *CoreClusterController) SetupWithManager(mgr ctrl.Manager) error {
}

// Reconcile reconciles the core cluster object for the openshift-cluster-api namespace.
//
//nolint:funlen
func (r *CoreClusterController) Reconcile(ctx context.Context, req reconcile.Request) (ctrl.Result, error) {
logger := log.FromContext(ctx).WithName(controllerName)
logger.Info("Reconciling core cluster")
defer logger.Info("Finished reconciling core cluster")

var err error

co, err := r.GetOrCreateClusterOperator(ctx)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to get or create cluster operator: %w", err)
}

// Populate conditions from the existing cluster operator status.
conditions := operatorstatus.FilterOwnedConditions(co.Status.Conditions, expectedConditionTypes)

defer func() {
// This function runs on every return path, to ensure the controller conditions
// set on that code path are correctly synced to the cluster operator status.
if syncErr := r.SyncControllerConditions(ctx, co, controllerName, conditions, expectedConditionTypes); syncErr != nil {
if err != nil {
// The sync of the controller conditions failed, but there is also a non-nil reconcile error.
// Log the sync error and return the reconcile one.
logger.Error(syncErr, "failed to sync cluster operator status")
} else {
// If there is no reconcile error, but there is a sync error, then we want to propagate the
// sync error to be returned by the Reconcile function, so that it will cause an immediate requeue/retry.
err = syncErr
}
}
}()

ocpInfrastructureName, err := getOCPInfrastructureName(r.Infra)
if err != nil {
operatorstatus.SetCondition(conditions, CoreClusterControllerDegradedCondition, configv1.ConditionTrue,
operatorstatus.ReasonSyncFailed, fmt.Sprintf("controller is degraded: %s", err.Error()))

return ctrl.Result{}, fmt.Errorf("failed to obtain infrastructure name: %w", err)
}

cluster, err := r.ensureCoreCluster(ctx, client.ObjectKey{Namespace: r.ManagedNamespace, Name: ocpInfrastructureName}, logger)
if err != nil {
operatorstatus.SetCondition(conditions, CoreClusterControllerDegradedCondition, configv1.ConditionTrue,
operatorstatus.ReasonSyncFailed, fmt.Sprintf("controller is degraded: %s", err.Error()))

return ctrl.Result{}, fmt.Errorf("failed to ensure core cluster: %w", err)
}

if !cluster.DeletionTimestamp.IsZero() {
if err := r.SetStatusAvailable(ctx, ""); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to set status available: %w", err)
}

return ctrl.Result{}, nil
return ctrl.Result{}, err
}

if err := r.ensureCoreClusterControlPlaneInitializedCondition(ctx, cluster); err != nil {
operatorstatus.SetCondition(conditions, CoreClusterControllerDegradedCondition, configv1.ConditionTrue,
operatorstatus.ReasonSyncFailed, fmt.Sprintf("controller is degraded: %s", err.Error()))

return ctrl.Result{}, fmt.Errorf("failed to ensure core cluster has the ControlPlaneInitializedCondition: %w", err)
}

if err := r.SetStatusAvailable(ctx, ""); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to set status available: %w", err)
}
operatorstatus.SetCondition(conditions, CoreClusterControllerDegradedCondition, configv1.ConditionFalse,
operatorstatus.ReasonAsExpected, "controller works as expected")

operatorstatus.SetCondition(conditions, CoreClusterControllerAvailableCondition, configv1.ConditionTrue,
operatorstatus.ReasonAsExpected, "controller is available")

return ctrl.Result{}, nil
return ctrl.Result{}, err
}

// ensureCoreCluster creates a cluster with the given name and returns the cluster object.
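The deferred closure in the Reconcile hunk above centralizes the condition sync so that every return path updates the ClusterOperator status, and a sync failure only becomes the reconcile error when the main path succeeded. Below is a minimal, self-contained sketch of that pattern; it assumes a named error return so the deferred assignment is visible to the caller, and every type and helper name in it is a stand-in rather than the repository's code.

// Sketch of the defer-based condition sync pattern used in Reconcile above.
// All types and helpers here are stand-ins; the named return value (retErr)
// is what lets the deferred closure override the returned error.
package main

import (
	"context"
	"fmt"
)

type condition struct {
	Type, Status, Reason, Message string
}

// syncConditions stands in for SyncControllerConditions: imagine a status
// patch against the ClusterOperator here.
func syncConditions(ctx context.Context, conds []condition) error {
	return nil
}

func doWork(ctx context.Context) error { return nil }

func reconcile(ctx context.Context) (retErr error) {
	var conds []condition

	defer func() {
		// Runs on every return path: sync whatever conditions were set.
		if syncErr := syncConditions(ctx, conds); syncErr != nil {
			if retErr != nil {
				// The reconcile itself failed; keep that error and only log the sync failure.
				fmt.Println("failed to sync cluster operator status:", syncErr)
			} else {
				// The reconcile succeeded, so surface the sync failure to force a requeue.
				retErr = syncErr
			}
		}
	}()

	if err := doWork(ctx); err != nil {
		conds = append(conds, condition{Type: "Degraded", Status: "True", Reason: "SyncFailed", Message: err.Error()})

		return fmt.Errorf("failed to do work: %w", err)
	}

	conds = append(conds,
		condition{Type: "Degraded", Status: "False", Reason: "AsExpected", Message: "controller works as expected"},
		condition{Type: "Available", Status: "True", Reason: "AsExpected", Message: "controller is available"},
	)

	return nil
}

func main() {
	if err := reconcile(context.Background()); err != nil {
		fmt.Println("reconcile error:", err)
	}
}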
68 changes: 44 additions & 24 deletions pkg/controllers/corecluster/corecluster_controller_test.go
@@ -35,6 +35,7 @@ import (
capabuilder "github.com/openshift/cluster-api-actuator-pkg/testutils/resourcebuilder/cluster-api/infrastructure/v1beta2"
configv1resourcebuilder "github.com/openshift/cluster-api-actuator-pkg/testutils/resourcebuilder/config/v1"
corev1resourcebuilder "github.com/openshift/cluster-api-actuator-pkg/testutils/resourcebuilder/core/v1"
"github.com/openshift/cluster-capi-operator/pkg/controllers"
"github.com/openshift/cluster-capi-operator/pkg/operatorstatus"
)

@@ -100,7 +101,7 @@ var _ = Describe("Reconcile Core cluster", func() {
testNamespaceName = namespace.Name

By("Creating the testing ClusterOperator object")
cO := configv1resourcebuilder.ClusterOperator().WithName(clusterOperatorName).Build()
cO := configv1resourcebuilder.ClusterOperator().WithName(controllers.ClusterOperatorName).Build()
Expect(cl.Create(ctx, cO)).To(Succeed())
})

@@ -143,13 +144,23 @@ var _ = Describe("Reconcile Core cluster", func() {
Context("When there is no core cluster", func() {
Context("When there is no infra cluster", func() {
It("should not create core or infra cluster", func() {

testInfraCluster := capabuilder.AWSCluster().WithName(testInfraName).WithNamespace(testNamespaceName).Build()
Consistently(komega.Get(testInfraCluster)).Should(MatchError("awsclusters.infrastructure.cluster.x-k8s.io \"test-ocp-infrastructure-name\" not found"))

testCoreCluster := capibuilder.Cluster().WithName(testInfraName).WithNamespace(testNamespaceName).Build()
Consistently(komega.Get(testCoreCluster)).Should(MatchError("clusters.cluster.x-k8s.io \"test-ocp-infrastructure-name\" not found"))
})

It("should update the ClusterOperator status conditions with controller specific ones to reflect a degraded state", func() {
Eventually(komega.Object(configv1resourcebuilder.ClusterOperator().WithName(controllers.ClusterOperatorName).Build())).Should(
HaveField("Status.Conditions", SatisfyAll(
ContainElement(And(
HaveField("Type", BeEquivalentTo(CoreClusterControllerDegradedCondition)),
HaveField("Status", BeEquivalentTo(configv1.ConditionTrue)),
)),
)),
)
})
})

Context("When there is an infra cluster", func() {
@@ -173,28 +184,19 @@
)
})

Context("With a ClusterOperator", func() {
It("should update the ClusterOperator status to be available, upgradeable, non-progressing, non-degraded", func() {
co := komega.Object(configv1resourcebuilder.ClusterOperator().WithName(clusterOperatorName).Build())
Eventually(co).Should(
HaveField("Status.Conditions", SatisfyAll(
ContainElement(And(HaveField("Type", Equal(configv1.OperatorAvailable)), HaveField("Status", Equal(configv1.ConditionTrue)))),
ContainElement(And(HaveField("Type", Equal(configv1.OperatorProgressing)), HaveField("Status", Equal(configv1.ConditionFalse)))),
ContainElement(And(HaveField("Type", Equal(configv1.OperatorDegraded)), HaveField("Status", Equal(configv1.ConditionFalse)))),
ContainElement(And(HaveField("Type", Equal(configv1.OperatorUpgradeable)), HaveField("Status", Equal(configv1.ConditionTrue)))),
It("should update the ClusterOperator status conditions with controller specific ones to reflect a normal state", func() {
Eventually(komega.Object(configv1resourcebuilder.ClusterOperator().WithName(controllers.ClusterOperatorName).Build())).Should(
HaveField("Status.Conditions", SatisfyAll(
ContainElement(And(
HaveField("Type", BeEquivalentTo(CoreClusterControllerAvailableCondition)),
HaveField("Status", BeEquivalentTo(configv1.ConditionTrue)),
)),
)
})

It("should update the ClusterOperator status version to the desired one", func() {
co := komega.Object(configv1resourcebuilder.ClusterOperator().WithName(clusterOperatorName).Build())
Eventually(co).Should(
HaveField("Status.Versions", ContainElement(SatisfyAll(
HaveField("Name", Equal("operator")),
HaveField("Version", Equal(desiredOperatorReleaseVersion)),
))),
)
})
ContainElement(And(
HaveField("Type", BeEquivalentTo(CoreClusterControllerDegradedCondition)),
HaveField("Status", BeEquivalentTo(configv1.ConditionFalse)),
)),
)),
)
})
})
})
@@ -233,10 +235,28 @@ var _ = Describe("Reconcile Core cluster", func() {
)),
)
})

It("should update the ClusterOperator status conditions with controller specific ones to reflect a normal state", func() {
Eventually(komega.Object(configv1resourcebuilder.ClusterOperator().WithName(controllers.ClusterOperatorName).Build())).Should(
HaveField("Status.Conditions", SatisfyAll(
ContainElement(And(
HaveField("Type", BeEquivalentTo(CoreClusterControllerAvailableCondition)),
HaveField("Status", BeEquivalentTo(configv1.ConditionTrue)),
)),
ContainElement(And(
HaveField("Type", BeEquivalentTo(CoreClusterControllerDegradedCondition)),
HaveField("Status", BeEquivalentTo(configv1.ConditionFalse)),
)),
)),
)
})
})

})
})

// This case should not happen, as this controller should not run on an unsupported platform.
// Still, we want to test it, as we have logic in place to handle cases where the controller runs out of band.
Context("With an unsupported platform", func() {
BeforeEach(func() {
By("Creating the testing infrastructure for NonePlatform")
@@ -253,7 +273,7 @@
Type: configv1.NonePlatformType,
},
}
Expect(cl.Status().Patch(ctx, noneInfra, client.MergeFrom(infra))).To(Succeed())
Expect(cl.Status().Patch(ctx, infra, client.MergeFrom(noneInfra))).To(Succeed())
})

JustBeforeEach(func() {
2 changes: 1 addition & 1 deletion pkg/controllers/infracluster/infracluster_controller.go
@@ -254,7 +254,7 @@ func (r *InfraClusterController) setAvailableCondition(ctx context.Context, log

log.V(2).Info("InfraCluster Controller is Available")

if err := r.SyncStatus(ctx, co, conds); err != nil {
if err := r.SyncStatus(ctx, controllerName, co, conds); err != nil {
return fmt.Errorf("failed to sync status: %w", err)
}

4 changes: 2 additions & 2 deletions pkg/controllers/secretsync/secret_sync_controller.go
@@ -203,7 +203,7 @@ func (r *UserDataSecretController) setAvailableCondition(ctx context.Context, lo

log.Info("user Data Secret Controller is available")

if err := r.SyncStatus(ctx, co, conds); err != nil {
if err := r.SyncStatus(ctx, controllerName, co, conds); err != nil {
return fmt.Errorf("failed to sync status: %w", err)
}

@@ -227,7 +227,7 @@ func (r *UserDataSecretController) setDegradedCondition(ctx context.Context, log

log.Info("user Data Secret Controller is degraded")

if err := r.SyncStatus(ctx, co, conds); err != nil {
if err := r.SyncStatus(ctx, controllerName, co, conds); err != nil {
return fmt.Errorf("failed to sync status: %w", err)
}
