feat(status): update status.condition in DSC and DSCI
for Condition:
- create a generic UpdateFailedCondition() function for all components to use; it returns a condition and an error (a sketch follows below)
- keep condition types to only "Available", "Progressing", "Degraded" and "ReconcileSuccess"
- status:ReconcileComplete is renamed to ReconcileSuccess
- status:upgradeable is removed

for pipeline:
- move SetExistingArgoCondition from pipeline to status
- remove the duplicated update on DSC, since the error is already caught in pipeline and UpdateFailedCondition() is called

for Phase: Created, Ready, Error, Deleting
- set phase to Error if the Argo CRD exists
- remove Progressing; use Created instead when the DSCI CR has just been created

chore:
- fix typos and some missing customized error messages
- fix the Message when a resource is created
- move static reasons and messages into the status package
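
For orientation, a minimal sketch of the two new status helpers used throughout this diff, inferred from their call sites; only the two signatures come from the diff — the package layout, condition type, reason strings, and bodies are assumptions, not the committed implementation:

package status

import (
	conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1"
	corev1 "k8s.io/api/core/v1"
)

// UpdateFailedCondition builds a failed <component>Ready condition and hands the
// error back, so component code can simply write:
//   return status.UpdateFailedCondition(ComponentName, err)
func UpdateFailedCondition(componentName string, err error) (conditionsv1.Condition, error) {
	return conditionsv1.Condition{
		Type:    conditionsv1.ConditionType(componentName + "Ready"), // <component>Ready, per the API doc comments below
		Status:  corev1.ConditionFalse,
		Reason:  "ReconcileFailed", // assumed reason string
		Message: err.Error(),
	}, err
}

// GetDefaultComponentCondition returns the success condition for a component
// whose reconcile finished without errors.
func GetDefaultComponentCondition(componentName string) conditionsv1.Condition {
	return conditionsv1.Condition{
		Type:    conditionsv1.ConditionType(componentName + "Ready"),
		Status:  corev1.ConditionTrue,
		Reason:  "ReconcileCompleted", // assumed reason string
		Message: componentName + " reconciled successfully",
	}
}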

Signed-off-by: Wen Zhou <[email protected]>
zdtsw committed Aug 21, 2024
1 parent 14fdb89 commit 32fdb2d
Showing 26 changed files with 528 additions and 429 deletions.
5 changes: 5 additions & 0 deletions apis/datasciencecluster/v1/datasciencecluster_types.go
@@ -90,10 +90,15 @@ type Components struct {
type DataScienceClusterStatus struct {
// Phase describes the Phase of DataScienceCluster reconciliation state
// This is used by OLM UI to provide status information to the user
// Newer API types should use conditions instead. Phase was essentially a state-machine enumeration field that contradicted system-design principles and hampered evolution, since adding new enum values breaks backward compatibility.
// Rather than encouraging clients to infer implicit properties from phases, we prefer to explicitly expose the individual conditions that clients need to monitor.
// Known .status.phase values are: "Created", "Error", "Ready", "Deleting"
Phase string `json:"phase,omitempty"`

// Conditions describes the state of the DataScienceCluster resource.
// +optional
// Standard known .status.conditions.type values are: "Available", "Progressing", "Degraded"
// Extra .status.conditions.type values are: "ReconcileSuccess", "CapabilityDSPv2Argo" and <component>Ready
Conditions []conditionsv1.Condition `json:"conditions,omitempty"`

// RelatedObjects is a list of objects created and maintained by this operator.
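
For consumers of the DSC resource, an illustrative sketch of checking one of these conditions with the conditionsv1 helpers the operator already imports (the dsc variable and the call site are assumptions):

// Check whether the last DSC reconcile succeeded; "ReconcileSuccess" is the
// extra condition type listed in the doc comment above.
if conditionsv1.IsStatusConditionTrue(dsc.Status.Conditions, "ReconcileSuccess") {
	// safe to proceed
}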
6 changes: 6 additions & 0 deletions apis/dscinitialization/v1/dscinitialization_types.go
@@ -99,11 +99,17 @@ type TrustedCABundleSpec struct {
type DSCInitializationStatus struct {
// Phase describes the Phase of DSCInitializationStatus
// This is used by OLM UI to provide status information to the user
// The pattern of using phase is deprecated.
// Newer API types should use conditions instead. Phase was essentially a state-machine enumeration field that contradicted system-design principles and hampered evolution, since adding new enum values breaks backward compatibility.
// Rather than encouraging clients to infer implicit properties from phases, we prefer to explicitly expose the individual conditions that clients need to monitor.
// Known .status.phase values are: "Created", "Error", "Ready", "Deleting"
Phase string `json:"phase,omitempty"`

// Conditions describes the state of the DSCInitializationStatus resource
// +operator-sdk:csv:customresourcedefinitions:type=status
// +optional
// Standard known .status.conditions.type values are: "Available", "Progressing", "Degraded"
// Extra .status.conditions.type values are: "ReconcileSuccess", "CapabilityServiceMesh", "CapabilityServiceMeshAuthorization"
Conditions []conditionsv1.Condition `json:"conditions,omitempty"`

// RelatedObjects is a list of objects created and maintained by this operator.
@@ -187,8 +187,9 @@ spec:
description: DSCInitializationStatus defines the observed state of DSCInitialization.
properties:
conditions:
description: Conditions describes the state of the DSCInitializationStatus
resource
description: 'Conditions describes the state of the DSCInitializationStatus
resource Known .status.conditions.type are: "Available", "Progressing",
"Degraded" and "ReconcileSuccess"'
items:
description: Condition represents the state of the operator's reconciliation
functionality.
@@ -219,6 +220,13 @@ spec:
phase:
description: Phase describes the Phase of DSCInitializationStatus
This is used by OLM UI to provide status information to the user
The pattern of using phase is deprecated. Newer API types should
use conditions instead. Phase was essentially a state-machine enumeration
field that contradicted system-design principles and hampered evolution,
since adding new enum values breaks backward compatibility. Rather
than encouraging clients to infer implicit properties from phases,
we prefer to explicitly expose the individual conditions that clients
need to monitor.
type: string
relatedObjects:
description: RelatedObjects is a list of objects created and maintained
25 changes: 13 additions & 12 deletions components/codeflare/codeflare.go
@@ -10,11 +10,13 @@ import (

"github.com/go-logr/logr"
operatorv1 "github.com/openshift/api/operator/v1"
conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1"
"github.com/opendatahub-io/opendatahub-operator/v2/components"
"github.com/opendatahub-io/opendatahub-operator/v2/controllers/status"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy"
)
@@ -63,37 +65,36 @@ func (c *CodeFlare) ReconcileComponent(ctx context.Context,
owner metav1.Object,
dscispec *dsciv1.DSCInitializationSpec,
platform cluster.Platform,
_ bool) error {
_ bool) (conditionsv1.Condition, error) {
l := c.ConfigComponentLogger(logger, ComponentName, dscispec)
var imageParamMap = map[string]string{
"codeflare-operator-controller-image": "RELATED_IMAGE_ODH_CODEFLARE_OPERATOR_IMAGE", // no need mcad, embedded in cfo
}

enabled := c.GetManagementState() == operatorv1.Managed
monitoringEnabled := dscispec.Monitoring.ManagementState == operatorv1.Managed

if enabled {
if c.DevFlags != nil {
// Download manifests and update paths
if err := c.OverrideManifests(ctx, platform); err != nil {
return err
return status.UpdateFailedCondition(ComponentName, err)
}
}
// check if the CodeFlare operator is installed: it should not be installed
// Both ODH and RHOAI should have the same operator name
dependentOperator := CodeflareOperator

if found, err := cluster.OperatorExists(ctx, cli, dependentOperator); err != nil {
return fmt.Errorf("operator exists throws error %w", err)
return status.UpdateFailedCondition(ComponentName, fmt.Errorf("operator exists throws error %w", err))
} else if found {
return fmt.Errorf("operator %s is found. Please uninstall the operator before enabling %s component",
dependentOperator, ComponentName)
return status.UpdateFailedCondition(ComponentName, fmt.Errorf("operator %s is found. Please uninstall the operator before enabling %s component",
dependentOperator, ComponentName))
}

// Update image parameters only when we do not have customized manifests set
if (dscispec.DevFlags == nil || dscispec.DevFlags.ManifestsUri == "") && (c.DevFlags == nil || len(c.DevFlags.Manifests) == 0) {
if err := deploy.ApplyParams(ParamsPath, imageParamMap, map[string]string{"namespace": dscispec.ApplicationsNamespace}); err != nil {
return fmt.Errorf("failed update image from %s : %w", CodeflarePath+"/bases", err)
return status.UpdateFailedCondition(ComponentName, fmt.Errorf("failed update image from %s : %w", CodeflarePath+"/bases", err))
}
}
}
@@ -103,31 +104,31 @@ func (c *CodeFlare) ReconcileComponent(ctx context.Context,
CodeflarePath,
dscispec.ApplicationsNamespace,
ComponentName, enabled); err != nil {
return err
return status.UpdateFailedCondition(ComponentName, fmt.Errorf("failed to apply manifests %s: %w", CodeflarePath, err))
}
l.Info("apply manifests done")
// CloudServiceMonitoring handling
if platform == cluster.ManagedRhods {
if enabled {
// first check if the service is up, so prometheus won't fire alerts while it is still starting up
if err := cluster.WaitForDeploymentAvailable(ctx, cli, ComponentName, dscispec.ApplicationsNamespace, 20, 2); err != nil {
return fmt.Errorf("deployment for %s is not ready to server: %w", ComponentName, err)
return status.UpdateFailedCondition(ComponentName, fmt.Errorf("deployment for %s is not ready to server: %w", ComponentName, err))
}
l.Info("deployment is done, updating monitoring rules")
}

// inject prometheus codeflare*.rules into /opt/manifests/monitoring/prometheus/prometheus-configs.yaml
if err := c.UpdatePrometheusConfig(cli, enabled && monitoringEnabled, ComponentName); err != nil {
return err
return status.UpdateFailedCondition(ComponentName, err)
}
if err := deploy.DeployManifestsFromPath(ctx, cli, owner,
filepath.Join(deploy.DefaultManifestPath, "monitoring", "prometheus", "apps"),
dscispec.Monitoring.Namespace,
"prometheus", true); err != nil {
return err
return status.UpdateFailedCondition(ComponentName, err)
}
l.Info("updating SRE monitoring done")
}

return nil
return status.GetDefaultComponentCondition(ComponentName), nil
}
5 changes: 3 additions & 2 deletions components/component.go
@@ -10,6 +10,7 @@ import (

"github.com/go-logr/logr"
operatorv1 "github.com/openshift/api/operator/v1"
conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1"
"gopkg.in/yaml.v2"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -79,8 +80,8 @@ type ManifestsConfig struct {
}

type ComponentInterface interface {
ReconcileComponent(ctx context.Context, cli client.Client, logger logr.Logger,
owner metav1.Object, DSCISpec *dsciv1.DSCInitializationSpec, platform cluster.Platform, currentComponentStatus bool) error
ReconcileComponent(ctx context.Context, cli client.Client, logger logr.Logger, owner metav1.Object,
DSCISpec *dsciv1.DSCInitializationSpec, platform cluster.Platform, currentComponentStatus bool) (conditionsv1.Condition, error)
Cleanup(ctx context.Context, cli client.Client, DSCISpec *dsciv1.DSCInitializationSpec) error
GetComponentName() string
GetManagementState() operatorv1.ManagementState
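
Components now hand their condition back to the caller instead of only an error. An illustrative caller-side sketch of how a reconcile loop might consume the pair — the loop and variable names are assumptions, not the committed DSC controller code:

// Reconcile each component and record its returned condition on the DSC
// status, whether the reconcile succeeded or failed.
for _, component := range allComponents { // assumed slice of ComponentInterface
	condition, err := component.ReconcileComponent(ctx, cli, logger, instance, dscispec, platform, installed)
	conditionsv1.SetStatusCondition(&instance.Status.Conditions, condition)
	if err != nil {
		return err // the failed condition was already recorded above
	}
}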
33 changes: 17 additions & 16 deletions components/dashboard/dashboard.go
@@ -11,13 +11,15 @@ import (

"github.com/go-logr/logr"
operatorv1 "github.com/openshift/api/operator/v1"
conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1"
corev1 "k8s.io/api/core/v1"
k8serr "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1"
"github.com/opendatahub-io/opendatahub-operator/v2/components"
"github.com/opendatahub-io/opendatahub-operator/v2/controllers/status"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster"
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy"
)
@@ -67,9 +69,8 @@ func (d *Dashboard) ReconcileComponent(ctx context.Context,
dscispec *dsciv1.DSCInitializationSpec,
platform cluster.Platform,
currentComponentExist bool,
) error {
) (conditionsv1.Condition, error) {
var l logr.Logger

if platform == cluster.SelfManagedRhods || platform == cluster.ManagedRhods {
l = d.ConfigComponentLogger(logger, ComponentNameDownstream, dscispec)
} else {
@@ -90,12 +91,12 @@ func (d *Dashboard) ReconcileComponent(ctx context.Context,
if enabled {
// 1. clean up the OAuth client related secret and CR if dashboard is in 'installed false' status
if err := d.cleanOauthClient(ctx, cli, dscispec, currentComponentExist, l); err != nil {
return err
return status.UpdateFailedCondition(ComponentNameUpstream, err)
}
if d.DevFlags != nil {
// Download manifests and update paths
if err := d.OverrideManifests(ctx, platform); err != nil {
return err
return status.UpdateFailedCondition(ComponentNameUpstream, err)
}
if OverridePath != "" {
entryPath = OverridePath
@@ -107,23 +108,23 @@ func (d *Dashboard) ReconcileComponent(ctx context.Context,
// 2. platform specific RBAC
if platform == cluster.OpenDataHub || platform == "" {
if err := cluster.UpdatePodSecurityRolebinding(ctx, cli, dscispec.ApplicationsNamespace, "odh-dashboard"); err != nil {
return err
return status.UpdateFailedCondition(ComponentNameUpstream, err)
}
} else {
if err := cluster.UpdatePodSecurityRolebinding(ctx, cli, dscispec.ApplicationsNamespace, "rhods-dashboard"); err != nil {
return err
return status.UpdateFailedCondition(ComponentNameDownstream, err)
}
}

// 3. Append or Update variable for component to consume
extraParamsMap, err := updateKustomizeVariable(ctx, cli, platform, dscispec)
if err != nil {
return errors.New("failed to set variable for extraParamsMap")
return status.UpdateFailedCondition(ComponentNameUpstream, errors.New("failed to set variable for extraParamsMap"))
}

// 4. update params.env regardless of whether devFlags is provided or not
if err := deploy.ApplyParams(entryPath, imageParamMap, extraParamsMap); err != nil {
return fmt.Errorf("failed to update params.env from %s : %w", entryPath, err)
return status.UpdateFailedCondition(ComponentNameUpstream, fmt.Errorf("failed to update params.env from %s : %w", entryPath, err))
}
}

@@ -133,11 +134,11 @@ func (d *Dashboard) ReconcileComponent(ctx context.Context,
case cluster.SelfManagedRhods, cluster.ManagedRhods:
// anaconda
if err := cluster.CreateSecret(ctx, cli, "anaconda-ce-access", dscispec.ApplicationsNamespace); err != nil {
return fmt.Errorf("failed to create access-secret for anaconda: %w", err)
return status.UpdateFailedCondition(ComponentNameDownstream, fmt.Errorf("failed to create access-secret for anaconda: %w", err))
}
// Deploy RHOAI manifests
if err := deploy.DeployManifestsFromPath(ctx, cli, owner, entryPath, dscispec.ApplicationsNamespace, ComponentNameDownstream, enabled); err != nil {
return fmt.Errorf("failed to apply manifests from %s: %w", PathDownstream, err)
return status.UpdateFailedCondition(ComponentNameDownstream, fmt.Errorf("failed to apply manifests from %s: %w", PathDownstream, err))
}
l.Info("apply manifests done")

@@ -146,31 +147,31 @@ func (d *Dashboard) ReconcileComponent(ctx context.Context,
if enabled {
// first check if the service is up, so prometheus won't fire alerts while it is still starting up
if err := cluster.WaitForDeploymentAvailable(ctx, cli, ComponentNameDownstream, dscispec.ApplicationsNamespace, 20, 3); err != nil {
return fmt.Errorf("deployment for %s is not ready to server: %w", ComponentNameDownstream, err)
return status.UpdateFailedCondition(ComponentNameDownstream, fmt.Errorf("deployment for %s is not ready to server: %w", ComponentNameDownstream, err))
}
l.Info("deployment is done, updating monitoring rules")
}

if err := d.UpdatePrometheusConfig(cli, enabled && monitoringEnabled, ComponentNameDownstream); err != nil {
return err
return status.UpdateFailedCondition(ComponentNameDownstream, err)
}
if err := deploy.DeployManifestsFromPath(ctx, cli, owner,
filepath.Join(deploy.DefaultManifestPath, "monitoring", "prometheus", "apps"),
dscispec.Monitoring.Namespace,
"prometheus", true); err != nil {
return err
return status.UpdateFailedCondition(ComponentNameDownstream, err)
}
l.Info("updating SRE monitoring done")
}
return nil
return status.GetDefaultComponentCondition(ComponentNameUpstream), nil

default:
// Deploy ODH manifests
if err := deploy.DeployManifestsFromPath(ctx, cli, owner, entryPath, dscispec.ApplicationsNamespace, ComponentNameUpstream, enabled); err != nil {
return err
return status.UpdateFailedCondition(ComponentNameUpstream, fmt.Errorf("failed to apply manifests from %s: %w", entryPath, err))
}
l.Info("apply manifests done")
return nil
return status.GetDefaultComponentCondition(ComponentNameUpstream), nil
}
}

24 changes: 9 additions & 15 deletions components/datasciencepipelines/datasciencepipelines.go
@@ -11,7 +11,6 @@ import (
"github.com/go-logr/logr"
operatorv1 "github.com/openshift/api/operator/v1"
conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1"
corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
k8serr "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -70,7 +69,7 @@ func (d *DataSciencePipelines) ReconcileComponent(ctx context.Context,
dscispec *dsciv1.DSCInitializationSpec,
platform cluster.Platform,
_ bool,
) error {
) (conditionsv1.Condition, error) {
l := d.ConfigComponentLogger(logger, ComponentName, dscispec)
var imageParamMap = map[string]string{
// v1
@@ -98,19 +97,19 @@ func (d *DataSciencePipelines) ReconcileComponent(ctx context.Context,
if d.DevFlags != nil {
// Download manifests and update paths
if err := d.OverrideManifests(ctx, platform); err != nil {
return err
return status.UpdateFailedCondition(ComponentName, err)
}
}
// skip checking whether the dependent operator has been installed, this is done in dashboard
// Update image parameters only when we do not have customized manifests set
if (dscispec.DevFlags == nil || dscispec.DevFlags.ManifestsUri == "") && (d.DevFlags == nil || len(d.DevFlags.Manifests) == 0) {
if err := deploy.ApplyParams(Path, imageParamMap); err != nil {
return fmt.Errorf("failed to update image from %s : %w", Path, err)
return status.UpdateFailedCondition(ComponentName, fmt.Errorf("failed to update image from %s : %w", Path, err))
}
}
// Check for existing Argo Workflows
if err := UnmanagedArgoWorkFlowExists(ctx, cli); err != nil {
return err
return status.UpdateFailedCondition(ComponentName, err)
}
}

@@ -120,7 +119,7 @@ func (d *DataSciencePipelines) ReconcileComponent(ctx context.Context,
manifestsPath = filepath.Join(OverlayPath, "odh")
}
if err := deploy.DeployManifestsFromPath(ctx, cli, owner, manifestsPath, dscispec.ApplicationsNamespace, ComponentName, enabled); err != nil {
return err
return status.UpdateFailedCondition(ComponentName, fmt.Errorf("failed to apply manifests %s: %w", manifestsPath, err))
}
l.Info("apply manifests done")

@@ -130,24 +129,24 @@ func (d *DataSciencePipelines) ReconcileComponent(ctx context.Context,
// first check if the service is up, so prometheus won't fire alerts while it is still starting up
// with only 1 replica this should be very quick
if err := cluster.WaitForDeploymentAvailable(ctx, cli, ComponentName, dscispec.ApplicationsNamespace, 10, 1); err != nil {
return fmt.Errorf("deployment for %s is not ready to server: %w", ComponentName, err)
return status.UpdateFailedCondition(ComponentName, fmt.Errorf("deployment for %s is not ready to server: %w", ComponentName, err))
}
l.Info("deployment is done, updating monitoring rules")
}

if err := d.UpdatePrometheusConfig(cli, enabled && monitoringEnabled, ComponentName); err != nil {
return err
return status.UpdateFailedCondition(ComponentName, err)
}
if err := deploy.DeployManifestsFromPath(ctx, cli, owner,
filepath.Join(deploy.DefaultManifestPath, "monitoring", "prometheus", "apps"),
dscispec.Monitoring.Namespace,
"prometheus", true); err != nil {
return err
return status.UpdateFailedCondition(ComponentName, err)
}
l.Info("updating SRE monitoring done")
}

return nil
return status.GetDefaultComponentCondition(ComponentName), nil
}

func UnmanagedArgoWorkFlowExists(ctx context.Context,
@@ -167,8 +166,3 @@ func UnmanagedArgoWorkFlowExists(ctx context.Context,
return fmt.Errorf("%s CRD already exists but not deployed by this operator. "+
"Remove existing Argo workflows or set `spec.components.datasciencepipelines.managementState` to Removed to proceed ", ArgoWorkflowCRD)
}

func SetExistingArgoCondition(conditions *[]conditionsv1.Condition, reason, message string) {
status.SetCondition(conditions, string(status.CapabilityDSPv2Argo), reason, message, corev1.ConditionFalse)
status.SetComponentCondition(conditions, ComponentName, status.ReconcileFailed, message, corev1.ConditionFalse)
}
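
Per the commit message, the helper removed above moved into the status package. A sketch of its relocated form, keeping the same two-condition behavior; the placement, literal component name, and ReconcileFailed constant are assumptions:

// In controllers/status (assumed): flag the DSPv2 Argo capability and the
// pipelines component as failed when an unmanaged Argo Workflows CRD exists.
func SetExistingArgoCondition(conditions *[]conditionsv1.Condition, reason, message string) {
	SetCondition(conditions, string(CapabilityDSPv2Argo), reason, message, corev1.ConditionFalse)
	// Literal name used for illustration; the real code may reference the
	// component's ComponentName constant instead.
	SetComponentCondition(conditions, "data-science-pipelines-operator", ReconcileFailed, message, corev1.ConditionFalse)
}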
