Skip to content

Commit

Permalink
Run UpgradeJob hooks even if no update exists (#34)
Browse files Browse the repository at this point in the history
  • Loading branch information
bastjan authored Jul 21, 2023
1 parent a449e4e commit f1e0ae3
Show file tree
Hide file tree
Showing 9 changed files with 156 additions and 79 deletions.
6 changes: 4 additions & 2 deletions api/v1beta1/upgradejob_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,10 @@ type UpgradeJobSpec struct {
// If the upgrade job is not started before this time, it is considered failed.
StartBefore metav1.Time `json:"startBefore"`

// DesiredVersion defines the desired version to upgrade to
DesiredVersion configv1.Update `json:"desiredVersion"`
// DesiredVersion defines the desired version to upgrade to.
// Can be empty if the upgrade job was created when there was no new version available.
// +optional
DesiredVersion *configv1.Update `json:"desiredVersion,omitempty"`

// UpgradeJobConfig defines the configuration for the upgrade job
UpgradeJobConfig `json:"config"`
Expand Down
11 changes: 8 additions & 3 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions config/crd/bases/managedupgrade.appuio.io_upgradejobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ spec:
type: object
desiredVersion:
description: DesiredVersion defines the desired version to upgrade
to
to. Can be empty if the upgrade job was created when there was no
new version available.
properties:
force:
description: force allows an administrator to update to an image
Expand Down Expand Up @@ -123,7 +124,6 @@ spec:
type: string
required:
- config
- desiredVersion
- startAfter
- startBefore
type: object
Expand Down
28 changes: 15 additions & 13 deletions controllers/upgradeconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,19 +141,14 @@ findNextRun:
return ctrl.Result{}, fmt.Errorf("could not get cluster version: %w", err)
}

latestUpdate := clusterversion.LatestAvailableUpdate(cv)
if latestUpdate == nil {
l.Info("no updates available")
return ctrl.Result{}, r.setLastScheduledUpgrade(ctx, &uc, nextRun)
}

// Schedule is suspended, do nothing
if uc.Spec.Schedule.Suspend {
l.Info("would schedule job, but schedule is suspended")
return ctrl.Result{}, r.setLastScheduledUpgrade(ctx, &uc, nextRun)
}

if err := r.createJob(uc, *latestUpdate, nextRun, ctx); err != nil {
latestUpdate := clusterversion.LatestAvailableUpdate(cv)
if err := r.createJob(uc, latestUpdate, nextRun, ctx); err != nil {
return ctrl.Result{}, fmt.Errorf("could not create job: %w", err)
}

Expand All @@ -165,10 +160,20 @@ func (r *UpgradeConfigReconciler) setLastScheduledUpgrade(ctx context.Context, u
return r.Status().Update(ctx, uc)
}

func (r *UpgradeConfigReconciler) createJob(uc managedupgradev1beta1.UpgradeConfig, latestUpdate configv1.Release, nextRun time.Time, ctx context.Context) error {
func (r *UpgradeConfigReconciler) createJob(uc managedupgradev1beta1.UpgradeConfig, latestUpdate *configv1.Release, nextRun time.Time, ctx context.Context) error {
var dv *configv1.Update
vn := "noop-"
if latestUpdate != nil {
dv = &configv1.Update{
Version: latestUpdate.Version,
Image: latestUpdate.Image,
}
vn = strings.ReplaceAll(latestUpdate.Version, ".", "-") + "-"
}

newJob := managedupgradev1beta1.UpgradeJob{
ObjectMeta: metav1.ObjectMeta{
Name: uc.Name + "-" + strings.ReplaceAll(latestUpdate.Version, ".", "-") + "-" + strconv.FormatInt(nextRun.Unix(), 10),
Name: uc.Name + "-" + vn + strconv.FormatInt(nextRun.Unix(), 10),
Namespace: uc.Namespace,
Annotations: uc.Spec.JobTemplate.Metadata.GetAnnotations(),
Labels: uc.Spec.JobTemplate.Metadata.GetLabels(),
Expand All @@ -177,10 +182,7 @@ func (r *UpgradeConfigReconciler) createJob(uc managedupgradev1beta1.UpgradeConf
StartAfter: metav1.NewTime(nextRun),
StartBefore: metav1.NewTime(nextRun.Add(uc.Spec.MaxUpgradeStartDelay.Duration)),

DesiredVersion: configv1.Update{
Version: latestUpdate.Version,
Image: latestUpdate.Image,
},
DesiredVersion: dv,

UpgradeJobConfig: uc.Spec.JobTemplate.Spec.Config,
},
Expand Down
7 changes: 5 additions & 2 deletions controllers/upgradeconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ func Test_UpgradeConfigReconciler_Reconcile_E2E(t *testing.T) {
require.NoError(t, err)

jobs := listJobs(t, client, upgradeConfig.Namespace)
require.Len(t, jobs, 0, "no upgrade available, no job should be scheduled")
require.Len(t, jobs, 1, "job with empty desired version should be created")
emptyJob := jobs[0]
require.Nil(t, emptyJob.Spec.DesiredVersion)
require.NoError(t, client.Delete(ctx, &emptyJob))
var uuc managedupgradev1beta1.UpgradeConfig
expectedStartAfter := clock.Now().Add(upgradeConfig.Spec.PinVersionWindow.Duration)
require.NoError(t, client.Get(ctx, types.NamespacedName{Name: upgradeConfig.Name, Namespace: upgradeConfig.Namespace}, &uuc))
Expand Down Expand Up @@ -134,7 +137,7 @@ func Test_UpgradeConfigReconciler_Reconcile_E2E(t *testing.T) {
require.Equal(t, configv1.Update{
Version: ucv.Status.AvailableUpdates[0].Version,
Image: ucv.Status.AvailableUpdates[0].Image,
}, job.Spec.DesiredVersion)
}, *job.Spec.DesiredVersion)
})

step(t, "there is a future job. do nothing", func(t *testing.T) {
Expand Down
96 changes: 49 additions & 47 deletions controllers/upgradejob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,60 +184,62 @@ func (r *UpgradeJobReconciler) reconcileStartedJob(ctx context.Context, uj *mana
return ctrl.Result{}, nil
}

// Check if the desired version is already set
if version.Spec.DesiredUpdate == nil || *version.Spec.DesiredUpdate != uj.Spec.DesiredVersion {
update := clusterversion.FindAvailableUpdate(version, uj.Spec.DesiredVersion.Image, uj.Spec.DesiredVersion.Version)
if update == nil {
if uj.Spec.DesiredVersion != nil {
// Check if the desired version is already set
if version.Spec.DesiredUpdate == nil || *version.Spec.DesiredUpdate != *uj.Spec.DesiredVersion {
update := clusterversion.FindAvailableUpdate(version, uj.Spec.DesiredVersion.Image, uj.Spec.DesiredVersion.Version)
if update == nil {
r.setStatusCondition(&uj.Status.Conditions, metav1.Condition{
Type: managedupgradev1beta1.UpgradeJobConditionFailed,
Status: metav1.ConditionTrue,
Reason: managedupgradev1beta1.UpgradeJobReasonUpgradeWithdrawn,
Message: fmt.Sprintf("Upgrade became unavailable: %s", uj.Spec.DesiredVersion.Version),
})
return ctrl.Result{}, r.Status().Update(ctx, uj)
}
// Start the upgrade
version.Spec.DesiredUpdate = uj.Spec.DesiredVersion
if err := r.Update(ctx, &version); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update desired version in cluster version: %w", err)
}
return ctrl.Result{}, nil
}

// Check if the upgrade is done
upgradedCon := apimeta.FindStatusCondition(uj.Status.Conditions, managedupgradev1beta1.UpgradeJobConditionUpgradeCompleted)
if upgradedCon == nil {
r.setStatusCondition(&uj.Status.Conditions, metav1.Condition{
Type: managedupgradev1beta1.UpgradeJobConditionFailed,
Status: metav1.ConditionTrue,
Reason: managedupgradev1beta1.UpgradeJobReasonUpgradeWithdrawn,
Message: fmt.Sprintf("Upgrade became unavailable: %s", uj.Spec.DesiredVersion.Version),
Type: managedupgradev1beta1.UpgradeJobConditionUpgradeCompleted,
Status: metav1.ConditionFalse,
Reason: managedupgradev1beta1.UpgradeJobReasonInProgress,
Message: "Upgrade in progress",
})
return ctrl.Result{}, r.Status().Update(ctx, uj)
}
// Start the upgrade
version.Spec.DesiredUpdate = &uj.Spec.DesiredVersion
if err := r.Update(ctx, &version); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update desired version in cluster version: %w", err)
}
return ctrl.Result{}, nil
}
if upgradedCon.Status != metav1.ConditionTrue {
if !clusterversion.IsVersionUpgradeCompleted(version) {
l.Info("Upgrade still in progress")
return ctrl.Result{}, nil
}

// Check if the upgrade is done
upgradedCon := apimeta.FindStatusCondition(uj.Status.Conditions, managedupgradev1beta1.UpgradeJobConditionUpgradeCompleted)
if upgradedCon == nil {
r.setStatusCondition(&uj.Status.Conditions, metav1.Condition{
Type: managedupgradev1beta1.UpgradeJobConditionUpgradeCompleted,
Status: metav1.ConditionFalse,
Reason: managedupgradev1beta1.UpgradeJobReasonInProgress,
Message: "Upgrade in progress",
})
return ctrl.Result{}, r.Status().Update(ctx, uj)
}
if upgradedCon.Status != metav1.ConditionTrue {
if !clusterversion.IsVersionUpgradeCompleted(version) {
l.Info("Upgrade still in progress")
return ctrl.Result{}, nil
}
mcpl := machineconfigurationv1.MachineConfigPoolList{}
if err := r.List(ctx, &mcpl); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to list machine config pools: %w", err)
}
poolsUpdating := healthcheck.MachineConfigPoolsUpdating(mcpl)
if len(poolsUpdating) > 0 {
l.Info("Machine config pools still updating", "pools", poolsUpdating)
return ctrl.Result{}, nil
}

mcpl := machineconfigurationv1.MachineConfigPoolList{}
if err := r.List(ctx, &mcpl); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to list machine config pools: %w", err)
}
poolsUpdating := healthcheck.MachineConfigPoolsUpdating(mcpl)
if len(poolsUpdating) > 0 {
l.Info("Machine config pools still updating", "pools", poolsUpdating)
return ctrl.Result{}, nil
r.setStatusCondition(&uj.Status.Conditions, metav1.Condition{
Type: managedupgradev1beta1.UpgradeJobConditionUpgradeCompleted,
Status: metav1.ConditionTrue,
Reason: managedupgradev1beta1.UpgradeJobReasonCompleted,
Message: "Upgrade completed",
})
return ctrl.Result{}, r.Status().Update(ctx, uj)
}

r.setStatusCondition(&uj.Status.Conditions, metav1.Condition{
Type: managedupgradev1beta1.UpgradeJobConditionUpgradeCompleted,
Status: metav1.ConditionTrue,
Reason: managedupgradev1beta1.UpgradeJobReasonCompleted,
Message: "Upgrade completed",
})
return ctrl.Result{}, r.Status().Update(ctx, uj)
}

ok, err = r.runHealthCheck(ctx, uj, version,
Expand Down
71 changes: 65 additions & 6 deletions controllers/upgradejob_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func Test_UpgradeJobReconciler_Reconcile_E2E_Upgrade(t *testing.T) {
Spec: managedupgradev1beta1.UpgradeJobSpec{
StartBefore: metav1.NewTime(clock.Now().Add(10 * time.Hour)),
StartAfter: metav1.NewTime(clock.Now().Add(time.Hour)),
DesiredVersion: configv1.Update{
DesiredVersion: &configv1.Update{
Version: "4.5.13",
Image: "quay.io/openshift-release-dev/ocp-release@sha256:d094f1952995b3c5fd8e0b19b128905931e1e8fdb4b6cb377857ab0dfddcff47",
},
Expand Down Expand Up @@ -282,6 +282,65 @@ func Test_UpgradeJobReconciler_Reconcile_E2E_Upgrade(t *testing.T) {
})
}

func Test_UpgradeJobReconciler_Reconcile_EmptyDesiredVersion(t *testing.T) {
ctx := context.Background()
clock := mockClock{now: time.Date(2022, 12, 4, 22, 45, 0, 0, time.UTC)}

upgradeJob := &managedupgradev1beta1.UpgradeJob{
ObjectMeta: metav1.ObjectMeta{
Name: "upgrade-123123",
Namespace: "appuio-openshift-upgrade-controller",
Labels: map[string]string{"test": "test"},
},
Spec: managedupgradev1beta1.UpgradeJobSpec{
StartBefore: metav1.NewTime(clock.Now().Add(time.Hour)),
StartAfter: metav1.NewTime(clock.Now().Add(-time.Hour)),
},
}
upgradeJobHook := &managedupgradev1beta1.UpgradeJobHook{
ObjectMeta: metav1.ObjectMeta{
Name: "notify",
Namespace: "appuio-openshift-upgrade-controller",
},
Spec: managedupgradev1beta1.UpgradeJobHookSpec{
Selector: metav1.LabelSelector{
MatchLabels: upgradeJob.Labels,
},
Run: managedupgradev1beta1.RunAll,
Events: []managedupgradev1beta1.UpgradeEvent{
managedupgradev1beta1.EventStart,
managedupgradev1beta1.EventUpgradeComplete,
managedupgradev1beta1.EventSuccess,
managedupgradev1beta1.EventFinish,
},
},
}

c := controllerClient(t, upgradeJob, upgradeJobHook,
&configv1.ClusterVersion{
ObjectMeta: metav1.ObjectMeta{
Name: "version",
}})

subject := &UpgradeJobReconciler{
Client: c,
Scheme: c.Scheme(),

Clock: &clock,

ManagedUpstreamClusterVersionName: "version",
}

reconcileNTimes(t, subject, ctx, requestForObject(upgradeJob), 3)
checkAndCompleteHook(t, c, subject, upgradeJob, upgradeJobHook, managedupgradev1beta1.EventStart, false)
reconcileNTimes(t, subject, ctx, requestForObject(upgradeJob), 3)
checkAndCompleteHook(t, c, subject, upgradeJob, upgradeJobHook, managedupgradev1beta1.EventUpgradeComplete, false)
reconcileNTimes(t, subject, ctx, requestForObject(upgradeJob), 3)
checkAndCompleteHook(t, c, subject, upgradeJob, upgradeJobHook, managedupgradev1beta1.EventSuccess, false)
reconcileNTimes(t, subject, ctx, requestForObject(upgradeJob), 3)
checkAndCompleteHook(t, c, subject, upgradeJob, upgradeJobHook, managedupgradev1beta1.EventFinish, false)
}

func Test_UpgradeJobReconciler_Reconcile_HookFailed(t *testing.T) {
ctx := context.Background()
clock := mockClock{now: time.Date(2022, 12, 4, 22, 45, 0, 0, time.UTC)}
Expand Down Expand Up @@ -367,7 +426,7 @@ func Test_UpgradeJobReconciler_Reconcile_HookJobContainerEnv(t *testing.T) {
Spec: managedupgradev1beta1.UpgradeJobSpec{
StartBefore: metav1.NewTime(clock.Now().Add(-time.Hour)),
StartAfter: metav1.NewTime(clock.Now().Add(-7 * time.Hour)),
DesiredVersion: configv1.Update{
DesiredVersion: &configv1.Update{
Version: "4.5.13",
},
},
Expand Down Expand Up @@ -607,7 +666,7 @@ func Test_UpgradeJobReconciler_Reconcile_UpgradeWithdrawn(t *testing.T) {
Spec: managedupgradev1beta1.UpgradeJobSpec{
StartBefore: metav1.NewTime(clock.Now().Add(time.Hour)),
StartAfter: metav1.NewTime(clock.Now().Add(-time.Hour)),
DesiredVersion: configv1.Update{
DesiredVersion: &configv1.Update{
Version: "4.5.13",
Image: "quay.io/openshift-release-dev/ocp-release@sha256:d094f1952995b3c5fd8e0b19b128905931e1e8fdb4b6cb377857ab0dfddcff47",
},
Expand Down Expand Up @@ -664,7 +723,7 @@ func Test_UpgradeJobReconciler_Reconcile_Timeout(t *testing.T) {
Spec: managedupgradev1beta1.UpgradeJobSpec{
StartBefore: metav1.NewTime(clock.Now().Add(3 * time.Hour)),
StartAfter: metav1.NewTime(clock.Now().Add(-time.Hour)),
DesiredVersion: configv1.Update{
DesiredVersion: &configv1.Update{
Version: "4.5.13",
},
UpgradeJobConfig: managedupgradev1beta1.UpgradeJobConfig{
Expand Down Expand Up @@ -722,7 +781,7 @@ func Test_UpgradeJobReconciler_Reconcile_PreHealthCheckTimeout(t *testing.T) {
Spec: managedupgradev1beta1.UpgradeJobSpec{
StartBefore: metav1.NewTime(clock.Now().Add(3 * time.Hour)),
StartAfter: metav1.NewTime(clock.Now().Add(-time.Hour)),
DesiredVersion: configv1.Update{
DesiredVersion: &configv1.Update{
Version: "4.5.13",
},
UpgradeJobConfig: managedupgradev1beta1.UpgradeJobConfig{
Expand Down Expand Up @@ -795,7 +854,7 @@ func Test_UpgradeJobReconciler_Reconcile_PostHealthCheckTimeout(t *testing.T) {
Spec: managedupgradev1beta1.UpgradeJobSpec{
StartBefore: metav1.NewTime(clock.Now().Add(3 * time.Hour)),
StartAfter: metav1.NewTime(clock.Now().Add(-time.Hour)),
DesiredVersion: configv1.Update{
DesiredVersion: &configv1.Update{
Version: "4.5.13",
},
UpgradeJobConfig: managedupgradev1beta1.UpgradeJobConfig{
Expand Down
10 changes: 7 additions & 3 deletions controllers/upgrading_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,16 +109,20 @@ func (m *ClusterUpgradingMetric) Collect(ch chan<- prometheus.Metric) {
}

for _, job := range jobs.Items {
v := job.Spec.DesiredVersion
if v == nil {
v = &configv1.Update{}
}
ch <- prometheus.MustNewConstMetric(
jobStates,
prometheus.GaugeValue,
1,
job.Name,
job.Spec.StartAfter.UTC().Format(time.RFC3339),
job.Spec.StartBefore.UTC().Format(time.RFC3339),
strconv.FormatBool(job.Spec.DesiredVersion.Force),
job.Spec.DesiredVersion.Image,
job.Spec.DesiredVersion.Version,
strconv.FormatBool(v.Force),
v.Image,
v.Version,
jobState(job),
)
}
Expand Down
Loading

0 comments on commit f1e0ae3

Please sign in to comment.