Skip to content

Commit 4d21397

Browse files
Merge pull request #361 from openshift-cherrypick-robot/cherry-pick-360-to-release-0.9
[release-0.9] Fix orphaned CR handling
2 parents 96b4bf2 + 2cc2d2e commit 4d21397

File tree

2 files changed

+31
-17
lines changed

2 files changed

+31
-17
lines changed

Diff for: controllers/nodehealthcheck_controller.go

+7-8
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ func (r *NodeHealthCheckReconciler) Reconcile(ctx context.Context, req ctrl.Requ
170170
defer func() {
171171
patchErr := r.patchStatus(ctx, log, nhc, nhcOrig)
172172
if patchErr != nil {
173-
log.Error(err, "failed to update status")
173+
log.Error(patchErr, "failed to update status")
174174
}
175175
returnErr = utilerrors.NewAggregate([]error{patchErr, returnErr})
176176
log.Info("reconcile end", "error", returnErr, "requeue", result.Requeue, "requeuAfter", result.RequeueAfter)
@@ -465,12 +465,14 @@ func (r *NodeHealthCheckReconciler) deleteOrphanedRemediationCRs(nhc *remediatio
465465
}
466466

467467
// check conditions
468+
// for some remediators (e.g. MDR) node deletion is expected. If so, wait until they are succeeded.
469+
// for all other remediators, we can delete the CRs immediately after node deletion
468470
permanentNodeDeletionExpectedCondition := getCondition(&cr, commonconditions.PermanentNodeDeletionExpectedType, log)
469471
permanentNodeDeletionExpected := permanentNodeDeletionExpectedCondition != nil && permanentNodeDeletionExpectedCondition.Status == metav1.ConditionTrue
470472
succeededCondition := getCondition(&cr, commonconditions.SucceededType, log)
471473
succeeded := succeededCondition != nil && succeededCondition.Status == metav1.ConditionTrue
472-
if !permanentNodeDeletionExpected || !succeeded {
473-
// no node name change expected, or not succeeded yet
474+
if permanentNodeDeletionExpected && !succeeded {
475+
// node deletion is expected, but remediation not succeeded yet
474476
return false
475477
}
476478

@@ -504,13 +506,10 @@ func (r *NodeHealthCheckReconciler) deleteOrphanedRemediationCRs(nhc *remediatio
504506
resources.UpdateStatusNodeHealthy(nodeName, nhc)
505507

506508
if deleted, err := rm.DeleteRemediationCR(&cr, nhc); err != nil {
507-
log.Error(err, "failed to delete remediation CR", "name", cr.GetName())
509+
log.Error(err, "failed to delete orphaned remediation CR", "name", cr.GetName())
508510
return err
509511
} else if deleted {
510-
permanentNodeDeletionExpectedCondition := getCondition(&cr, commonconditions.PermanentNodeDeletionExpectedType, log)
511-
log.Info("deleted orphaned remediation CR", "name", cr.GetName(),
512-
"reason", permanentNodeDeletionExpectedCondition.Reason,
513-
"message", permanentNodeDeletionExpectedCondition.Message)
512+
log.Info("deleted orphaned remediation CR", "name", cr.GetName(), "for deleted node", nodeName)
514513
}
515514

516515
}

Diff for: controllers/nodehealthcheck_controller_test.go

+24-9
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,10 @@ var _ = Describe("Node Health Check CR", func() {
210210
Expect(k8sClient.Update(context.Background(), &item)).To(Succeed())
211211
Expect(k8sClient.Get(context.Background(), client.ObjectKeyFromObject(&item), &item)).To(Succeed())
212212
}
213-
Expect(k8sClient.Delete(context.Background(), &item)).To(Succeed())
213+
err := k8sClient.Delete(context.Background(), &item)
214+
if err != nil {
215+
Expect(errors.IsNotFound(err)).To(BeTrue())
216+
}
214217
}
215218
}
216219

@@ -1288,15 +1291,15 @@ var _ = Describe("Node Health Check CR", func() {
12881291
Expect(k8sClient.Delete(context.Background(), node))
12891292
}
12901293

1291-
markCR := func() *unstructured.Unstructured {
1292-
By("marking CR as succeeded and permanent node deletion expected")
1294+
markCR := func(succeededStatus metav1.ConditionStatus) *unstructured.Unstructured {
1295+
By("marking CR for permanent node deletion expected")
12931296
cr := findRemediationCRForNHC("unhealthy-worker-node-1", underTest)
12941297
Expect(cr).ToNot(BeNil())
12951298

12961299
conditions := []interface{}{
12971300
map[string]interface{}{
12981301
"type": commonconditions.SucceededType,
1299-
"status": "True",
1302+
"status": string(succeededStatus),
13001303
"lastTransitionTime": time.Now().Format(time.RFC3339),
13011304
},
13021305
map[string]interface{}{
@@ -1322,18 +1325,30 @@ var _ = Describe("Node Health Check CR", func() {
13221325
Expect(underTest.Status.UnhealthyNodes).To(BeEmpty())
13231326
}
13241327

1325-
It("it should delete orphaned CR when CR was updated", func() {
1328+
ensureCRNotDeleted := func(cr *unstructured.Unstructured) {
1329+
By("ensuring CR is not deleted")
1330+
Consistently(func() error {
1331+
return k8sClient.Get(context.Background(), client.ObjectKeyFromObject(cr), cr)
1332+
}, "10s", "1s").ShouldNot(HaveOccurred())
1333+
1334+
// get updated NHC
1335+
Expect(k8sClient.Get(context.Background(), client.ObjectKeyFromObject(underTest), underTest)).To(Succeed())
1336+
Expect(underTest.Status.UnhealthyNodes).ToNot(BeEmpty())
1337+
}
1338+
1339+
It("it should delete orphaned CR when CR with expected node deletion succeeded", func() {
13261340
Expect(underTest.Status.UnhealthyNodes).To(HaveLen(1))
1341+
cr := markCR(metav1.ConditionFalse)
13271342
deleteNode()
1328-
time.Sleep(1 * time.Second)
1329-
cr := markCR()
1343+
ensureCRNotDeleted(cr)
1344+
cr = markCR(metav1.ConditionTrue)
13301345
expectCRDeletion(cr)
13311346
})
13321347

13331348
It("it should delete orphaned CR when node is deleted", func() {
13341349
Expect(underTest.Status.UnhealthyNodes).To(HaveLen(1))
1335-
cr := markCR()
1336-
time.Sleep(1 * time.Second)
1350+
cr := findRemediationCRForNHC("unhealthy-worker-node-1", underTest)
1351+
Expect(cr).ToNot(BeNil())
13371352
deleteNode()
13381353
expectCRDeletion(cr)
13391354
})

0 commit comments

Comments
 (0)