Skip to content

Commit

Permalink
SKS-3242: Replace WaitingForAvailableHostWithSufficientMemory with WaitingForELFClusterWithSufficientMemory (#190)
Browse files Browse the repository at this point in the history

#189
  • Loading branch information
haijianyang authored Dec 18, 2024
1 parent 0f06e84 commit 511af15
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 6 deletions.
4 changes: 0 additions & 4 deletions api/v1beta1/conditions_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,6 @@ const (
// are automatically re-tried by the controller.
SelectingGPUFailedReason = "SelectingGPUFailed"

// WaitingForAvailableHostWithSufficientMemoryReason (Severity=Info) documents an ElfMachine
// waiting for an available host with sufficient memory to create VM.
WaitingForAvailableHostWithSufficientMemoryReason = "WaitingForAvailableHostWithSufficientMemory"

// WaitingForAvailableHostWithEnoughGPUsReason (Severity=Info) documents an ElfMachine
// waiting for an available host with enough GPUs to create VM.
WaitingForAvailableHostWithEnoughGPUsReason = "WaitingForAvailableHostWithEnoughGPUs"
Expand Down
2 changes: 1 addition & 1 deletion controllers/elfmachine_controller_gpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func (r *ElfMachineReconciler) selectHostAndGPUsForVM(ctx goctx.Context, machine
conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.SelectingGPUFailedReason, clusterv1.ConditionSeverityError, reterr.Error())
} else if rethost == nil {
if availableHosts.Len() == 0 {
conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForAvailableHostWithSufficientMemoryReason, clusterv1.ConditionSeverityWarning, "")
conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForELFClusterWithSufficientMemoryReason, clusterv1.ConditionSeverityWarning, "")
log.V(1).Info("Waiting for enough available hosts")
} else {
conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForAvailableHostWithEnoughGPUsReason, clusterv1.ConditionSeverityInfo, "")
Expand Down
2 changes: 1 addition & 1 deletion controllers/elfmachine_controller_gpu_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ var _ = Describe("ElfMachineReconciler-GPU", func() {
ok, err = reconciler.reconcileGPUDevices(ctx, machineContext, vm)
Expect(err).NotTo(HaveOccurred())
Expect(ok).To(BeFalse())
expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityWarning, infrav1.WaitingForAvailableHostWithSufficientMemoryReason}})
expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityWarning, infrav1.WaitingForELFClusterWithSufficientMemoryReason}})
})

It("should remove GPU devices to VM when detect host are not sufficient", func() {
Expand Down

0 comments on commit 511af15

Please sign in to comment.