From 89c0c338dba81ba5d7f936de38ae7dbea0f9a9d2 Mon Sep 17 00:00:00 2001
From: Andrew Denton
Date: Fri, 4 Oct 2024 11:45:50 -0700
Subject: [PATCH] Check machine status and log details if it is not running

Before counting ready worker nodes, list the worker Machines in
openshift-machine-api and count those in the Running phase. Machines in
any other phase, or with no phase set, are logged together with their
raw provider status.

---
 pkg/cluster/condition.go      | 36 +++++++++++++++++
 pkg/cluster/condition_test.go | 75 ++++++++++++++++++++++++++++++++++-
 2 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/pkg/cluster/condition.go b/pkg/cluster/condition.go
index a5f1fbd9140..0c7eee820d6 100644
--- a/pkg/cluster/condition.go
+++ b/pkg/cluster/condition.go
@@ -28,6 +28,42 @@ func (m *manager) apiServersReady(ctx context.Context) (bool, error) {
 }
 
 func (m *manager) minimumWorkerNodesReady(ctx context.Context) (bool, error) {
+	// Count the worker Machines that are in the Running phase, logging the
+	// phase and raw provider status of every one that is not.
+	machines, err := m.maocli.MachineV1beta1().Machines("openshift-machine-api").List(ctx, metav1.ListOptions{
+		LabelSelector: "machine.openshift.io/cluster-api-machine-role=worker",
+	})
+	if err != nil {
+		m.log.Error(err)
+		return false, nil
+	}
+
+	readyWorkerMachines := 0
+	for _, machine := range machines.Items {
+		// Phase is a *string; treat an unset phase as not running instead of
+		// dereferencing nil.
+		phase := "<unknown>"
+		if machine.Status.Phase != nil {
+			phase = *machine.Status.Phase
+		}
+		if phase == "Running" {
+			readyWorkerMachines++
+			continue
+		}
+
+		// ProviderStatus is a pointer as well and may be absent.
+		providerStatus := "<none>"
+		if machine.Status.ProviderStatus != nil {
+			providerStatus = string(machine.Status.ProviderStatus.Raw)
+		}
+		m.log.Errorf("Machine %s is %s; status: %v", machine.Name, phase, providerStatus)
+	}
+
+	if readyWorkerMachines < minimumWorkerNodes {
+		m.log.Infof("%d machines out of %d machines ready", readyWorkerMachines, minimumWorkerNodes)
+		return false, nil
+	}
+
 	nodes, err := m.kubernetescli.CoreV1().Nodes().List(ctx, metav1.ListOptions{
 		LabelSelector: "node-role.kubernetes.io/worker",
 	})
diff --git a/pkg/cluster/condition_test.go b/pkg/cluster/condition_test.go
index a0515659204..f5ee3d84f4a 100644
--- a/pkg/cluster/condition_test.go
+++ b/pkg/cluster/condition_test.go
@@ -5,13 +5,16 @@ package cluster
 
 import (
 	"context"
+	"encoding/json"
 	"errors"
 	"testing"
 	"time"
 
 	configv1 "github.com/openshift/api/config/v1"
+	machinev1beta1 "github.com/openshift/api/machine/v1beta1"
 	operatorv1 "github.com/openshift/api/operator/v1"
 	configfake "github.com/openshift/client-go/config/clientset/versioned/fake"
+	machinefake "github.com/openshift/client-go/machine/clientset/versioned/fake"
 	operatorfake "github.com/openshift/client-go/operator/clientset/versioned/fake"
 	cloudcredentialv1 "github.com/openshift/cloud-credential-operator/pkg/apis/cloudcredential/v1"
 	"github.com/sirupsen/logrus"
@@ -29,6 +32,20 @@ import (
 
 const errMustBeNilMsg = "err must be nil; condition is retried until timeout"
 
+// marshalAzureMachineProviderStatus JSON-encodes status into a RawExtension,
+// failing the test if encoding fails.
+func marshalAzureMachineProviderStatus(t *testing.T, status *machinev1beta1.AzureMachineProviderStatus) *runtime.RawExtension {
+	t.Helper()
+
+	buf, err := json.Marshal(status)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return &runtime.RawExtension{
+		Raw: buf,
+	}
+}
+
 func TestOperatorConsoleExists(t *testing.T) {
 	ctx := context.Background()
 
@@ -118,6 +135,8 @@ func TestIsOperatorAvailable(t *testing.T) {
 
 func TestMinimumWorkerNodesReady(t *testing.T) {
 	ctx := context.Background()
+	machineStateRunning := "Running"
+	machineStateFailed := "Failed"
 
 	for _, tt := range []struct {
 		name string
@@ -149,6 +168,60 @@ func TestMinimumWorkerNodesReady(t *testing.T) {
 		},
 	} {
 		m := &manager{
+			log: logrus.NewEntry(logrus.StandardLogger()),
+			maocli: machinefake.NewSimpleClientset(
+				&machinev1beta1.Machine{
+					ObjectMeta: metav1.ObjectMeta{Name: "node1",
+						Namespace: "openshift-machine-api",
+						Labels: map[string]string{
+							"machine.openshift.io/cluster-api-machine-role": "worker",
+							"machine.openshift.io/cluster-api-machine-type": "worker",
+						},
+					},
+					Status: machinev1beta1.MachineStatus{
+						Phase:          &machineStateRunning,
+						ProviderStatus: marshalAzureMachineProviderStatus(t, &machinev1beta1.AzureMachineProviderStatus{}),
+					},
+				},
+				&machinev1beta1.Machine{
+					ObjectMeta: metav1.ObjectMeta{Name: "node2",
+						Namespace: "openshift-machine-api",
+						Labels: map[string]string{
+							"machine.openshift.io/cluster-api-machine-role": "worker",
+							"machine.openshift.io/cluster-api-machine-type": "worker",
+						},
+					},
+					Status: machinev1beta1.MachineStatus{
+						Phase:          &machineStateRunning,
+						ProviderStatus: marshalAzureMachineProviderStatus(t, &machinev1beta1.AzureMachineProviderStatus{}),
+					},
+				},
+				&machinev1beta1.Machine{
+					ObjectMeta: metav1.ObjectMeta{Name: "node3",
+						Namespace: "openshift-machine-api",
+						Labels: map[string]string{
+							"machine.openshift.io/cluster-api-machine-role": "worker",
+							"machine.openshift.io/cluster-api-machine-type": "worker",
+						},
+					},
+					Status: machinev1beta1.MachineStatus{
+						Phase:          &machineStateFailed,
+						ProviderStatus: marshalAzureMachineProviderStatus(t, &machinev1beta1.AzureMachineProviderStatus{}),
+					},
+				},
+				// node4 has no status yet: Phase and ProviderStatus are both nil.
+				&machinev1beta1.Machine{
+					ObjectMeta: metav1.ObjectMeta{Name: "node4",
+						Namespace: "openshift-machine-api",
+						Labels: map[string]string{
+							"machine.openshift.io/cluster-api-machine-role": "worker",
+							"machine.openshift.io/cluster-api-machine-type": "worker",
+						},
+					},
+				},
+				testMachine(t, "openshift-machine-api", "master1", &machinev1beta1.AzureMachineProviderSpec{}),
+				testMachine(t, "openshift-machine-api", "master2", &machinev1beta1.AzureMachineProviderSpec{}),
+			),
 			kubernetescli: fake.NewSimpleClientset(&corev1.NodeList{
 				Items: []corev1.Node{
 					{
@@ -187,7 +260,7 @@ func TestMinimumWorkerNodesReady(t *testing.T) {
 			t.Error(errMustBeNilMsg)
 		}
 		if ready != tt.want {
-			t.Error(ready)
+			t.Error(tt.name, ready)
 		}
 	}
 }