From 94c79ddc8392764bd88e6ce5a47dc94dc2ec3b94 Mon Sep 17 00:00:00 2001
From: Andrew Denton
Date: Tue, 27 Aug 2024 12:06:28 -0700
Subject: [PATCH] Add aro operator deployment logging

When $CI is set, dump the shell state, the process list and the contents
of every PATH directory before the e2e run. When gathering failure logs,
record the status of each pod in openshift-azure-operator and log the
last 20 lines of its output. When an aro-operator deployment is not
ready, log its status at debug level.

---
 hack/e2e/run-rp-and-e2e.sh    |  4 +++
 pkg/cluster/gatherlogs.go     | 48 +++++++++++++++++++++++++++++++++++
 pkg/operator/deploy/deploy.go | 17 +++++++++++--
 3 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/hack/e2e/run-rp-and-e2e.sh b/hack/e2e/run-rp-and-e2e.sh
index 5dc1878642f..a6c42f875f7 100755
--- a/hack/e2e/run-rp-and-e2e.sh
+++ b/hack/e2e/run-rp-and-e2e.sh
@@ -15,6 +15,10 @@ if [[ $CI ]]; then
     PRIVATE_CLUSTER=true
     E2E_DELETE_CLUSTER=false
     set +a
+
+    set # NOTE(review): prints every shell variable, secrets included, into the CI log - confirm this is acceptable
+    ps aux
+    (IFS=":"; for P in $PATH; do echo -e "\n$P:"; ls -la "$P" || true; done) # subshell keeps the IFS change local
 fi
 
 validate_rp_running() {
diff --git a/pkg/cluster/gatherlogs.go b/pkg/cluster/gatherlogs.go
index 31633ce8dad..d14fc64cd6b 100644
--- a/pkg/cluster/gatherlogs.go
+++ b/pkg/cluster/gatherlogs.go
@@ -4,9 +4,13 @@ package cluster
 // Licensed under the Apache License 2.0.
 
 import (
+	"bufio"
 	"context"
 	"encoding/json"
+	"fmt"
+	"io"
 
+	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	"github.com/Azure/ARO-RP/pkg/cluster/failurediagnostics"
@@ -27,6 +31,7 @@ func (m *manager) gatherFailureLogs(ctx context.Context) {
 		{f: m.logClusterOperators, isJSON: true},
 		{f: m.logIngressControllers, isJSON: true},
 		{f: d.LogVMSerialConsole, isJSON: false},
+		{f: m.logPodLogs, isJSON: false},
 	} {
 		o, err := f.f(ctx)
 		if err != nil {
@@ -121,3 +126,46 @@ func (m *manager) logIngressControllers(ctx context.Context) (interface{}, error
 
 	return ics.Items, nil
 }
+
+func (m *manager) logPodLogs(ctx context.Context) (interface{}, error) {
+	if m.operatorcli == nil {
+		return nil, nil
+	}
+
+	tailLines := int64(20) // only the last 20 lines of each pod are requested
+	podLogOptions := corev1.PodLogOptions{
+		TailLines: &tailLines,
+	}
+	items := make([]interface{}, 0)
+
+	pods, err := m.kubernetescli.CoreV1().Pods("openshift-azure-operator").List(ctx, metav1.ListOptions{})
+	if err != nil {
+		return nil, err
+	}
+	for _, i := range pods.Items {
+		items = append(items, fmt.Sprintf("pod status %s: %v", i.Name, i.Status))
+
+		req := m.kubernetescli.CoreV1().Pods("openshift-azure-operator").GetLogs(i.Name, &podLogOptions)
+		logForPod := m.log.WithField("pod", i.Name)
+		logStream, err := req.Stream(ctx)
+		if err != nil {
+			items = append(items, fmt.Sprintf("pod logs retrieval error for %s: %s", i.Name, err))
+			continue // a failure for one pod is recorded and the remaining pods are still processed
+		}
+
+		reader := bufio.NewReader(logStream)
+		for {
+			line, err := reader.ReadString('\n')
+			logForPod.Debug(line)
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				m.log.Errorf("pod logs reading error for %s: %s", i.Name, err)
+				break
+			}
+		}
+		logStream.Close() // close per pod: a defer inside this loop would hold every stream open until return
+	}
+	return items, nil
+}
diff --git a/pkg/operator/deploy/deploy.go b/pkg/operator/deploy/deploy.go
index ac1967507ea..447869a1944 100644
--- a/pkg/operator/deploy/deploy.go
+++ b/pkg/operator/deploy/deploy.go
@@ -495,14 +495,27 @@ func (o *operator) EnsureUpgradeAnnotation(ctx context.Context) error {
 }
 
 func (o *operator) IsReady(ctx context.Context) (bool, error) {
-	ok, err := ready.CheckDeploymentIsReady(ctx, o.kubernetescli.AppsV1().Deployments(pkgoperator.Namespace), "aro-operator-master")()
+	deployments := o.kubernetescli.AppsV1().Deployments(pkgoperator.Namespace)
+	ok, err := ready.CheckDeploymentIsReady(ctx, deployments, "aro-operator-master")()
 	o.log.Infof("deployment %q ok status is: %v, err is: %v", "aro-operator-master", ok, err)
 	if !ok || err != nil {
+		d, getErr := deployments.Get(ctx, "aro-operator-master", metav1.GetOptions{}) // own name: must not shadow the err returned below
+		if getErr != nil {
+			o.log.Debugf("deployment \"aro-operator-master\" error: %s", getErr)
+		} else {
+			o.log.Debugf("deployment \"aro-operator-master\" status: %v", &d.Status)
+		}
 		return ok, err
 	}
-	ok, err = ready.CheckDeploymentIsReady(ctx, o.kubernetescli.AppsV1().Deployments(pkgoperator.Namespace), "aro-operator-worker")()
+	ok, err = ready.CheckDeploymentIsReady(ctx, deployments, "aro-operator-worker")()
 	o.log.Infof("deployment %q ok status is: %v, err is: %v", "aro-operator-worker", ok, err)
 	if !ok || err != nil {
+		d, getErr := deployments.Get(ctx, "aro-operator-worker", metav1.GetOptions{}) // own name: must not shadow the err returned below
+		if getErr != nil {
+			o.log.Debugf("deployment \"aro-operator-worker\" error: %s", getErr)
+		} else {
+			o.log.Debugf("deployment \"aro-operator-worker\" status: %v", &d.Status)
+		}
 		return ok, err
 	}
 