Skip to content

Commit

Permalink
Add aro operator deployment logging
Browse files Browse the repository at this point in the history
  • Loading branch information
ventifus committed Oct 9, 2024
1 parent e83bf05 commit c0267fe
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 11 deletions.
48 changes: 48 additions & 0 deletions pkg/cluster/gatherlogs.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@ package cluster
// Licensed under the Apache License 2.0.

import (
"bufio"
"context"
"encoding/json"
"fmt"
"io"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/Azure/ARO-RP/pkg/cluster/failurediagnostics"
Expand All @@ -26,6 +30,7 @@ func (m *manager) gatherFailureLogs(ctx context.Context) {
{f: m.logNodes, isJSON: true},
{f: m.logClusterOperators, isJSON: true},
{f: m.logIngressControllers, isJSON: true},
{f: m.logPodLogs, isJSON: false},
{f: d.LogVMSerialConsole, isJSON: false},
} {
o, err := f.f(ctx)
Expand Down Expand Up @@ -121,3 +126,46 @@ func (m *manager) logIngressControllers(ctx context.Context) (interface{}, error

return ics.Items, nil
}

// logPodLogs collects diagnostics for every pod in the
// openshift-azure-operator namespace. For each pod it appends a formatted
// status string to the returned slice and writes the last 20 lines of the
// pod's logs to the manager's logger, tagged with a "pod" field.
//
// It returns (nil, nil) when the required clients are not set. Listing
// failures are returned as an error; a failure to open a single pod's log
// stream is recorded in the returned items and does not stop the loop.
func (m *manager) logPodLogs(ctx context.Context) (interface{}, error) {
	// The operatorcli guard is kept from the original; kubernetescli is the
	// client this function actually dereferences, so guard it as well.
	if m.operatorcli == nil || m.kubernetescli == nil {
		return nil, nil
	}

	const namespace = "openshift-azure-operator"

	tailLines := int64(20)
	podLogOptions := corev1.PodLogOptions{
		TailLines: &tailLines,
	}

	pods, err := m.kubernetescli.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, err
	}

	items := make([]interface{}, 0, len(pods.Items))
	// Index instead of ranging by value: corev1.Pod is a large struct.
	for i := range pods.Items {
		pod := &pods.Items[i]
		items = append(items, fmt.Sprintf("pod status %s: %v", pod.Name, pod.Status))

		if err := m.logPodLogTail(ctx, namespace, pod.Name, &podLogOptions); err != nil {
			items = append(items, fmt.Sprintf("pod logs retrieval error for %s: %s", pod.Name, err))
		}
	}
	return items, nil
}

// logPodLogTail streams the logs of one pod, selected by opts, to the
// manager's logger line by line. It lives in its own function so the
// deferred Close runs as soon as this pod is done, rather than piling up
// open streams until the caller returns.
//
// The returned error is non-nil only when the log stream could not be
// opened; read errors after that are logged and end the stream early.
func (m *manager) logPodLogTail(ctx context.Context, namespace, name string, opts *corev1.PodLogOptions) error {
	logStream, err := m.kubernetescli.CoreV1().Pods(namespace).GetLogs(name, opts).Stream(ctx)
	if err != nil {
		return err
	}
	defer logStream.Close()

	logForPod := m.log.WithField("pod", name)
	reader := bufio.NewReader(logStream)
	for {
		line, err := reader.ReadString('\n')
		// ReadString may return a final partial line together with io.EOF;
		// log it, but skip the empty read that usually accompanies EOF.
		if line != "" {
			logForPod.Info(line)
		}
		if err == io.EOF {
			return nil
		}
		if err != nil {
			m.log.Errorf("pod logs reading error for %s: %s", name, err)
			return nil
		}
	}
}
82 changes: 71 additions & 11 deletions pkg/operator/deploy/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"bytes"
"context"
"embed"
"encoding/json"
"errors"
"fmt"
"strings"
Expand Down Expand Up @@ -495,18 +496,77 @@ func (o *operator) EnsureUpgradeAnnotation(ctx context.Context) error {
}

// IsReady reports whether both ARO operator deployments
// ("aro-operator-master" and "aro-operator-worker") are ready.
//
// Every deployment is checked even if an earlier one is not ready. The
// returned bool is true only when all checks succeed, and the returned error
// is the first error any readiness check produced. For each deployment that
// is not ready, the status of the deployment, its replicasets and its pods
// is written to the log to aid diagnosis.
func (o *operator) IsReady(ctx context.Context) (bool, error) {
	deploymentOk := true
	var deploymentErr error

	deployments := o.kubernetescli.AppsV1().Deployments(pkgoperator.Namespace)

	for _, deployment := range []string{"aro-operator-master", "aro-operator-worker"} {
		ok, err := ready.CheckDeploymentIsReady(ctx, deployments, deployment)()
		o.log.Infof("deployment %q ok status is: %v, err is: %v", deployment, ok, err)
		deploymentOk = deploymentOk && ok
		if deploymentErr == nil && err != nil {
			deploymentErr = err
		}
		if !ok {
			o.logDeploymentDiagnostics(ctx, deployment)
		}
	}
	return deploymentOk, deploymentErr
}

// logDeploymentDiagnostics logs, as JSON, the status of the named deployment
// followed by the status of every replicaset and pod selected by the label
// "app=<deployment>". It is best-effort: the first lookup that fails is
// logged and the remaining diagnostics for this deployment are skipped.
func (o *operator) logDeploymentDiagnostics(ctx context.Context, deployment string) {
	d, err := o.kubernetescli.AppsV1().Deployments(pkgoperator.Namespace).Get(ctx, deployment, metav1.GetOptions{})
	if err != nil {
		o.log.Errorf("failed to get deployment %q: %s", deployment, err)
		return
	}
	j, err := json.Marshal(d.Status)
	if err != nil {
		o.log.Errorf("failed to serialize deployment %q: %s", deployment, err)
		return
	}
	o.log.Infof("deployment %q status: %s", deployment, string(j))

	selector := metav1.ListOptions{LabelSelector: fmt.Sprintf("app=%s", deployment)}

	// Gather and print status of this deployment's replicasets. The listed
	// objects already carry their status, so no per-item Get is needed.
	rs, err := o.kubernetescli.AppsV1().ReplicaSets(pkgoperator.Namespace).List(ctx, selector)
	if err != nil {
		o.log.Errorf("failed to list replicasets: %s", err)
		return
	}
	for i := range rs.Items {
		replicaset := &rs.Items[i]
		j, err := json.Marshal(replicaset.Status)
		if err != nil {
			o.log.Errorf("failed to serialize replicaset status %q: %s", replicaset.Name, err)
			continue
		}
		o.log.Infof("replicaset %q status: %s", replicaset.Name, string(j))
	}

	// Gather and print status of this deployment's pods.
	ps, err := o.kubernetescli.CoreV1().Pods(pkgoperator.Namespace).List(ctx, selector)
	if err != nil {
		o.log.Errorf("failed to list pods: %s", err)
		return
	}
	for i := range ps.Items {
		pod := &ps.Items[i]
		j, err := json.Marshal(pod.Status)
		if err != nil {
			o.log.Errorf("failed to serialize pod status %q: %s", pod.Name, err)
			continue
		}
		o.log.Infof("pod %q status: %s", pod.Name, string(j))
	}
}

func (o *operator) Restart(ctx context.Context, deploymentNames []string) error {
Expand Down

0 comments on commit c0267fe

Please sign in to comment.