From 8bfd399ea9745cd939a15c8e767edd25240980a6 Mon Sep 17 00:00:00 2001 From: Jeff Ortel Date: Tue, 9 Jul 2024 07:12:22 -0500 Subject: [PATCH] :sparkles: Detect and report pending pod unschedulable. (#712) Report an event with the reason why a pending pod cannot be scheduled. Not sure whether this should be merged. Troubleshooting why k8s won't run a pod seems best done by snooping around the cluster. After all, pods waiting to be scheduled are an anticipated (normal) thing on a busy cluster. This PR was opened only to (potentially) help with troubleshooting broken CI. It would be better for the CI test to fetch and log task pods stuck at pending. Signed-off-by: Jeff Ortel --- task/manager.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/task/manager.go b/task/manager.go index 894cd9fe9..15a2e5158 100644 --- a/task/manager.go +++ b/task/manager.go @@ -54,6 +54,7 @@ const ( ImageError = "ImageError" PodNotFound = "PodNotFound" PodCreated = "PodCreated" + PodPending = "PodPending" PodRunning = "PodRunning" Preempted = "Preempted" PodSucceeded = "PodSucceeded" @@ -1271,6 +1272,13 @@ func (r *Task) podPending(pod *core.Pod) { status, pod.Status.ContainerStatuses...) started := 0 + for _, cnd := range pod.Status.Conditions { + if cnd.Type == core.PodScheduled && + cnd.Reason == core.PodReasonUnschedulable { + r.Event(PodPending, cnd.Message) + return + } + } for _, status := range status { state := status.State if state.Waiting != nil { @@ -1287,6 +1295,8 @@ func (r *Task) podPending(pod *core.Pod) { r.Event(ImageError, waiting.Reason) r.State = Failed return + } else { + r.Event(PodPending, waiting.Reason) } } if status.Started == nil {