From 5cd5388a581b26ec7bedebc34080ab6223310e38 Mon Sep 17 00:00:00 2001
From: michael stack
Date: Wed, 4 Jun 2025 13:27:04 -0700
Subject: [PATCH] * k8s/agent-scaler/agent-scaler.sh Add
 --ignore-not-found=true to delete. Cleans up some complaint when two scripts
 running beside each other and one deletes first (happens when testing
 changes to this script). Minor item. Also, clean up 'Failed' jobs else they
 just hang out.

---
 k8s/agent-scaler/agent-scaler.sh | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/k8s/agent-scaler/agent-scaler.sh b/k8s/agent-scaler/agent-scaler.sh
index 35e8016..f2946b7 100755
--- a/k8s/agent-scaler/agent-scaler.sh
+++ b/k8s/agent-scaler/agent-scaler.sh
@@ -48,7 +48,16 @@ while true; do
   # Filter by AGENT_NAME and check 3rd column for "1/1" (completions)
   for job in $(kubectl get jobs -n "${namespace}" --no-headers | { grep -E -e "^${AGENT_NAME}-[0-9]+(-[0-9]+)?\\s" || true; } | awk '$3 == "1/1" {print $1}'); do
     echo "=== Job $job Completed (1/1) - deleting from get jobs === (AGENT_NAME: ${AGENT_NAME})"
-    kubectl delete job "$job" -n "${namespace}"
+    kubectl delete job "$job" -n "${namespace}" --ignore-not-found=true
+  done
+
+  # cleanup explicitly Failed jobs: print "<name> TAB <status of the Failed condition>" per job,
+  # keep status "True", then filter by AGENT_NAME (kubectl jsonpath filters cannot combine tests with &&)
+  for job in $(kubectl get jobs -n "${namespace}" -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.conditions[?(@.type=="Failed")].status}{"\n"}{end}' | awk '$2 == "True" {print $1}' | { grep -E "^${AGENT_NAME}-[0-9]+(-[0-9]+)?$" || true; }); do
+    if [ -n "$job" ]; then # Ensure job name is not empty
+      echo "=== Job $job is Failed - deleting === (AGENT_NAME: ${AGENT_NAME})"
+      kubectl delete job "$job" -n "${namespace}" --ignore-not-found=true
+    fi
   done
 
   # cleanup failed/completed jobs by looking at pods for the current AGENT_NAME