Skip to content

Commit 6de6622

Browse files
MElHousseinturboFei
andcommitted
[KYUUBI #7214] Fix kubernetes container state
### Why are the changes needed? This PR fixes #7195 where, if `kyuubi.kubernetes.application.state.source` is `CONTAINER` and Kubernetes fails to pull the image, or the image name is not valid, or any other failure occurs, Kyuubi marks the application as pending forever. ### How was this patch tested? - Added unit tests in KubernetesApplicationOperationSuite. To run the targeted suite: `./build/mvn -pl kyuubi-server -DskipITs -Dtest=org.apache.kyuubi.engine.KubernetesApplicationOperationSuite test` ### Was this patch authored or co-authored using generative AI tooling? No Closes #7214 from moelhoussein/fix-kubernetes-container-state. Closes #7214 4f83667 [Wang, Fei] Revert "reformatted" e869a46 [MElHoussein] reformatted 8ede9aa [Wang, Fei] code style d3b9cd2 [MElHoussein] revert unrelated test/url overload changes; keep only container waiting-state logic change 7d346fc [MElHoussein] engine(k8s): treat only specific waiting reasons as PENDING; others FAILED; handle empty reason as PENDING; unify constant as PENDING_WAITING_REASONS; add buildSparkAppUrl overload; restore POD IP URL test; add tests for failure waiting reasons; revert .idea/vcs.xml ca94d64 [MElHoussein] Fixing container state Lead-authored-by: MElHoussein <[email protected]> Co-authored-by: Wang, Fei <[email protected]> Signed-off-by: Wang, Fei <[email protected]> (cherry picked from commit b5d7f58) Signed-off-by: Wang, Fei <[email protected]>
1 parent 96ebc5d commit 6de6622

File tree

2 files changed

+48
-1
lines changed

2 files changed

+48
-1
lines changed

kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,7 @@ object KubernetesApplicationOperation extends Logging {
561561
val KUBERNETES_SERVICE_HOST = "KUBERNETES_SERVICE_HOST"
562562
val KUBERNETES_SERVICE_PORT = "KUBERNETES_SERVICE_PORT"
563563
val SPARK_UI_PORT_NAME = "spark-ui"
564+
private val PENDING_WAITING_REASONS: Set[String] = Set("ContainerCreating", "PodInitializing")
564565

565566
def toLabel(tag: String): String = s"label: $LABEL_KYUUBI_UNIQUE_KEY=$tag"
566567

@@ -638,7 +639,8 @@ object KubernetesApplicationOperation extends Logging {
638639
def containerStateToApplicationState(containerState: ContainerState): ApplicationState = {
639640
// https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-states
640641
if (containerState.getWaiting != null) {
641-
PENDING
642+
val reasonOpt = Option(containerState.getWaiting.getReason).map(_.trim).filter(_.nonEmpty)
643+
if (reasonOpt.isEmpty || PENDING_WAITING_REASONS.contains(reasonOpt.get)) PENDING else FAILED
642644
} else if (containerState.getRunning != null) {
643645
RUNNING
644646
} else if (containerState.getTerminated == null) {

kyuubi-server/src/test/scala/org/apache/kyuubi/engine/KubernetesApplicationOperationSuite.scala

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,11 @@
1717

1818
package org.apache.kyuubi.engine
1919

20+
import io.fabric8.kubernetes.api.model.{ContainerState, ContainerStateWaiting}
21+
2022
import org.apache.kyuubi.{KyuubiException, KyuubiFunSuite}
2123
import org.apache.kyuubi.config.KyuubiConf
24+
import org.apache.kyuubi.engine.ApplicationState.{FAILED, PENDING}
2225

2326
class KubernetesApplicationOperationSuite extends KyuubiFunSuite {
2427

@@ -113,4 +116,46 @@ class KubernetesApplicationOperationSuite extends KyuubiFunSuite {
113116
KubernetesInfo(Some("c1"), None),
114117
KubernetesInfo(None, Some("ns1"))))
115118
}
119+
120+
test("containerStateToApplicationState waiting reasons") {
121+
// Only valid pending reasons: ContainerCreating and PodInitializing
122+
val pendingWaitingReasons = Set("ContainerCreating", "PodInitializing")
123+
124+
pendingWaitingReasons.foreach { reason =>
125+
val containerState = new ContainerState()
126+
val waiting = new ContainerStateWaiting()
127+
waiting.setReason(reason)
128+
containerState.setWaiting(waiting)
129+
130+
val result = KubernetesApplicationOperation.containerStateToApplicationState(containerState)
131+
assert(result === PENDING)
132+
}
133+
}
134+
135+
test("containerStateToApplicationState failure reasons and empty reason") {
136+
val failureReasons = Set(
137+
"ErrImagePull",
138+
"ImagePullBackOff",
139+
"CrashLoopBackOff",
140+
"CreateContainerConfigError")
141+
142+
failureReasons.foreach { reason =>
143+
val containerState = new ContainerState()
144+
val waiting = new ContainerStateWaiting()
145+
waiting.setReason(reason)
146+
containerState.setWaiting(waiting)
147+
148+
val result = KubernetesApplicationOperation.containerStateToApplicationState(containerState)
149+
assert(result === FAILED)
150+
}
151+
152+
// Empty/null reason should be treated as PENDING (still initializing)
153+
val containerStateEmpty = new ContainerState()
154+
val waitingEmpty = new ContainerStateWaiting()
155+
waitingEmpty.setReason(null)
156+
containerStateEmpty.setWaiting(waitingEmpty)
157+
val resultEmpty =
158+
KubernetesApplicationOperation.containerStateToApplicationState(containerStateEmpty)
159+
assert(resultEmpty === PENDING)
160+
}
116161
}

0 commit comments

Comments
 (0)