Skip to content

Commit

Permalink
feat: expose pod failure reason
Browse files Browse the repository at this point in the history
  • Loading branch information
alexec committed May 19, 2021
1 parent 2a23cb8 commit 9bef0df
Show file tree
Hide file tree
Showing 15 changed files with 291 additions and 201 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

## v0.0.14 (2021-05-18)

* [2a23cb8](https://github.com/argoproj/argo-workflows/commit/2a23cb8abc4343272d4dd04f493d888694923114) fix: scale to 1 rather than 0 on start

### Contributors

* Alex Collins

## v0.0.13 (2021-05-18)

* [ea4e910](https://github.com/argoproj/argo-workflows/commit/ea4e910597a1de3a99406523fdab0feadf14a116) docs: updated CHANGELOG.md
Expand Down
374 changes: 208 additions & 166 deletions api/v1alpha1/generated.pb.go

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions api/v1alpha1/generated.proto

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 8 additions & 4 deletions api/v1alpha1/step_phase_message.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@ func (m StepPhaseMessage) GetPhase() StepPhase {
return StepPhase(strings.Split(string(m), "/")[0])
}

func (m StepPhaseMessage) GetMessage() string {
func (m StepPhaseMessage) GetReason() string {
return strings.Split(string(m), "/")[1]
}

func NewStepPhaseMessage(p StepPhase, m string) StepPhaseMessage {
return StepPhaseMessage(fmt.Sprintf("%s/%s", p, m))
func (m StepPhaseMessage) GetMessage() string {
return strings.Split(string(m), "/")[2]
}

func NewStepPhaseMessage(phase StepPhase, reason, message string) StepPhaseMessage {
return StepPhaseMessage(fmt.Sprintf("%s/%s/%s", phase, reason, message))
}

func MinStepPhaseMessage(v ...StepPhaseMessage) StepPhaseMessage {
Expand All @@ -27,5 +31,5 @@ func MinStepPhaseMessage(v ...StepPhaseMessage) StepPhaseMessage {
}
}
}
return NewStepPhaseMessage(StepUnknown, "")
return NewStepPhaseMessage(StepUnknown, "", "")
}
3 changes: 2 additions & 1 deletion api/v1alpha1/step_phase_message_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ import (
)

func TestMinStepPhaseMessage(t *testing.T) {
x := MinStepPhaseMessage(NewStepPhaseMessage(StepFailed, "foo"), NewStepPhaseMessage(StepRunning, "bar"))
x := MinStepPhaseMessage(NewStepPhaseMessage(StepFailed, "baz", "foo"), NewStepPhaseMessage(StepRunning, "qux", "bar"))
assert.Equal(t, StepFailed, x.GetPhase())
assert.Equal(t, "baz", x.GetReason())
assert.Equal(t, "foo", x.GetMessage())
}
1 change: 1 addition & 0 deletions api/v1alpha1/step_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

type StepStatus struct {
Phase StepPhase `json:"phase,omitempty" protobuf:"bytes,1,opt,name=phase,casttype=StepPhase"`
Reason string `json:"reason,omitempty" protobuf:"bytes,8,opt,name=reason"`
Message string `json:"message,omitempty" protobuf:"bytes,2,opt,name=message"`
Replicas uint32 `json:"replicas,omitempty" protobuf:"varint,5,opt,name=replicas"`
Selector string `json:"selector,omitempty" protobuf:"bytes,7,opt,name=selector"`
Expand Down
1 change: 1 addition & 0 deletions api/v1alpha1/step_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
// +kubebuilder:subresource:status
// +kubebuilder:subresource:scale:specpath=.spec.replicas,statuspath=.status.replicas,selectorpath=.status.selector
// +kubebuilder:printcolumn:name="Phase",type=string,JSONPath=`.status.phase`
// +kubebuilder:printcolumn:name="Reason",type=string,JSONPath=`.status.reason`
// +kubebuilder:printcolumn:name="Message",type=string,JSONPath=`.status.message`
// +kubebuilder:printcolumn:name="Desired",type=string,JSONPath=`.spec.replicas`
// +kubebuilder:printcolumn:name="Current",type=string,JSONPath=`.status.replicas`
Expand Down
5 changes: 5 additions & 0 deletions config/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1850,6 +1850,9 @@ spec:
- jsonPath: .status.phase
name: Phase
type: string
- jsonPath: .status.reason
name: Reason
type: string
- jsonPath: .status.message
name: Message
type: string
Expand Down Expand Up @@ -3599,6 +3602,8 @@ spec:
- Succeeded
- Failed
type: string
reason:
type: string
replicas:
format: int32
type: integer
Expand Down
5 changes: 5 additions & 0 deletions config/crd/bases/dataflow.argoproj.io_steps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ spec:
- jsonPath: .status.phase
name: Phase
type: string
- jsonPath: .status.reason
name: Reason
type: string
- jsonPath: .status.message
name: Message
type: string
Expand Down Expand Up @@ -2646,6 +2649,8 @@ spec:
- Succeeded
- Failed
type: string
reason:
type: string
replicas:
format: int32
type: integer
Expand Down
5 changes: 5 additions & 0 deletions config/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1850,6 +1850,9 @@ spec:
- jsonPath: .status.phase
name: Phase
type: string
- jsonPath: .status.reason
name: Reason
type: string
- jsonPath: .status.message
name: Message
type: string
Expand Down Expand Up @@ -3599,6 +3602,8 @@ spec:
- Succeeded
- Failed
type: string
reason:
type: string
replicas:
format: int32
type: integer
Expand Down
5 changes: 5 additions & 0 deletions config/dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1850,6 +1850,9 @@ spec:
- jsonPath: .status.phase
name: Phase
type: string
- jsonPath: .status.reason
name: Reason
type: string
- jsonPath: .status.message
name: Message
type: string
Expand Down Expand Up @@ -3599,6 +3602,8 @@ spec:
- Succeeded
- Failed
type: string
reason:
type: string
replicas:
format: int32
type: integer
Expand Down
5 changes: 5 additions & 0 deletions config/quick-start.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1850,6 +1850,9 @@ spec:
- jsonPath: .status.phase
name: Phase
type: string
- jsonPath: .status.reason
name: Reason
type: string
- jsonPath: .status.message
name: Message
type: string
Expand Down Expand Up @@ -3599,6 +3602,8 @@ spec:
- Succeeded
- Failed
type: string
reason:
type: string
replicas:
format: int32
type: integer
Expand Down
38 changes: 19 additions & 19 deletions manager/controllers/infer.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,41 +23,41 @@ var ErrorReasons = map[string]bool{
"OOMKilled": true,
}

func inferPhase(pod corev1.Pod) (dfv1.StepPhase, string) {
min := dfv1.NewStepPhaseMessage(dfv1.StepUnknown, "")
func inferPhase(pod corev1.Pod) (dfv1.StepPhase, string, string) {
min := dfv1.NewStepPhaseMessage(dfv1.StepUnknown, "", "")
for _, s := range pod.Status.InitContainerStatuses {
min = dfv1.MinStepPhaseMessage(min, func() dfv1.StepPhaseMessage {
if s.State.Running != nil {
return dfv1.NewStepPhaseMessage(dfv1.StepRunning, "")
} else if s.State.Waiting != nil {
if ErrorReasons[s.State.Waiting.Reason] {
return dfv1.NewStepPhaseMessage(dfv1.StepFailed, s.State.Waiting.Message)
return dfv1.NewStepPhaseMessage(dfv1.StepRunning, "", "")
} else if x := s.State.Waiting; x != nil {
if ErrorReasons[x.Reason] {
return dfv1.NewStepPhaseMessage(dfv1.StepFailed, x.Reason, x.Message)
} else {
return dfv1.NewStepPhaseMessage(dfv1.StepPending, s.State.Waiting.Message)
return dfv1.NewStepPhaseMessage(dfv1.StepPending, x.Reason, x.Message)
}
}
return dfv1.NewStepPhaseMessage(dfv1.StepUnknown, "")
return dfv1.NewStepPhaseMessage(dfv1.StepUnknown, "", "")
}())
}
for _, s := range pod.Status.ContainerStatuses {
min = dfv1.MinStepPhaseMessage(min, func() dfv1.StepPhaseMessage {
if s.State.Terminated != nil {
if int(s.State.Terminated.ExitCode) == 0 {
return dfv1.NewStepPhaseMessage(dfv1.StepSucceeded, "")
if x := s.State.Terminated; x != nil {
if int(x.ExitCode) == 0 {
return dfv1.NewStepPhaseMessage(dfv1.StepSucceeded, "", "")
} else {
return dfv1.NewStepPhaseMessage(dfv1.StepFailed, s.State.Terminated.Message)
return dfv1.NewStepPhaseMessage(dfv1.StepFailed, x.Reason, x.Message)
}
} else if s.State.Running != nil {
return dfv1.NewStepPhaseMessage(dfv1.StepRunning, "")
} else if s.State.Waiting != nil {
if ErrorReasons[s.State.Waiting.Reason] {
return dfv1.NewStepPhaseMessage(dfv1.StepFailed, s.State.Waiting.Message)
return dfv1.NewStepPhaseMessage(dfv1.StepRunning, "", "")
} else if x := s.State.Waiting; x != nil {
if ErrorReasons[x.Reason] {
return dfv1.NewStepPhaseMessage(dfv1.StepFailed, x.Reason, x.Message)
} else {
return dfv1.NewStepPhaseMessage(dfv1.StepPending, s.State.Waiting.Message)
return dfv1.NewStepPhaseMessage(dfv1.StepPending, x.Reason, x.Message)
}
}
return dfv1.NewStepPhaseMessage(dfv1.StepUnknown, "")
return dfv1.NewStepPhaseMessage(dfv1.StepUnknown, "", "")
}())
}
return min.GetPhase(), min.GetMessage()
return min.GetPhase(), min.GetReason(), min.GetMessage()
}
21 changes: 13 additions & 8 deletions manager/controllers/infer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@ import (

func Test_inferPhase(t *testing.T) {
t.Run("Empty", func(t *testing.T) {
p, msg := inferPhase(corev1.Pod{})
p, reason, msg := inferPhase(corev1.Pod{})
assert.Equal(t, p, dfv1.StepUnknown)
assert.Equal(t, "", reason)
assert.Equal(t, "", msg)
})
t.Run("Init", func(t *testing.T) {
p, msg := inferPhase(corev1.Pod{
p, reason, msg := inferPhase(corev1.Pod{
Status: corev1.PodStatus{
InitContainerStatuses: []corev1.ContainerStatus{
{State: corev1.ContainerState{
Expand All @@ -26,10 +27,11 @@ func Test_inferPhase(t *testing.T) {
},
})
assert.Equal(t, p, dfv1.StepPending)
assert.Equal(t, "", reason)
assert.Equal(t, "", msg)
})
t.Run("Init", func(t *testing.T) {
p, _ := inferPhase(corev1.Pod{
p, _, _ := inferPhase(corev1.Pod{
Status: corev1.PodStatus{
InitContainerStatuses: []corev1.ContainerStatus{
{State: corev1.ContainerState{
Expand All @@ -41,7 +43,7 @@ func Test_inferPhase(t *testing.T) {
assert.Equal(t, p, dfv1.StepRunning)
})
t.Run("CrashLoopBackOff", func(t *testing.T) {
p, msg := inferPhase(corev1.Pod{
p, reason, msg := inferPhase(corev1.Pod{
Status: corev1.PodStatus{
ContainerStatuses: []corev1.ContainerStatus{
{State: corev1.ContainerState{
Expand All @@ -54,10 +56,11 @@ func Test_inferPhase(t *testing.T) {
},
})
assert.Equal(t, p, dfv1.StepFailed)
assert.Equal(t, "CrashLoopBackOff", reason)
assert.Equal(t, "foo", msg)
})
t.Run("ContainerCreating", func(t *testing.T) {
p, msg := inferPhase(corev1.Pod{
p, reason, msg := inferPhase(corev1.Pod{
Status: corev1.PodStatus{
ContainerStatuses: []corev1.ContainerStatus{
{State: corev1.ContainerState{
Expand All @@ -70,10 +73,11 @@ func Test_inferPhase(t *testing.T) {
},
})
assert.Equal(t, p, dfv1.StepPending)
assert.Equal(t, "ContainerCreating", reason)
assert.Equal(t, "foo", msg)
})
t.Run("Running", func(t *testing.T) {
p, _ := inferPhase(corev1.Pod{
p, _, _ := inferPhase(corev1.Pod{
Status: corev1.PodStatus{
ContainerStatuses: []corev1.ContainerStatus{
{State: corev1.ContainerState{
Expand All @@ -85,7 +89,7 @@ func Test_inferPhase(t *testing.T) {
assert.Equal(t, p, dfv1.StepRunning)
})
t.Run("OOMKilled", func(t *testing.T) {
p, msg := inferPhase(corev1.Pod{
p, reason, msg := inferPhase(corev1.Pod{
Status: corev1.PodStatus{
ContainerStatuses: []corev1.ContainerStatus{
{State: corev1.ContainerState{
Expand All @@ -99,10 +103,11 @@ func Test_inferPhase(t *testing.T) {
},
})
assert.Equal(t, p, dfv1.StepFailed)
assert.Equal(t, "OOMKilled", reason)
assert.Equal(t, "foo", msg)
})
t.Run("Completed", func(t *testing.T) {
p, _ := inferPhase(corev1.Pod{
p, _, _ := inferPhase(corev1.Pod{
Status: corev1.PodStatus{
ContainerStatuses: []corev1.ContainerStatus{
{State: corev1.ContainerState{
Expand Down
6 changes: 3 additions & 3 deletions manager/controllers/step_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,9 @@ func (r *StepReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
return ctrl.Result{}, fmt.Errorf("failed to delete excess pod %s: %w", pod.Name, err)
}
} else {
phase, message := inferPhase(pod)
x := dfv1.MinStepPhaseMessage(dfv1.NewStepPhaseMessage(newStatus.Phase, newStatus.Message), dfv1.NewStepPhaseMessage(phase, message))
newStatus.Phase, newStatus.Message = x.GetPhase(), x.GetMessage()
phase, reason, message := inferPhase(pod)
x := dfv1.MinStepPhaseMessage(dfv1.NewStepPhaseMessage(newStatus.Phase, newStatus.Reason, newStatus.Message), dfv1.NewStepPhaseMessage(phase, reason, message))
newStatus.Phase, newStatus.Reason, newStatus.Message = x.GetPhase(), x.GetReason(), x.GetMessage()
// if the main container has terminated, kill all sidecars
mainCtrTerminated := false
for _, s := range pod.Status.ContainerStatuses {
Expand Down

0 comments on commit 9bef0df

Please sign in to comment.