From c3850d96ed2d38131d4552e46887d94f9d327dc1 Mon Sep 17 00:00:00 2001 From: Dery Rahman Ahaddienata Date: Thu, 11 Dec 2025 16:18:19 +0700 Subject: [PATCH] fix: detecting breach with state --- core/scheduler/service/job_sla_predictor_service.go | 3 ++- core/scheduler/service/job_sla_predictor_service_test.go | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/core/scheduler/service/job_sla_predictor_service.go b/core/scheduler/service/job_sla_predictor_service.go index ad8eb73edb..37e07ecdb0 100644 --- a/core/scheduler/service/job_sla_predictor_service.go +++ b/core/scheduler/service/job_sla_predictor_service.go @@ -493,7 +493,8 @@ func (s *JobSLAPredictorService) identifySLABreachRootCauses(jobTarget *schedule } else { var state *JobState // condition 1: T(now)>= S(u|j) and the job u has not completed yet - if (referenceTime.After(inferredSLA) && jobRun != nil && jobRun.JobEndTime == nil) || (jobRun != nil && jobRun.JobEndTime != nil && jobRun.JobEndTime.After(inferredSLA)) { + if (referenceTime.After(inferredSLA) && ((jobRun != nil && jobRun.JobEndTime == nil) || jobRun.JobStatus != scheduler.StateSuccess.String())) || + (jobRun != nil && jobRun.JobEndTime != nil && jobRun.JobEndTime.After(inferredSLA)) { // add to jobStatePaths state = &JobState{ JobSLAState: *jobSLAStates[job.JobName], diff --git a/core/scheduler/service/job_sla_predictor_service_test.go b/core/scheduler/service/job_sla_predictor_service_test.go index a1fb528b14..c08568e4d0 100644 --- a/core/scheduler/service/job_sla_predictor_service_test.go +++ b/core/scheduler/service/job_sla_predictor_service_test.go @@ -740,6 +740,7 @@ func TestIdentifySLABreaches(t *testing.T) { JobRuns: map[scheduler.JobName]*scheduler.JobRunSummary{ jobALineage.JobName: { ScheduledAt: scheduledAt.Add(-25 * time.Minute), + JobStatus: scheduler.StateSuccess.String(), TaskStartTime: &jobCTaskStartTime, TaskEndTime: &jobCTaskEndTime, JobEndTime: &jobCTaskEndTime, @@ -1023,6 +1024,7 @@ func TestIdentifySLABreaches(t *testing.T) { JobRuns: map[scheduler.JobName]*scheduler.JobRunSummary{ jobALineage.JobName: { ScheduledAt: scheduledAt.Add(-15 * time.Minute), + JobStatus: scheduler.StateSuccess.String(), TaskStartTime: &jobBTaskStartTime, TaskEndTime: &jobBTaskEndTime, JobEndTime: &jobBTaskEndTime, @@ -1039,6 +1041,7 @@ func TestIdentifySLABreaches(t *testing.T) { JobRuns: map[scheduler.JobName]*scheduler.JobRunSummary{ jobALineage.JobName: { ScheduledAt: scheduledAt.Add(-25 * time.Minute), + JobStatus: scheduler.StateSuccess.String(), TaskStartTime: &jobCTaskStartTime, TaskEndTime: &jobCTaskEndTime, JobEndTime: &jobCTaskEndTime, @@ -1170,10 +1173,12 @@ func TestIdentifySLABreach(t *testing.T) { jobTargetLineageMap[jobName].JobRuns["job-1"].TaskStartTime = nil jobTargetLineageMap[jobName].JobRuns["job-1"].TaskEndTime = nil jobTargetLineageMap[jobName].JobRuns["job-1"].JobEndTime = nil + jobTargetLineageMap[jobName].JobRuns["job-1"].JobStatus = "" } // job-18 is running late jobTargetLineageMap["job-18"].JobRuns["job-1"].TaskEndTime = nil jobTargetLineageMap["job-18"].JobRuns["job-1"].JobEndTime = nil + jobTargetLineageMap["job-18"].JobRuns["job-1"].JobStatus = "" skipJobNames := map[scheduler.JobName]bool{} @@ -1208,10 +1213,12 @@ func TestIdentifySLABreach(t *testing.T) { jobTargetLineageMap[jobName].JobRuns["job-1"].TaskStartTime = nil jobTargetLineageMap[jobName].JobRuns["job-1"].TaskEndTime = nil jobTargetLineageMap[jobName].JobRuns["job-1"].JobEndTime = nil + jobTargetLineageMap[jobName].JobRuns["job-1"].JobStatus = "" } // job-18 is running late jobTargetLineageMap["job-18"].JobRuns["job-1"].TaskEndTime = nil jobTargetLineageMap["job-18"].JobRuns["job-1"].JobEndTime = nil + jobTargetLineageMap["job-18"].JobRuns["job-1"].JobStatus = "" skipJobNames := map[scheduler.JobName]bool{} @@ -1270,6 +1277,7 @@ func generateLineageWithSLAStates(slaPredictorService *service.JobSLAPredictorSe jobRunSummary.TaskStartTime = &taskStartTime jobRunSummary.TaskEndTime = &taskEndTime jobRunSummary.JobEndTime = &taskEndTime + jobRunSummary.JobStatus = scheduler.StateSuccess.String() } return jobTargetLineageMap