diff --git a/internal/pkg/janitor/job.go b/internal/pkg/janitor/job.go index f11b4ce..a79546b 100644 --- a/internal/pkg/janitor/job.go +++ b/internal/pkg/janitor/job.go @@ -14,8 +14,8 @@ import ( ) var ( - cleanupJobsMethod = telemetry.NewMethod("db_connection", "cleanup_jobs") - workerMethod = telemetry.NewMethod("janitor", "worker") + cleanupJobsMethod = telemetry.NewMethod("cleanup_jobs", "janitor") + workerMethod = telemetry.NewMethod("worker", "janitor") ctx = context.Background() ) @@ -54,6 +54,7 @@ func (j *Janitor) worker() bool { // if no jobs found, return false if len(jobs) == 0 { + workerMethod.CountSuccess("no_jobs") return false } @@ -64,7 +65,7 @@ func (j *Janitor) worker() bool { go func(idx int, job *job.Job) { defer wg.Done() if err := j.cleanup(job); err != nil { - cleanupJobsMethod.LogAndCountError(err, "cleanup") + cleanupJobsMethod.LogAndCountError(err, "cleanup", job.CommandName, job.ClusterName) } }(i, jb) } @@ -117,19 +118,19 @@ func (j *Janitor) queryJobs(sess *database.Session) ([]*job.Job, error) { // cleanup calls the cleanup handler for a job func (j *Janitor) cleanup(jb *job.Job) error { - cleanupJobsMethod.CountRequest() + cleanupJobsMethod.CountRequest(jb.CommandName, jb.ClusterName) // Call cleanup handler handler := j.commandHandlers[jb.CommandID] if handler != nil { cluster := j.clusters[jb.ClusterID] if err := handler.Cleanup(ctx, jb.ID, cluster); err != nil { - cleanupJobsMethod.CountError("cleanup_handler") + cleanupJobsMethod.CountError("cleanup_handler", jb.CommandName, jb.ClusterName) return errors.Wrap(err, "cleanup_handler") } } else { // count requests for jobs that don't have a cleanup handler - cleanupJobsMethod.CountRequest("no_cleanup_handler") + cleanupJobsMethod.CountRequest("no_cleanup_handler", jb.CommandName, jb.ClusterName) } return nil diff --git a/internal/pkg/object/command/ecs/ecs.go b/internal/pkg/object/command/ecs/ecs.go index 6b7ed7e..d53e202 100644 --- a/internal/pkg/object/command/ecs/ecs.go +++ b/internal/pkg/object/command/ecs/ecs.go @@ -119,8 +119,8 @@ const ( var ( errMissingTemplate = fmt.Errorf("task definition template is required") errNoTasksAvailable = fmt.Errorf("no tasks available to retrieve logs") - cleanupMethod = telemetry.NewMethod("ecs", "cleanup") - handlerMethod = telemetry.NewMethod("ecs", "handler") + cleanupMethod = telemetry.NewMethod("cleanup", "ecs") + handlerMethod = telemetry.NewMethod("handler", "ecs") ) func New(commandCtx *heimdallContext.Context) (plugin.Handler, error) { @@ -675,10 +675,8 @@ func (execCtx *executionContext) retrieveLogs(ctx context.Context) error { return nil } - -// cleanup stops all ECS tasks that were started by the given job func (e *commandContext) Cleanup(ctx context.Context, jobID string, c *cluster.Cluster) error { - + cleanupMethod.CountRequest() // Resolve cluster context to get cluster name clusterContext := &clusterContext{} if err := c.Context.Unmarshal(clusterContext); err != nil { @@ -702,6 +700,7 @@ func (e *commandContext) Cleanup(ctx context.Context, jobID string, c *cluster.C StartedBy: aws.String(startedByValue), }) if err != nil { + cleanupMethod.CountError("list_tasks") return err } allTaskARNs = append(allTaskARNs, listTasksOutput.TaskArns...) @@ -709,6 +708,7 @@ func (e *commandContext) Cleanup(ctx context.Context, jobID string, c *cluster.C if len(allTaskARNs) == 0 { // No tasks found, nothing to clean up + cleanupMethod.CountSuccess("no_tasks_found") return nil } @@ -718,6 +718,7 @@ func (e *commandContext) Cleanup(ctx context.Context, jobID string, c *cluster.C Tasks: allTaskARNs, }) if err != nil { + cleanupMethod.CountError("describe_tasks") return err } @@ -739,8 +740,9 @@ func (e *commandContext) Cleanup(ctx context.Context, jobID string, c *cluster.C cleanupMethod.LogAndCountError(err, fmt.Sprintf("failed to stop task %s", aws.ToString(task.TaskArn))) continue } + cleanupMethod.CountSuccess("stop_task") } - + cleanupMethod.CountSuccess() return nil }