diff --git a/modules/integration_aws-ecs-service/detectors-ecs-service.tf b/modules/integration_aws-ecs-service/detectors-ecs-service.tf
index bbf649fa4..637125e72 100644
--- a/modules/integration_aws-ecs-service/detectors-ecs-service.tf
+++ b/modules/integration_aws-ecs-service/detectors-ecs-service.tf
@@ -35,9 +35,9 @@ resource "signalfx_detector" "cpu_utilization" {
   tags = compact(concat(local.common_tags, local.tags, var.extra_tags))
 
   program_text = <<-EOF
-    signal = data('CPUUtilization', filter=filter('namespace', 'AWS/ECS') and filter('stat', 'mean') and filter('ServiceName', '*') and ${module.filtering.signalflow}).mean(by=['ServiceName'])${var.cpu_utilization_aggregation_function}${var.cpu_utilization_transformation_function}.publish('signal')
-    detect(when(signal > ${var.cpu_utilization_threshold_critical})).publish('CRIT')
-    detect(when(signal > ${var.cpu_utilization_threshold_major}) and (not when(signal > ${var.cpu_utilization_threshold_critical}))).publish('MAJOR')
+    signal = data('CPUUtilization', filter=filter('namespace', 'AWS/ECS') and filter('stat', 'mean') and filter('ServiceName', '*') and ${module.filtering.signalflow})${var.cpu_utilization_aggregation_function}${var.cpu_utilization_transformation_function}.publish('signal')
+    detect(when(signal > ${var.cpu_utilization_threshold_critical}, lasting=%{if var.cpu_utilization_lasting_duration_critical == null}None%{else}'${var.cpu_utilization_lasting_duration_critical}'%{endif})).publish('CRIT')
+    detect(when(signal > ${var.cpu_utilization_threshold_major}, lasting=%{if var.cpu_utilization_lasting_duration_major == null}None%{else}'${var.cpu_utilization_lasting_duration_major}'%{endif}) and (not when(signal > ${var.cpu_utilization_threshold_critical}, lasting=%{if var.cpu_utilization_lasting_duration_critical == null}None%{else}'${var.cpu_utilization_lasting_duration_critical}'%{endif}))).publish('MAJOR')
 EOF
 
   rule {
@@ -75,9 +75,9 @@ resource "signalfx_detector" "memory_utilization" {
   tags = compact(concat(local.common_tags, local.tags, var.extra_tags))
 
   program_text = <<-EOF
-    signal = data('MemoryUtilization', filter=filter('namespace', 'AWS/ECS') and filter('stat', 'mean') and filter('ServiceName', '*') and ${module.filtering.signalflow}).mean(by=['ServiceName'])${var.memory_utilization_aggregation_function}${var.memory_utilization_transformation_function}.publish('signal')
-    detect(when(signal > ${var.memory_utilization_threshold_critical})).publish('CRIT')
-    detect(when(signal > ${var.memory_utilization_threshold_major}) and (not when(signal > ${var.memory_utilization_threshold_critical}))).publish('MAJOR')
+    signal = data('MemoryUtilization', filter=filter('namespace', 'AWS/ECS') and filter('stat', 'mean') and filter('ServiceName', '*') and ${module.filtering.signalflow})${var.memory_utilization_aggregation_function}${var.memory_utilization_transformation_function}.publish('signal')
+    detect(when(signal > ${var.memory_utilization_threshold_critical}, lasting=%{if var.memory_utilization_lasting_duration_critical == null}None%{else}'${var.memory_utilization_lasting_duration_critical}'%{endif})).publish('CRIT')
+    detect(when(signal > ${var.memory_utilization_threshold_major}, lasting=%{if var.memory_utilization_lasting_duration_major == null}None%{else}'${var.memory_utilization_lasting_duration_major}'%{endif}) and (not when(signal > ${var.memory_utilization_threshold_critical}, lasting=%{if var.memory_utilization_lasting_duration_critical == null}None%{else}'${var.memory_utilization_lasting_duration_critical}'%{endif}))).publish('MAJOR')
 EOF
 
   rule {
diff --git a/modules/integration_aws-ecs-service/variables.tf b/modules/integration_aws-ecs-service/variables.tf
index d6987c72d..ed5321b26 100644
--- a/modules/integration_aws-ecs-service/variables.tf
+++ b/modules/integration_aws-ecs-service/variables.tf
@@ -41,7 +41,7 @@ variable "heartbeat_timeframe" {
 variable "heartbeat_aggregation_function" {
   description = "Aggregation function and group by for heartbeat detector (i.e. \".mean(by=['host'])\")"
   type        = string
-  default     = ".mean(by=['ServiceName'])"
+  default     = ""
 }
 
 # CPU_utilization detector
@@ -97,7 +97,7 @@ variable "cpu_utilization_aggregation_function" {
 variable "cpu_utilization_transformation_function" {
   description = "Transformation function for cpu_utilization detector (i.e. \".mean(over='5m')\")"
   type        = string
-  default     = ".min(over='5m')"
+  default     = ""
 }
 
 variable "cpu_utilization_threshold_critical" {
@@ -106,12 +106,24 @@ variable "cpu_utilization_threshold_critical" {
   default     = 90
 }
 
+variable "cpu_utilization_lasting_duration_critical" {
+  description = "Minimum duration the critical condition must hold before cpu_utilization detector alerts (e.g. \"30m\"; null for no minimum duration)"
+  type        = string
+  default     = "30m"
+}
+
 variable "cpu_utilization_threshold_major" {
   description = "Major threshold for cpu_utilization detector"
   type        = number
   default     = 80
 }
 
+variable "cpu_utilization_lasting_duration_major" {
+  description = "Minimum duration the major condition must hold before cpu_utilization detector alerts (e.g. \"5m\"; null for no minimum duration)"
+  type        = string
+  default     = "5m"
+}
+
 # Memory_utilization detector
 
 variable "memory_utilization_max_delay" {
@@ -165,7 +177,7 @@ variable "memory_utilization_aggregation_function" {
 variable "memory_utilization_transformation_function" {
   description = "Transformation function for memory_utilization detector (i.e. \".mean(over='5m')\")"
   type        = string
-  default     = ".min(over='5m')"
+  default     = ""
 }
 
 variable "memory_utilization_threshold_critical" {
@@ -174,9 +186,20 @@ variable "memory_utilization_threshold_critical" {
   default     = 90
 }
 
+variable "memory_utilization_lasting_duration_critical" {
+  description = "Minimum duration the critical condition must hold before memory_utilization detector alerts (e.g. \"30m\"; null for no minimum duration)"
+  type        = string
+  default     = "30m"
+}
+
 variable "memory_utilization_threshold_major" {
   description = "Major threshold for memory_utilization detector"
   type        = number
   default     = 85
 }
 
+variable "memory_utilization_lasting_duration_major" {
+  description = "Minimum duration the major condition must hold before memory_utilization detector alerts (e.g. \"5m\"; null for no minimum duration)"
+  type        = string
+  default     = "5m"
+}