Skip to content

Commit

Permalink
Aws ecs transformation fix (#546)
Browse files Browse the repository at this point in the history
* fix: replacing min() with lasting()

* fix: removing dangerous usage of mean()

* fix: terraform lint

* fix: remove mean() on heartbeat monitor

---------

Co-authored-by: Florent DELAHAYE <[email protected]>
  • Loading branch information
Tulux and Florent DELAHAYE authored Jan 29, 2024
1 parent 45f46e4 commit 200d100
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 9 deletions.
12 changes: 6 additions & 6 deletions modules/integration_aws-ecs-service/detectors-ecs-service.tf
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ resource "signalfx_detector" "cpu_utilization" {
tags = compact(concat(local.common_tags, local.tags, var.extra_tags))

program_text = <<-EOF
signal = data('CPUUtilization', filter=filter('namespace', 'AWS/ECS') and filter('stat', 'mean') and filter('ServiceName', '*') and ${module.filtering.signalflow}).mean(by=['ServiceName'])${var.cpu_utilization_aggregation_function}${var.cpu_utilization_transformation_function}.publish('signal')
detect(when(signal > ${var.cpu_utilization_threshold_critical})).publish('CRIT')
detect(when(signal > ${var.cpu_utilization_threshold_major}) and (not when(signal > ${var.cpu_utilization_threshold_critical}))).publish('MAJOR')
signal = data('CPUUtilization', filter=filter('namespace', 'AWS/ECS') and filter('stat', 'mean') and filter('ServiceName', '*') and ${module.filtering.signalflow})${var.cpu_utilization_aggregation_function}${var.cpu_utilization_transformation_function}.publish('signal')
detect(when(signal > ${var.cpu_utilization_threshold_critical}, lasting=%{if var.cpu_utilization_lasting_duration_critical == null}None%{else}'${var.cpu_utilization_lasting_duration_critical}'%{endif})).publish('CRIT')
detect(when(signal > ${var.cpu_utilization_threshold_major}, lasting=%{if var.cpu_utilization_lasting_duration_major == null}None%{else}'${var.cpu_utilization_lasting_duration_major}'%{endif}) and (not when(signal > ${var.cpu_utilization_threshold_critical}, lasting=%{if var.cpu_utilization_lasting_duration_critical == null}None%{else}'${var.cpu_utilization_lasting_duration_critical}'%{endif}))).publish('MAJOR')
EOF

rule {
Expand Down Expand Up @@ -75,9 +75,9 @@ resource "signalfx_detector" "memory_utilization" {
tags = compact(concat(local.common_tags, local.tags, var.extra_tags))

program_text = <<-EOF
signal = data('MemoryUtilization', filter=filter('namespace', 'AWS/ECS') and filter('stat', 'mean') and filter('ServiceName', '*') and ${module.filtering.signalflow}).mean(by=['ServiceName'])${var.memory_utilization_aggregation_function}${var.memory_utilization_transformation_function}.publish('signal')
detect(when(signal > ${var.memory_utilization_threshold_critical})).publish('CRIT')
detect(when(signal > ${var.memory_utilization_threshold_major}) and (not when(signal > ${var.memory_utilization_threshold_critical}))).publish('MAJOR')
signal = data('MemoryUtilization', filter=filter('namespace', 'AWS/ECS') and filter('stat', 'mean') and filter('ServiceName', '*') and ${module.filtering.signalflow})${var.memory_utilization_aggregation_function}${var.memory_utilization_transformation_function}.publish('signal')
detect(when(signal > ${var.memory_utilization_threshold_critical}, lasting=%{if var.memory_utilization_lasting_duration_critical == null}None%{else}'${var.memory_utilization_lasting_duration_critical}'%{endif})).publish('CRIT')
detect(when(signal > ${var.memory_utilization_threshold_major}, lasting=%{if var.memory_utilization_lasting_duration_major == null}None%{else}'${var.memory_utilization_lasting_duration_major}'%{endif}) and (not when(signal > ${var.memory_utilization_threshold_critical}, lasting=%{if var.memory_utilization_lasting_duration_critical == null}None%{else}'${var.memory_utilization_lasting_duration_critical}'%{endif}))).publish('MAJOR')
EOF

rule {
Expand Down
25 changes: 22 additions & 3 deletions modules/integration_aws-ecs-service/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ variable "heartbeat_timeframe" {
variable "heartbeat_aggregation_function" {
description = "Aggregation function and group by for heartbeat detector (i.e. \".mean(by=['host'])\")"
type = string
default = ".mean(by=['ServiceName'])"
default = ""
}

# CPU_utilization detector
Expand Down Expand Up @@ -97,7 +97,7 @@ variable "cpu_utilization_aggregation_function" {
variable "cpu_utilization_transformation_function" {
description = "Transformation function for cpu_utilization detector (i.e. \".mean(over='5m')\")"
type = string
default = ".min(over='5m')"
default = ""
}

variable "cpu_utilization_threshold_critical" {
Expand All @@ -106,12 +106,22 @@ variable "cpu_utilization_threshold_critical" {
default = 90
}

# Duration injected as the `lasting=` argument of the critical `when()`
# condition in the cpu_utilization detector program (also reused inside the
# `not when(...)` clause of the MAJOR rule). A null value is rendered as
# `lasting=None` by the detector template.
variable "cpu_utilization_lasting_duration_critical" {
  description = "Minimum duration that conditions must be true before raising a critical alert for cpu_utilization detector (i.e. \"30m\"). Set to null to render lasting=None"
  type        = string
  default     = "30m"
}

# Value the cpu_utilization signal is compared against (`signal > threshold`)
# in the MAJOR detect rule.
variable "cpu_utilization_threshold_major" {
  description = "Major threshold for cpu_utilization detector"
  type        = number
  default     = 80
}

# Duration injected as the `lasting=` argument of the major `when()` condition
# in the cpu_utilization detector program. A null value is rendered as
# `lasting=None` by the detector template.
variable "cpu_utilization_lasting_duration_major" {
  description = "Minimum duration that conditions must be true before raising a major alert for cpu_utilization detector (i.e. \"5m\"). Set to null to render lasting=None"
  type        = string
  default     = "5m"
}

# Memory_utilization detector

variable "memory_utilization_max_delay" {
Expand Down Expand Up @@ -165,7 +175,7 @@ variable "memory_utilization_aggregation_function" {
variable "memory_utilization_transformation_function" {
description = "Transformation function for memory_utilization detector (i.e. \".mean(over='5m')\")"
type = string
default = ".min(over='5m')"
default = ""
}

variable "memory_utilization_threshold_critical" {
Expand All @@ -174,9 +184,18 @@ variable "memory_utilization_threshold_critical" {
default = 90
}

# Duration injected as the `lasting=` argument of the critical `when()`
# condition in the memory_utilization detector program (also reused inside the
# `not when(...)` clause of the MAJOR rule). A null value is rendered as
# `lasting=None` by the detector template.
variable "memory_utilization_lasting_duration_critical" {
  description = "Minimum duration that conditions must be true before raising a critical alert for memory_utilization detector (i.e. \"30m\"). Set to null to render lasting=None"
  type        = string
  default     = "30m"
}

# Value the memory_utilization signal is compared against
# (`signal > threshold`) in the MAJOR detect rule.
variable "memory_utilization_threshold_major" {
  description = "Major threshold for memory_utilization detector"
  type        = number
  default     = 85
}

# Duration injected as the `lasting=` argument of the major `when()` condition
# in the memory_utilization detector program. A null value is rendered as
# `lasting=None` by the detector template.
variable "memory_utilization_lasting_duration_major" {
  description = "Minimum duration that conditions must be true before raising a major alert for memory_utilization detector (i.e. \"5m\"). Set to null to render lasting=None"
  type        = string
  default     = "5m"
}

0 comments on commit 200d100

Please sign in to comment.