From 8f39776711a37d9325671844c332dc47c8e3a220 Mon Sep 17 00:00:00 2001 From: Emmanuel Clisson Date: Thu, 12 Sep 2024 16:43:33 +0200 Subject: [PATCH 1/3] Fix memory left --- docs/severity.md | 2 +- .../integration_gcp-cloud-sql-common/README.md | 3 ++- .../conf/03-memory_utilization.yaml | 16 ++++++++-------- .../detectors-gen.tf | 12 ++++++------ .../variables-gen.tf | 6 +++--- 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/docs/severity.md b/docs/severity.md index 149003026..e3f668701 100644 --- a/docs/severity.md +++ b/docs/severity.md @@ -759,7 +759,7 @@ |GCP Cloud SQL heartbeat|X|-|-|-|-| |GCP Cloud SQL cpu utilization|X|X|-|-|-| |GCP Cloud SQL disk utilization|X|X|-|-|-| -|GCP Cloud SQL memory utilization|X|X|-|-|-| +|GCP Cloud SQL memory left|X|X|-|-|-| ## integration_gcp-cloud-sql-failover diff --git a/modules/integration_gcp-cloud-sql-common/README.md b/modules/integration_gcp-cloud-sql-common/README.md index eb76ad931..6ac8966cc 100644 --- a/modules/integration_gcp-cloud-sql-common/README.md +++ b/modules/integration_gcp-cloud-sql-common/README.md @@ -84,7 +84,7 @@ This module creates the following SignalFx detectors which could contain one or |GCP Cloud SQL heartbeat|X|-|-|-|-| |GCP Cloud SQL cpu utilization|X|X|-|-|-| |GCP Cloud SQL disk utilization|X|X|-|-|-| -|GCP Cloud SQL memory utilization|X|X|-|-|-| +|GCP Cloud SQL memory left|X|X|-|-|-| ## How to collect required metrics? @@ -105,6 +105,7 @@ Here is the list of required metrics for detectors in this module. * `database/cpu/usage_time` * `database/cpu/utilization` * `database/disk/utilization` +* `database/memory/components` * `database/memory/utilization` diff --git a/modules/integration_gcp-cloud-sql-common/conf/03-memory_utilization.yaml b/modules/integration_gcp-cloud-sql-common/conf/03-memory_utilization.yaml index f08ea2629..d9086b8e9 100644 --- a/modules/integration_gcp-cloud-sql-common/conf/03-memory_utilization.yaml +++ b/modules/integration_gcp-cloud-sql-common/conf/03-memory_utilization.yaml @@ -1,21 +1,21 @@ module: "GCP Cloud SQL" -name: "Memory utilization" +name: "Memory left" id: "memory_utilization" -transformation: ".min(over='15m').scale(100)" +transformation: ".min(over='15m')" aggregation: true signals: signal: - metric: "database/memory/utilization" + metric: "database/memory/components" rules: critical: - threshold: 95 - comparator: ">" + threshold: 10 + comparator: "<" major: - threshold: 90 - comparator: ">" - dependency: "critical" \ No newline at end of file + threshold: 20 + comparator: "<" + dependency: "critical" diff --git a/modules/integration_gcp-cloud-sql-common/detectors-gen.tf b/modules/integration_gcp-cloud-sql-common/detectors-gen.tf index 13526f6ab..d8edec0f5 100644 --- a/modules/integration_gcp-cloud-sql-common/detectors-gen.tf +++ b/modules/integration_gcp-cloud-sql-common/detectors-gen.tf @@ -107,20 +107,20 @@ EOF } resource "signalfx_detector" "memory_utilization" { - name = format("%s %s", local.detector_name_prefix, "GCP Cloud SQL memory utilization") + name = format("%s %s", local.detector_name_prefix, "GCP Cloud SQL memory left") authorized_writer_teams = var.authorized_writer_teams teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) program_text = <<-EOF - signal = data('database/memory/utilization', filter=${module.filtering.signalflow})${var.memory_utilization_aggregation_function}${var.memory_utilization_transformation_function}.publish('signal') - detect(when(signal > ${var.memory_utilization_threshold_critical}%{if var.memory_utilization_lasting_duration_critical != null}, lasting='${var.memory_utilization_lasting_duration_critical}', at_least=${var.memory_utilization_at_least_percentage_critical}%{endif})).publish('CRIT') - detect(when(signal > ${var.memory_utilization_threshold_major}%{if var.memory_utilization_lasting_duration_major != null}, lasting='${var.memory_utilization_lasting_duration_major}', at_least=${var.memory_utilization_at_least_percentage_major}%{endif}) and (not when(signal > ${var.memory_utilization_threshold_critical}%{if var.memory_utilization_lasting_duration_critical != null}, lasting='${var.memory_utilization_lasting_duration_critical}', at_least=${var.memory_utilization_at_least_percentage_critical}%{endif}))).publish('MAJOR') + signal = data('database/memory/components', filter=${module.filtering.signalflow})${var.memory_utilization_aggregation_function}${var.memory_utilization_transformation_function}.publish('signal') + detect(when(signal < ${var.memory_utilization_threshold_critical}%{if var.memory_utilization_lasting_duration_critical != null}, lasting='${var.memory_utilization_lasting_duration_critical}', at_least=${var.memory_utilization_at_least_percentage_critical}%{endif})).publish('CRIT') + detect(when(signal < ${var.memory_utilization_threshold_major}%{if var.memory_utilization_lasting_duration_major != null}, lasting='${var.memory_utilization_lasting_duration_major}', at_least=${var.memory_utilization_at_least_percentage_major}%{endif}) and (not when(signal < ${var.memory_utilization_threshold_critical}%{if var.memory_utilization_lasting_duration_critical != null}, lasting='${var.memory_utilization_lasting_duration_critical}', at_least=${var.memory_utilization_at_least_percentage_critical}%{endif}))).publish('MAJOR') EOF rule { - description = "is too high > ${var.memory_utilization_threshold_critical}" + description = "is too low < ${var.memory_utilization_threshold_critical}" severity = "Critical" detect_label = "CRIT" disabled = coalesce(var.memory_utilization_disabled_critical, var.memory_utilization_disabled, var.detectors_disabled) @@ -132,7 +132,7 @@ EOF } rule { - description = "is too high > ${var.memory_utilization_threshold_major}" + description = "is too low < ${var.memory_utilization_threshold_major}" severity = "Major" detect_label = "MAJOR" disabled = coalesce(var.memory_utilization_disabled_major, var.memory_utilization_disabled, var.detectors_disabled) diff --git a/modules/integration_gcp-cloud-sql-common/variables-gen.tf b/modules/integration_gcp-cloud-sql-common/variables-gen.tf index 15d467cc0..3d4687cc4 100644 --- a/modules/integration_gcp-cloud-sql-common/variables-gen.tf +++ b/modules/integration_gcp-cloud-sql-common/variables-gen.tf @@ -245,7 +245,7 @@ variable "memory_utilization_aggregation_function" { variable "memory_utilization_transformation_function" { description = "Transformation function for memory_utilization detector (i.e. \".mean(over='5m')\")" type = string - default = ".min(over='15m').scale(100)" + default = ".min(over='15m')" } variable "memory_utilization_max_delay" { @@ -287,7 +287,7 @@ variable "memory_utilization_disabled_major" { variable "memory_utilization_threshold_critical" { description = "Critical threshold for memory_utilization detector" type = number - default = 95 + default = 10 } variable "memory_utilization_lasting_duration_critical" { @@ -304,7 +304,7 @@ variable "memory_utilization_at_least_percentage_critical" { variable "memory_utilization_threshold_major" { description = "Major threshold for memory_utilization detector" type = number - default = 90 + default = 20 } variable "memory_utilization_lasting_duration_major" { From c6143cc4a59336e43fcbf933406eead82d0b8eb2 Mon Sep 17 00:00:00 2001 From: Emmanuel Clisson Date: Fri, 13 Sep 2024 14:47:08 +0200 Subject: [PATCH 2/3] feat(gcp): modify filtering memory cloud sql --- .../conf/03-memory_utilization.yaml | 2 +- modules/integration_gcp-cloud-sql-common/detectors-gen.tf | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/integration_gcp-cloud-sql-common/conf/03-memory_utilization.yaml b/modules/integration_gcp-cloud-sql-common/conf/03-memory_utilization.yaml index d9086b8e9..e909bc959 100644 --- a/modules/integration_gcp-cloud-sql-common/conf/03-memory_utilization.yaml +++ b/modules/integration_gcp-cloud-sql-common/conf/03-memory_utilization.yaml @@ -4,7 +4,7 @@ id: "memory_utilization" transformation: ".min(over='15m')" aggregation: true - +filtering: "filter('project_id', '${var.gcp_project_id}') and filter('component', 'Free')" signals: signal: diff --git a/modules/integration_gcp-cloud-sql-common/detectors-gen.tf b/modules/integration_gcp-cloud-sql-common/detectors-gen.tf index d8edec0f5..fa51e2b8c 100644 --- a/modules/integration_gcp-cloud-sql-common/detectors-gen.tf +++ b/modules/integration_gcp-cloud-sql-common/detectors-gen.tf @@ -114,7 +114,8 @@ resource "signalfx_detector" "memory_utilization" { tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) program_text = <<-EOF - signal = data('database/memory/components', filter=${module.filtering.signalflow})${var.memory_utilization_aggregation_function}${var.memory_utilization_transformation_function}.publish('signal') + base_filtering = filter('project_id', '${var.gcp_project_id}') and filter('component', 'Free') + signal = data('database/memory/components', filter=base_filtering and ${module.filtering.signalflow})${var.memory_utilization_aggregation_function}${var.memory_utilization_transformation_function}.publish('signal') detect(when(signal < ${var.memory_utilization_threshold_critical}%{if var.memory_utilization_lasting_duration_critical != null}, lasting='${var.memory_utilization_lasting_duration_critical}', at_least=${var.memory_utilization_at_least_percentage_critical}%{endif})).publish('CRIT') detect(when(signal < ${var.memory_utilization_threshold_major}%{if var.memory_utilization_lasting_duration_major != null}, lasting='${var.memory_utilization_lasting_duration_major}', at_least=${var.memory_utilization_at_least_percentage_major}%{endif}) and (not when(signal < ${var.memory_utilization_threshold_critical}%{if var.memory_utilization_lasting_duration_critical != null}, lasting='${var.memory_utilization_lasting_duration_critical}', at_least=${var.memory_utilization_at_least_percentage_critical}%{endif}))).publish('MAJOR') EOF From c1a92278d830335c367658be28d746c5406d4d32 Mon Sep 17 00:00:00 2001 From: Emmanuel Clisson Date: Fri, 13 Sep 2024 14:53:29 +0200 Subject: [PATCH 3/3] feat(gcp): modify filtering memory cloud sql --- .../conf/03-memory_utilization.yaml | 2 +- modules/integration_gcp-cloud-sql-common/detectors-gen.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/integration_gcp-cloud-sql-common/conf/03-memory_utilization.yaml b/modules/integration_gcp-cloud-sql-common/conf/03-memory_utilization.yaml index e909bc959..ade5cd5e5 100644 --- a/modules/integration_gcp-cloud-sql-common/conf/03-memory_utilization.yaml +++ b/modules/integration_gcp-cloud-sql-common/conf/03-memory_utilization.yaml @@ -4,7 +4,7 @@ id: "memory_utilization" transformation: ".min(over='15m')" aggregation: true -filtering: "filter('project_id', '${var.gcp_project_id}') and filter('component', 'Free')" +filtering: "filter('component', 'Free')" signals: signal: diff --git a/modules/integration_gcp-cloud-sql-common/detectors-gen.tf b/modules/integration_gcp-cloud-sql-common/detectors-gen.tf index fa51e2b8c..186d0cd10 100644 --- a/modules/integration_gcp-cloud-sql-common/detectors-gen.tf +++ b/modules/integration_gcp-cloud-sql-common/detectors-gen.tf @@ -114,7 +114,7 @@ resource "signalfx_detector" "memory_utilization" { tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) program_text = <<-EOF - base_filtering = filter('project_id', '${var.gcp_project_id}') and filter('component', 'Free') + base_filtering = filter('component', 'Free') signal = data('database/memory/components', filter=base_filtering and ${module.filtering.signalflow})${var.memory_utilization_aggregation_function}${var.memory_utilization_transformation_function}.publish('signal') detect(when(signal < ${var.memory_utilization_threshold_critical}%{if var.memory_utilization_lasting_duration_critical != null}, lasting='${var.memory_utilization_lasting_duration_critical}', at_least=${var.memory_utilization_at_least_percentage_critical}%{endif})).publish('CRIT') detect(when(signal < ${var.memory_utilization_threshold_major}%{if var.memory_utilization_lasting_duration_major != null}, lasting='${var.memory_utilization_lasting_duration_major}', at_least=${var.memory_utilization_at_least_percentage_major}%{endif}) and (not when(signal < ${var.memory_utilization_threshold_critical}%{if var.memory_utilization_lasting_duration_critical != null}, lasting='${var.memory_utilization_lasting_duration_critical}', at_least=${var.memory_utilization_at_least_percentage_critical}%{endif}))).publish('MAJOR')