Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix GCP memory utilization #569

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/severity.md
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@
|GCP Cloud SQL heartbeat|X|-|-|-|-|
|GCP Cloud SQL cpu utilization|X|X|-|-|-|
|GCP Cloud SQL disk utilization|X|X|-|-|-|
|GCP Cloud SQL memory utilization|X|X|-|-|-|
|GCP Cloud SQL memory left|X|X|-|-|-|


## integration_gcp-cloud-sql-failover
Expand Down
3 changes: 2 additions & 1 deletion modules/integration_gcp-cloud-sql-common/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ This module creates the following SignalFx detectors which could contain one or
|GCP Cloud SQL heartbeat|X|-|-|-|-|
|GCP Cloud SQL cpu utilization|X|X|-|-|-|
|GCP Cloud SQL disk utilization|X|X|-|-|-|
|GCP Cloud SQL memory utilization|X|X|-|-|-|
|GCP Cloud SQL memory left|X|X|-|-|-|

## How to collect required metrics?

Expand All @@ -105,6 +105,7 @@ Here is the list of required metrics for detectors in this module.
* `database/cpu/usage_time`
* `database/cpu/utilization`
* `database/disk/utilization`
* `database/memory/components`
* `database/memory/utilization`


Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
module: "GCP Cloud SQL"
name: "Memory utilization"
name: "Memory left"
id: "memory_utilization"

transformation: ".min(over='15m').scale(100)"
transformation: ".min(over='15m')"
aggregation: true

filtering: "filter('component', 'Free')"

signals:
signal:
metric: "database/memory/utilization"
metric: "database/memory/components"

rules:
critical:
threshold: 95
comparator: ">"
threshold: 10
comparator: "<"

major:
threshold: 90
comparator: ">"
dependency: "critical"
threshold: 20
comparator: "<"
dependency: "critical"
13 changes: 7 additions & 6 deletions modules/integration_gcp-cloud-sql-common/detectors-gen.tf
Original file line number Diff line number Diff line change
Expand Up @@ -107,20 +107,21 @@ EOF
}

resource "signalfx_detector" "memory_utilization" {
name = format("%s %s", local.detector_name_prefix, "GCP Cloud SQL memory utilization")
name = format("%s %s", local.detector_name_prefix, "GCP Cloud SQL memory left")

authorized_writer_teams = var.authorized_writer_teams
teams = try(coalescelist(var.teams, var.authorized_writer_teams), null)
tags = compact(concat(local.common_tags, local.tags, var.extra_tags))

program_text = <<-EOF
signal = data('database/memory/utilization', filter=${module.filtering.signalflow})${var.memory_utilization_aggregation_function}${var.memory_utilization_transformation_function}.publish('signal')
detect(when(signal > ${var.memory_utilization_threshold_critical}%{if var.memory_utilization_lasting_duration_critical != null}, lasting='${var.memory_utilization_lasting_duration_critical}', at_least=${var.memory_utilization_at_least_percentage_critical}%{endif})).publish('CRIT')
detect(when(signal > ${var.memory_utilization_threshold_major}%{if var.memory_utilization_lasting_duration_major != null}, lasting='${var.memory_utilization_lasting_duration_major}', at_least=${var.memory_utilization_at_least_percentage_major}%{endif}) and (not when(signal > ${var.memory_utilization_threshold_critical}%{if var.memory_utilization_lasting_duration_critical != null}, lasting='${var.memory_utilization_lasting_duration_critical}', at_least=${var.memory_utilization_at_least_percentage_critical}%{endif}))).publish('MAJOR')
base_filtering = filter('component', 'Free')
signal = data('database/memory/components', filter=base_filtering and ${module.filtering.signalflow})${var.memory_utilization_aggregation_function}${var.memory_utilization_transformation_function}.publish('signal')
detect(when(signal < ${var.memory_utilization_threshold_critical}%{if var.memory_utilization_lasting_duration_critical != null}, lasting='${var.memory_utilization_lasting_duration_critical}', at_least=${var.memory_utilization_at_least_percentage_critical}%{endif})).publish('CRIT')
detect(when(signal < ${var.memory_utilization_threshold_major}%{if var.memory_utilization_lasting_duration_major != null}, lasting='${var.memory_utilization_lasting_duration_major}', at_least=${var.memory_utilization_at_least_percentage_major}%{endif}) and (not when(signal < ${var.memory_utilization_threshold_critical}%{if var.memory_utilization_lasting_duration_critical != null}, lasting='${var.memory_utilization_lasting_duration_critical}', at_least=${var.memory_utilization_at_least_percentage_critical}%{endif}))).publish('MAJOR')
EOF

rule {
description = "is too high > ${var.memory_utilization_threshold_critical}"
description = "is too low < ${var.memory_utilization_threshold_critical}"
severity = "Critical"
detect_label = "CRIT"
disabled = coalesce(var.memory_utilization_disabled_critical, var.memory_utilization_disabled, var.detectors_disabled)
Expand All @@ -132,7 +133,7 @@ EOF
}

rule {
description = "is too high > ${var.memory_utilization_threshold_major}"
description = "is too low < ${var.memory_utilization_threshold_major}"
severity = "Major"
detect_label = "MAJOR"
disabled = coalesce(var.memory_utilization_disabled_major, var.memory_utilization_disabled, var.detectors_disabled)
Expand Down
6 changes: 3 additions & 3 deletions modules/integration_gcp-cloud-sql-common/variables-gen.tf
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ variable "memory_utilization_aggregation_function" {
variable "memory_utilization_transformation_function" {
description = "Transformation function for memory_utilization detector (e.g. \".mean(over='5m')\")"
type = string
default = ".min(over='15m').scale(100)"
default = ".min(over='15m')"
}

variable "memory_utilization_max_delay" {
Expand Down Expand Up @@ -287,7 +287,7 @@ variable "memory_utilization_disabled_major" {
variable "memory_utilization_threshold_critical" {
description = "Critical threshold for memory_utilization detector"
type = number
default = 95
default = 10
}

variable "memory_utilization_lasting_duration_critical" {
Expand All @@ -304,7 +304,7 @@ variable "memory_utilization_at_least_percentage_critical" {
variable "memory_utilization_threshold_major" {
description = "Major threshold for memory_utilization detector"
type = number
default = 90
default = 20
}

variable "memory_utilization_lasting_duration_major" {
Expand Down
Loading