Skip to content

Commit

Permalink
prometheus.latency.rules.yml: Mark cluster wide manager metrics with labels
Browse files Browse the repository at this point in the history

(cherry picked from commit e9005b2)
  • Loading branch information
amnonh committed Apr 21, 2024
1 parent 4fe3630 commit e5452c6
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
12 changes: 12 additions & 0 deletions docs/source/procedures/datadog/datadog.rules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -481,3 +481,15 @@ groups:
labels:
by: "cluster"
dd: "1"
# Per-cluster aggregate of scylla_manager_healthcheck_cql_status, recorded with
# the static labels by="cluster", status="1" and dd="1".
# sum() adds the raw sample values of every series in the cluster.
# NOTE(review): the result equals "number of hosts with status 1" only if the
# gauge takes just the values 0 and 1 — confirm the metric's value range.
- record: scylla_manager_healthcheck_cql_status_ag
expr: sum(scylla_manager_healthcheck_cql_status) by(cluster)
labels:
by: "cluster"
status: "1"
dd: "1"
# Same record name as the status="1" rule, distinguished by status="0":
# per-cluster count of series whose current value is exactly 0.
# The ==0 comparison filters series (no `bool` modifier), so when no series
# matches, count() produces no sample at all rather than 0 — the recorded
# series is simply absent for that cluster.
- record: scylla_manager_healthcheck_cql_status_ag
expr: count(scylla_manager_healthcheck_cql_status==0) by(cluster)
labels:
by: "cluster"
status: "0"
dd: "1"
12 changes: 12 additions & 0 deletions prometheus/prom_rules/prometheus.latency.rules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,24 @@ groups:
expr: errors:local_failed + errors:operation_unavailable
# Per-cluster timestamp of the latest evaluation at which the
# scylla_manager_scheduler_run_total{status="DONE",type="repair"} counter
# changed within the preceding 2m window.
# `> 0` drops clusters with no change; for those, `or on(cluster)` falls back to
# this rule's own previously recorded value, carrying the last timestamp forward.
# Recorded with the static labels dd="1" and by="cluster".
- record: manager:repair_done_ts
expr: timestamp(sum(changes(scylla_manager_scheduler_run_total{status="DONE",type="repair"}[2m])) by (cluster) > 0) or on(cluster) manager:repair_done_ts
labels:
dd: "1"
by: "cluster"
# Per-cluster timestamp of the latest evaluation at which the
# scylla_manager_scheduler_run_total{status="DONE",type="backup"} counter
# changed within the preceding 2m window.
# `> 0` drops clusters with no change; for those, `or on(cluster)` falls back to
# this rule's own previously recorded value, carrying the last timestamp forward.
# Recorded with the static labels dd="1" and by="cluster".
- record: manager:backup_done_ts
expr: timestamp(sum(changes(scylla_manager_scheduler_run_total{status="DONE",type="backup"}[2m])) by (cluster) > 0) or on(cluster) manager:backup_done_ts
labels:
dd: "1"
by: "cluster"
# Per-cluster timestamp of the latest evaluation at which the
# scylla_manager_scheduler_run_total{status="ERROR",type="repair"} counter
# changed within the preceding 2m window.
# `> 0` drops clusters with no change; for those, `or on(cluster)` falls back to
# this rule's own previously recorded value, carrying the last timestamp forward.
# Recorded with the static labels dd="1" and by="cluster".
- record: manager:repair_fail_ts
expr: timestamp(sum(changes(scylla_manager_scheduler_run_total{status="ERROR",type="repair"}[2m])) by (cluster) > 0) or on(cluster) manager:repair_fail_ts
labels:
dd: "1"
by: "cluster"
# Per-cluster timestamp of the latest evaluation at which the
# scylla_manager_scheduler_run_total{status="ERROR",type="backup"} counter
# changed within the preceding 2m window.
# `> 0` drops clusters with no change; for those, `or on(cluster)` falls back to
# this rule's own previously recorded value, carrying the last timestamp forward.
# Recorded with the static labels dd="1" and by="cluster".
- record: manager:backup_fail_ts
expr: timestamp(sum(changes(scylla_manager_scheduler_run_total{status="ERROR",type="backup"}[2m])) by (cluster) > 0) or on(cluster) manager:backup_fail_ts
labels:
dd: "1"
by: "cluster"
# Per-cluster repair progress ratio, gated by a 0/1 "repair running" flag.
#   flag  : max(scylla_manager_scheduler_run_indicator{type="repair"}) >bool 0
#           -> 1 when the max indicator is positive, otherwise 0.
#   ratio : for clusters whose max token_ranges_total is <= 0 the first operand
#           of `or` yields 0; every other cluster falls through to
#           (sum(success) + sum(error)) / sum(total).
# The `>=0` filters inside each sum() drop negative samples before aggregation.
# Unlike the *_ts rules, this rule sets no static labels.
- record: manager:repair_progress
expr: (max(scylla_manager_scheduler_run_indicator{type="repair"}) by (cluster) >bool 0)*((max(scylla_manager_repair_token_ranges_total) by(cluster)<= 0)*0 or on(cluster) (sum(scylla_manager_repair_token_ranges_success>=0) by (cluster) + sum(scylla_manager_repair_token_ranges_error>=0) by (cluster))/sum(scylla_manager_repair_token_ranges_total>=0) by (cluster))
- record: manager:backup_progress
Expand Down

0 comments on commit e5452c6

Please sign in to comment.