Skip to content

Commit

Permalink
Publish
Browse files Browse the repository at this point in the history
  • Loading branch information
samber committed Aug 15, 2023
1 parent c3d7878 commit afddf71
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 6 deletions.
9 changes: 9 additions & 0 deletions dist/rules/host-and-hardware/node-exporter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,15 @@ groups:
summary: Host out of inodes (instance {{ $labels.instance }})
description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

# Fires when node_exporter flags an error while collecting statistics for a mounted filesystem.
- alert: HostFilesystemDeviceError
  # Pseudo and network filesystems (FUSE, tmpfs, CIFS, NFS) are excluded: stat errors on
  # them are typically permission or connectivity noise rather than a failing device.
  expr: 'node_filesystem_device_error{fstype!~"fuse.*|tmpfs|cifs|nfs.*"} == 1'
  # Require the error to persist so a single failed stat at scrape time does not page.
  for: 2m
  labels:
    severity: critical
  annotations:
    summary: Host filesystem device error (instance {{ $labels.instance }})
    description: "{{ $labels.instance }}: Device error with the {{ $labels.mountpoint }} filesystem\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: HostInodesWillFillIn24Hours
expr: '(node_filesystem_files_free / node_filesystem_files * 100 < 10 and predict_linear(node_filesystem_files_free[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly == 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
for: 2m
Expand Down
2 changes: 1 addition & 1 deletion dist/rules/loki/embedded-exporter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ groups:
rules:

- alert: LokiProcessTooManyRestarts
expr: 'changes(process_start_time_seconds{job=~"loki"}[15m]) > 2'
expr: 'changes(process_start_time_seconds{job=~".*loki.*"}[15m]) > 2'
for: 0m
labels:
severity: warning
Expand Down
14 changes: 14 additions & 0 deletions dist/rules/patroni/embedded-exporter-patroni.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
groups:

  - name: EmbeddedExporterPatroni

    rules:

      # Fires when a Patroni cluster (identified by its `scope` label) reports neither a
      # primary (patroni_master) nor a standby leader (patroni_standby_leader).
      - alert: PatroniHasNoLeader
        # Both operands aggregate with `max by (scope)`, so `scope` is the only label
        # carried by the resulting alert.
        # NOTE(review): newer Patroni releases may expose `patroni_primary` in place of
        # `patroni_master` - confirm against the deployed Patroni version.
        expr: '(max by (scope) (patroni_master) < 1) and (max by (scope) (patroni_standby_leader) < 1)'
        for: 0m
        labels:
          severity: critical
        annotations:
          # The summary names the cluster via `scope`; the instance label is dropped by
          # the aggregation above and would always render as an empty string.
          summary: Patroni has no Leader (cluster {{ $labels.scope }})
          description: "A leader node (neither primary nor standby) cannot be found inside the cluster {{ $labels.scope }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
2 changes: 1 addition & 1 deletion dist/rules/postgresql/postgres-exporter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ groups:
description: "Table {{ $labels.relname }} has not been auto analyzed for 10 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: PostgresqlTooManyConnections
expr: 'sum by (instance, job, server) (pg_stat_activity_count) > min by (instance, job, server) (pg_settings_max_connections * 0.8)'
# Warn when active connections exceed 80% of max_connections for the same instance/job/server.
# An empty expression is not valid PromQL, so the original comparison is restored.
expr: 'sum by (instance, job, server) (pg_stat_activity_count) > min by (instance, job, server) (pg_settings_max_connections * 0.8)'
for: 2m
labels:
severity: warning
Expand Down
6 changes: 3 additions & 3 deletions dist/rules/prometheus-self-monitoring/embedded-exporter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -247,11 +247,11 @@ groups:
summary: Prometheus TSDB WAL truncations failed (instance {{ $labels.instance }})
description: "Prometheus encountered {{ $value }} TSDB WAL truncation failures\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: PrometheusTimeserieCardinality
- alert: PrometheusTimeseriesCardinality
expr: 'label_replace(count by(__name__) ({__name__=~".+"}), "name", "$1", "__name__", "(.+)") > 10000'
for: 0m
labels:
severity: warning
annotations:
summary: Prometheus timeserie cardinality (instance {{ $labels.instance }})
description: "The \"{{ $labels.name }}\" timeserie cardinality is getting very high: {{ $value }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
# The expression counts series by metric name and copies it into the `name` label; no
# instance label survives the aggregation, so the summary identifies the metric instead.
summary: Prometheus timeseries cardinality (metric {{ $labels.name }})
description: "The \"{{ $labels.name }}\" timeseries cardinality is getting very high: {{ $value }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
2 changes: 1 addition & 1 deletion dist/rules/redis/oliver006-redis-exporter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ groups:
description: "Redis cluster has too many nodes marked as master.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

- alert: RedisDisconnectedSlaves
expr: 'count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1'
expr: 'count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 0'
for: 0m
labels:
severity: critical
Expand Down

0 comments on commit afddf71

Please sign in to comment.