Skip to content

Commit 2d71c48

Browse files
Merge pull request #2649 from simonpasquier/bump-jsonnet-deps
NO-JIRA: chore: bump jsonnet dependencies
2 parents 709ab51 + a176694 commit 2d71c48

File tree

11 files changed

+120
-304
lines changed

11 files changed

+120
-304
lines changed

assets/control-plane/minimal-service-monitor-kubelet.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ apiVersion: monitoring.coreos.com/v1
22
kind: ServiceMonitor
33
metadata:
44
labels:
5+
app.kubernetes.io/component: kubernetes
56
app.kubernetes.io/managed-by: cluster-monitoring-operator
67
app.kubernetes.io/name: kubelet
78
app.kubernetes.io/part-of: openshift-monitoring

assets/control-plane/prometheus-rule.yaml

Lines changed: 60 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ apiVersion: monitoring.coreos.com/v1
22
kind: PrometheusRule
33
metadata:
44
labels:
5+
app.kubernetes.io/component: kubernetes
56
app.kubernetes.io/managed-by: cluster-monitoring-operator
67
app.kubernetes.io/name: kube-prometheus
78
app.kubernetes.io/part-of: openshift-monitoring
@@ -243,20 +244,33 @@ spec:
243244
rules:
244245
- alert: KubeCPUOvercommit
245246
annotations:
246-
description: Cluster has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.
247+
description: Cluster has overcommitted CPU resource requests for Pods by {{ printf "%.2f" $value }} CPU shares and cannot tolerate node failure.
247248
summary: Cluster has overcommitted CPU resource requests.
248249
expr: |
249-
(sum(namespace_cpu:kube_pod_container_resource_requests:sum{}) -
250-
sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) > 0
251-
and
252-
count(max by (node) (kube_node_role{job="kube-state-metrics", role="control-plane"})) < 3)
250+
# Non-HA clusters.
251+
(
252+
(
253+
sum(namespace_cpu:kube_pod_container_resource_requests:sum{})
254+
-
255+
sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) > 0
256+
)
257+
and
258+
count(max by (node) (kube_node_role{job="kube-state-metrics", role="control-plane"})) < 3
259+
)
253260
or
254-
(sum(namespace_cpu:kube_pod_container_resource_requests:sum{}) -
255-
(sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) -
256-
max(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"})) > 0
257-
and
258-
(sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) -
259-
max(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"})) > 0)
261+
# HA clusters.
262+
(
263+
sum(namespace_cpu:kube_pod_container_resource_requests:sum{})
264+
-
265+
(
266+
# Skip clusters with only one allocatable node.
267+
(
268+
sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"})
269+
-
270+
max(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"})
271+
) > 0
272+
) > 0
273+
)
260274
for: 10m
261275
labels:
262276
namespace: kube-system
@@ -266,17 +280,30 @@ spec:
266280
description: Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.
267281
summary: Cluster has overcommitted memory resource requests.
268282
expr: |
269-
(sum(namespace_memory:kube_pod_container_resource_requests:sum{}) -
270-
sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) > 0
271-
and
272-
count(max by (node) (kube_node_role{job="kube-state-metrics", role="control-plane"})) < 3)
283+
# Non-HA clusters.
284+
(
285+
(
286+
sum(namespace_memory:kube_pod_container_resource_requests:sum{})
287+
-
288+
sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) > 0
289+
)
290+
and
291+
count(max by (node) (kube_node_role{job="kube-state-metrics", role="control-plane"})) < 3
292+
)
273293
or
274-
(sum(namespace_memory:kube_pod_container_resource_requests:sum{}) -
275-
(sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) -
276-
max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"})) > 0
277-
and
278-
(sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) -
279-
max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"})) > 0)
294+
# HA clusters.
295+
(
296+
sum(namespace_memory:kube_pod_container_resource_requests:sum{})
297+
-
298+
(
299+
# Skip clusters with only one allocatable node.
300+
(
301+
sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"})
302+
-
303+
max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"})
304+
) > 0
305+
) > 0
306+
)
280307
for: 10m
281308
labels:
282309
namespace: kube-system
@@ -468,7 +495,18 @@ spec:
468495
description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.
469496
summary: Kubelet Pod startup latency is too high.
470497
expr: |
471-
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
498+
histogram_quantile(0.99,
499+
sum by (cluster, instance, le) (
500+
topk by (cluster, instance, le, operation_type) (1,
501+
rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])
502+
)
503+
)
504+
)
505+
* on(cluster, instance) group_left(node)
506+
topk by (cluster, instance, node) (1,
507+
kubelet_node_name{job="kubelet", metrics_path="/metrics"}
508+
)
509+
> 60
472510
for: 15m
473511
labels:
474512
namespace: kube-system

assets/control-plane/service-monitor-kubelet.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ apiVersion: monitoring.coreos.com/v1
22
kind: ServiceMonitor
33
metadata:
44
labels:
5+
app.kubernetes.io/component: kubernetes
56
app.kubernetes.io/managed-by: cluster-monitoring-operator
67
app.kubernetes.io/name: kubelet
78
app.kubernetes.io/part-of: openshift-monitoring

assets/node-exporter/daemonset.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,10 @@ spec:
3232
automountServiceAccountToken: true
3333
containers:
3434
- args:
35-
- --web.listen-address=127.0.0.1:9100
35+
- --web.listen-address=127.0.0.1:9101
3636
- --path.sysfs=/host/sys
3737
- --path.rootfs=/host/root
38+
- --path.procfs=/host/root/proc
3839
- --path.udev.data=/host/root/run/udev/data
3940
- --no-collector.wifi
4041
- --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run/k3s/containerd/.+|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
@@ -86,7 +87,7 @@ spec:
8687
- args:
8788
- --secure-listen-address=[$(IP)]:9100
8889
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
89-
- --upstream=http://127.0.0.1:9100/
90+
- --upstream=http://127.0.0.1:9101/
9091
- --tls-cert-file=/etc/tls/private/tls.crt
9192
- --tls-private-key-file=/etc/tls/private/tls.key
9293
- --client-ca-file=/etc/tls/client/client-ca.crt

assets/node-exporter/prometheus-rule.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ spec:
155155
severity: warning
156156
- alert: NodeHighNumberConntrackEntriesUsed
157157
annotations:
158-
description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
158+
description: '{{ $labels.instance }} {{ $value | humanizePercentage }} of conntrack entries are used.'
159159
summary: Number of conntrack are getting close to the limit.
160160
expr: |
161161
(node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit) > 0.75

hack/build-jsonnet.sh

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,6 @@ done
4343
wait
4444

4545

46-
# shellcheck disable=SC1003
47-
# Produce dashboard definitions in format understandable by CVO (it doesn't accept ConfigMapList)
48-
grep -E -v '^apiVersion: v1|^items:|^kind: ConfigMapList' "${prefix}/dashboards/console-dashboard-definitions.yaml" | sed 's/^\ \ //g;s/- apiVersion: v1/---\'$'\n''apiVersion: v1/g' > "manifests/0000_90_cluster-monitoring-operator_01-dashboards.yaml"
49-
rm -rf "${prefix}/dashboards"
50-
5146
grep -H 'kind: CustomResourceDefinition' assets/{cluster-monitoring,prometheus}-operator/* | cut -d: -f1 | while IFS= read -r f; do
5247
mv "$f" "manifests/0000_50_cluster-monitoring-operator_00_$(basename "$f")"
5348
done

jsonnet/components/dashboards.libsonnet

Lines changed: 0 additions & 184 deletions
This file was deleted.

0 commit comments

Comments (0)