Skip to content

Commit

Permalink
Enable cluster to read/emit ARO Operator metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
tsatam committed Feb 23, 2024
1 parent c2559c8 commit 9a6afb3
Show file tree
Hide file tree
Showing 10 changed files with 154 additions and 0 deletions.
53 changes: 53 additions & 0 deletions pkg/operator/deploy/staticresources/aro_alerts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Original source from OSD/ROSA managed-cluster-config, see https://github.com/openshift/managed-cluster-config/blob/master/deploy/sre-prometheus/100-managed-upgrade-operator.PrometheusRule.yaml
# PrometheusRule defining the alerts raised from ARO Operator metrics.
# Every rule fires with severity "warning" once its metric has been equal to 0
# for 10 minutes. The description annotations are customer-facing text, so each
# one links to the documentation describing the remediation.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    prometheus: aro-operator-alerts
    role: alert-rules
  name: aro-operator-alerts
  namespace: openshift-monitoring
spec:
  groups:
    - name: aro-operator-alerts
      rules:
        # arooperator_service_principal_valid == 0 for 10m.
        - alert: AROServicePrincipalInvalid
          expr: arooperator_service_principal_valid == 0
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: Cluster Service Principal is invalid
            description: >
              Your cluster's service principal is invalid.
              Please refer to https://learn.microsoft.com/en-us/azure/openshift/howto-service-principal-credential-rotation in order to refresh your service principal.
        # arooperator_required_endpoint_accessible == 0 for 10m; the
        # endpoint_url and role labels of the series are templated into the text.
        - alert: ARORequiredEndpointInaccessible
          expr: arooperator_required_endpoint_accessible == 0
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: Required endpoint {{$labels.endpoint_url}} is not accessible from {{$labels.role}}
            description: >
              The required endpoint {{$labels.endpoint_url}} is not accessible within your cluster's {{$labels.role}} nodes.
              If you are restricting egress traffic on your cluster, please refer to https://learn.microsoft.com/en-us/azure/openshift/howto-restrict-egress and ensure your networking configuration allows the cluster to access the specified endpoints.
        # arooperator_ingress_certificate_valid == 0 for 10m.
        - alert: AROIngressCertificateInvalid
          expr: arooperator_ingress_certificate_valid == 0
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: Cluster ingress certificate is invalid
            description: >
              Your cluster's ingress certificate is expired, invalid, or missing.
              Please refer to https://docs.openshift.com/container-platform/latest/security/certificates/replacing-default-ingress-certificate.html in order to provide a valid certificate.
        # arooperator_dns_configuration_valid == 0 for 10m.
        - alert: ARODNSConfigurationInvalid
          expr: arooperator_dns_configuration_valid == 0
          for: 10m
          labels:
            severity: warning
          annotations:
            summary: Cluster DNS configuration is invalid
            description: >
              Your cluster's custom DNS configuration is invalid.
              Please refer to https://learn.microsoft.com/en-us/azure/openshift/howto-custom-dns in order to configure custom DNS for your cluster.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ spec:
metadata:
labels:
app: aro-operator-master
name: aro-operator
role: master
spec:
containers:
- command:
Expand Down
17 changes: 17 additions & 0 deletions pkg/operator/deploy/staticresources/master/metrics_service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Service publishing TCP port 8383 (named http-metrics) of the pods labelled
# name=aro-operator, role=master.
apiVersion: v1
kind: Service
metadata:
  namespace: openshift-azure-operator
  name: aro-operator-master-metrics
  # The Service carries the same name/role labels it uses to select pods.
  labels:
    role: master
    name: aro-operator
spec:
  ports:
    - name: http-metrics
      protocol: TCP
      port: 8383
      targetPort: 8383
  selector:
    role: master
    name: aro-operator
17 changes: 17 additions & 0 deletions pkg/operator/deploy/staticresources/master/servicemonitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# ServiceMonitor that scrapes the http-metrics port over plain HTTP from
# Services labelled name=aro-operator, role=master.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  namespace: openshift-azure-operator
  name: aro-operator-master-metrics
  labels:
    role: master
    name: aro-operator
spec:
  endpoints:
    # The port is referenced by name, not by number.
    - scheme: http
      port: http-metrics
  selector:
    matchLabels:
      role: master
      name: aro-operator

2 changes: 2 additions & 0 deletions pkg/operator/deploy/staticresources/namespace.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ metadata:
name: openshift-azure-operator
annotations:
openshift.io/node-selector: ""
labels:
openshift.io/cluster-monitoring: "true"
16 changes: 16 additions & 0 deletions pkg/operator/deploy/staticresources/prometheus_role.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Role granting read-only access (get/list/watch) to services, endpoints and
# pods of the core API group within the openshift-azure-operator namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: openshift-azure-operator
  name: prometheus-k8s
rules:
  # "" denotes the core API group.
  - apiGroups: [""]
    resources: [services, endpoints, pods]
    verbs: [get, list, watch]
12 changes: 12 additions & 0 deletions pkg/operator/deploy/staticresources/prometheus_rolebinding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# RoleBinding granting the prometheus-k8s Role in the openshift-azure-operator
# namespace to the prometheus-k8s ServiceAccount of openshift-monitoring.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: prometheus-k8s
  namespace: openshift-azure-operator
roleRef:
  # apiGroup is spelled out so the Role reference is fully qualified.
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: prometheus-k8s
subjects:
  - kind: ServiceAccount
    name: prometheus-k8s
    namespace: openshift-monitoring
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ spec:
metadata:
labels:
app: aro-operator-worker
name: aro-operator
role: worker
spec:
containers:
- command:
Expand Down
17 changes: 17 additions & 0 deletions pkg/operator/deploy/staticresources/worker/metrics_service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Service publishing TCP port 8383 (named http-metrics) of the pods labelled
# name=aro-operator, role=worker.
apiVersion: v1
kind: Service
metadata:
  namespace: openshift-azure-operator
  name: aro-operator-worker-metrics
  # The Service carries the same name/role labels it uses to select pods.
  labels:
    role: worker
    name: aro-operator
spec:
  ports:
    - name: http-metrics
      protocol: TCP
      port: 8383
      targetPort: 8383
  selector:
    role: worker
    name: aro-operator
16 changes: 16 additions & 0 deletions pkg/operator/deploy/staticresources/worker/servicemonitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# ServiceMonitor that scrapes the http-metrics port over plain HTTP from
# Services labelled name=aro-operator, role=worker.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  namespace: openshift-azure-operator
  name: aro-operator-worker-metrics
  labels:
    role: worker
    name: aro-operator
spec:
  endpoints:
    # The port is referenced by name, not by number.
    - scheme: http
      port: http-metrics
  selector:
    matchLabels:
      role: worker
      name: aro-operator

0 comments on commit 9a6afb3

Please sign in to comment.