diff --git a/pkg/operator/deploy/staticresources/aro_alerts.yaml b/pkg/operator/deploy/staticresources/aro_alerts.yaml
new file mode 100644
index 00000000000..2f0c6fffec3
--- /dev/null
+++ b/pkg/operator/deploy/staticresources/aro_alerts.yaml
@@ -0,0 +1,56 @@
+# Original source from OSD/ROSA managed-cluster-config, see https://github.com/openshift/managed-cluster-config/blob/master/deploy/sre-prometheus/100-managed-upgrade-operator.PrometheusRule.yaml
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  labels:
+    prometheus: aro-operator-alerts
+    role: alert-rules
+  name: aro-operator-alerts
+  namespace: openshift-monitoring
+spec:
+  groups:
+  - name: aro-operator-alerts
+    # Each alert fires when its arooperator_* metric has been 0 for 10 minutes.
+    rules:
+    - alert: AROServicePrincipalInvalid
+      expr: arooperator_service_principal_valid == 0
+      for: 10m
+      labels:
+        severity: warning
+      annotations:
+        summary: Cluster Service Principal is invalid
+        # TODO(review): replace the {TODO FIND DOCUMENTATION} placeholder below with the real documentation link before shipping.
+        description: >
+          Your cluster's service principal is invalid.
+          Please refer to {TODO FIND DOCUMENTATION} in order to refresh your service principal.
+    - alert: ARORequiredEndpointInaccessible
+      expr: arooperator_required_endpoint_accessible == 0
+      for: 10m
+      labels:
+        severity: warning
+      annotations:
+        summary: Required endpoint {{$labels.endpoint_url}} is not accessible from {{$labels.role}}
+        description: >
+          The required endpoint {{$labels.endpoint_url}} is not accessible within your cluster's {{$labels.role}} nodes.
+          If you are restricting egress traffic on your cluster, please refer to https://learn.microsoft.com/en-us/azure/openshift/howto-restrict-egress and ensure your networking configuration allows the cluster to access the specified endpoints.
+    - alert: AROIngressCertificateInvalid
+      expr: arooperator_ingress_certificate_valid == 0
+      for: 10m
+      labels:
+        severity: warning
+      annotations:
+        summary: Cluster ingress certificate is invalid
+        # TODO(review): replace the {TODO FIND DOCUMENTATION} placeholder below with the real documentation link before shipping.
+        description: >
+          Your cluster's ingress certificate is expired, invalid, or missing.
+          Please refer to {TODO FIND DOCUMENTATION} in order to provide a valid certificate.
+    - alert: ARODNSConfigurationInvalid
+      expr: arooperator_dns_configuration_valid == 0
+      for: 10m
+      labels:
+        severity: warning
+      annotations:
+        summary: Cluster DNS configuration is invalid
+        description: >
+          Your cluster's custom DNS configuration is invalid.
+          Please refer to https://learn.microsoft.com/en-us/azure/openshift/howto-custom-dns in order to configure custom DNS for your cluster.
diff --git a/pkg/operator/deploy/staticresources/master/deployment.yaml.tmpl b/pkg/operator/deploy/staticresources/master/deployment.yaml.tmpl
index a425c35f422..a8cdbddf282 100644
--- a/pkg/operator/deploy/staticresources/master/deployment.yaml.tmpl
+++ b/pkg/operator/deploy/staticresources/master/deployment.yaml.tmpl
@@ -19,6 +19,8 @@ spec:
     metadata:
       labels:
         app: aro-operator-master
+        name: aro-operator
+        role: master
     spec:
       containers:
       - command:
diff --git a/pkg/operator/deploy/staticresources/master/metrics_service.yaml b/pkg/operator/deploy/staticresources/master/metrics_service.yaml
new file mode 100644
index 00000000000..4d85738d1d7
--- /dev/null
+++ b/pkg/operator/deploy/staticresources/master/metrics_service.yaml
@@ -0,0 +1,18 @@
+# Exposes port 8383 (http-metrics) of the pods labelled name=aro-operator, role=master.
+apiVersion: v1
+kind: Service
+metadata:
+  name: aro-operator-master-metrics
+  namespace: openshift-azure-operator
+  labels:
+    name: aro-operator
+    role: master
+spec:
+  selector:
+    name: aro-operator
+    role: master
+  ports:
+  - name: http-metrics
+    port: 8383
+    targetPort: 8383
+    protocol: TCP
diff --git a/pkg/operator/deploy/staticresources/master/servicemonitor.yaml b/pkg/operator/deploy/staticresources/master/servicemonitor.yaml
new file mode 100644
index 00000000000..04b3486f4f1
--- /dev/null
+++ b/pkg/operator/deploy/staticresources/master/servicemonitor.yaml
@@ -0,0 +1,17 @@
+# Scrapes the http-metrics port of Services labelled name=aro-operator, role=master over plain HTTP.
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: aro-operator-master-metrics
+  namespace: openshift-azure-operator
+  labels:
+    name: aro-operator
+    role: master
+spec:
+  selector:
+    matchLabels:
+      name: aro-operator
+      role: master
+  endpoints:
+  - port: http-metrics
+    scheme: http
diff --git a/pkg/operator/deploy/staticresources/namespace.yaml b/pkg/operator/deploy/staticresources/namespace.yaml
index 1192d364dbf..34729604027 100644
--- a/pkg/operator/deploy/staticresources/namespace.yaml
+++ b/pkg/operator/deploy/staticresources/namespace.yaml
@@ -4,3 +4,5 @@ metadata:
   name: openshift-azure-operator
   annotations:
     openshift.io/node-selector: ""
+  labels:
+    openshift.io/cluster-monitoring: "true"
diff --git a/pkg/operator/deploy/staticresources/prometheus_role.yaml b/pkg/operator/deploy/staticresources/prometheus_role.yaml
new file mode 100644
index 00000000000..76394e5d02b
--- /dev/null
+++ b/pkg/operator/deploy/staticresources/prometheus_role.yaml
@@ -0,0 +1,17 @@
+# Read-only (get/list/watch) access to services, endpoints and pods in openshift-azure-operator.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: prometheus-k8s
+  namespace: openshift-azure-operator
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - services
+  - endpoints
+  - pods
+  verbs:
+  - get
+  - list
+  - watch
diff --git a/pkg/operator/deploy/staticresources/prometheus_rolebinding.yaml b/pkg/operator/deploy/staticresources/prometheus_rolebinding.yaml
new file mode 100644
index 00000000000..63205d59016
--- /dev/null
+++ b/pkg/operator/deploy/staticresources/prometheus_rolebinding.yaml
@@ -0,0 +1,14 @@
+# Binds the prometheus-k8s Role in this namespace to the prometheus-k8s ServiceAccount in openshift-monitoring.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: prometheus-k8s
+  namespace: openshift-azure-operator
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: prometheus-k8s
+subjects:
+- kind: ServiceAccount
+  name: prometheus-k8s
+  namespace: openshift-monitoring
diff --git a/pkg/operator/deploy/staticresources/worker/deployment.yaml.tmpl b/pkg/operator/deploy/staticresources/worker/deployment.yaml.tmpl
index 697cb34b17b..7f580d83f34 100644
--- a/pkg/operator/deploy/staticresources/worker/deployment.yaml.tmpl
+++ b/pkg/operator/deploy/staticresources/worker/deployment.yaml.tmpl
@@ -19,6 +19,8 @@ spec:
     metadata:
       labels:
         app: aro-operator-worker
+        name: aro-operator
+        role: worker
     spec:
       containers:
       - command:
diff --git a/pkg/operator/deploy/staticresources/worker/metrics_service.yaml b/pkg/operator/deploy/staticresources/worker/metrics_service.yaml
new file mode 100644
index 00000000000..089bdda1ddf
--- /dev/null
+++ b/pkg/operator/deploy/staticresources/worker/metrics_service.yaml
@@ -0,0 +1,18 @@
+# Exposes port 8383 (http-metrics) of the pods labelled name=aro-operator, role=worker.
+apiVersion: v1
+kind: Service
+metadata:
+  name: aro-operator-worker-metrics
+  namespace: openshift-azure-operator
+  labels:
+    name: aro-operator
+    role: worker
+spec:
+  selector:
+    name: aro-operator
+    role: worker
+  ports:
+  - name: http-metrics
+    port: 8383
+    targetPort: 8383
+    protocol: TCP
diff --git a/pkg/operator/deploy/staticresources/worker/servicemonitor.yaml b/pkg/operator/deploy/staticresources/worker/servicemonitor.yaml
new file mode 100644
index 00000000000..0cf02f44e23
--- /dev/null
+++ b/pkg/operator/deploy/staticresources/worker/servicemonitor.yaml
@@ -0,0 +1,17 @@
+# Scrapes the http-metrics port of Services labelled name=aro-operator, role=worker over plain HTTP.
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: aro-operator-worker-metrics
+  namespace: openshift-azure-operator
+  labels:
+    name: aro-operator
+    role: worker
+spec:
+  selector:
+    matchLabels:
+      name: aro-operator
+      role: worker
+  endpoints:
+  - port: http-metrics
+    scheme: http