From c53b31c73d05d44e49f3732a44f689014b671215 Mon Sep 17 00:00:00 2001
From: tschneid
Date: Thu, 2 Nov 2023 20:17:14 -0500
Subject: [PATCH] add cluster check alert

---
 .../controllers/muo/staticresources/muo_alerts.yaml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/pkg/operator/controllers/muo/staticresources/muo_alerts.yaml b/pkg/operator/controllers/muo/staticresources/muo_alerts.yaml
index 67cfa031534..abc907004ff 100644
--- a/pkg/operator/controllers/muo/staticresources/muo_alerts.yaml
+++ b/pkg/operator/controllers/muo/staticresources/muo_alerts.yaml
@@ -63,3 +63,13 @@ spec:
       annotations:
         summary: "UpgradeConfig has not successfully synced in 4 hours."
         description: "This clusters UpgradeConfig has not been synced in 4 hours and may be out of date"
+    - alert: UpgradeClusterCheckFailedSRE
+      # Condition: the pre/post-upgrade cluster check failure metric averaged exactly 1 over the last 10m; 'for' then requires that to hold a further 10m before firing
+      expr: avg_over_time(upgradeoperator_cluster_check_failed[10m]) == 1
+      for: 10m
+      labels:
+        severity: critical
+        namespace: openshift-monitoring
+      annotations:
+        summary: "cluster check failed"
+        description: "basic cluster checks failed either before the upgrade or after the upgrade"