From 60b0feebac56972e826879ac9f15f547a67da73d Mon Sep 17 00:00:00 2001
From: Eugene Jahn <ejahn@github.com>
Date: Tue, 5 May 2026 15:26:15 -0400
Subject: [PATCH 1/5] monitoring: add consolidated workloads CPU/memory
 dashboard

Adds a single GCP Monitoring dashboard that surfaces CPU and memory
across all Sigstore GKE workloads (grouped by namespace / container),
so oncall does not have to navigate multiple metric pages while
investigating resource issues.

The dashboard includes:
  - CPU usage in cores (rate of core_usage_time)
  - Memory used (non-evictable bytes)
  - CPU/memory limit utilization (REDUCE_MAX so a hot replica is visible)
  - CPU/memory request utilization (REDUCE_MAX)
  - Container restart deltas
  - Node CPU allocatable utilization

Resolves sigstore/public-good-instance#1122

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Signed-off-by: Eugene Jahn <ejahn@sigstore.dev>
---
 gcp/modules/monitoring/infra/dashboards.tf  |   8 +
 gcp/modules/monitoring/infra/workloads.json | 350 ++++++++++++++++++++
 2 files changed, 358 insertions(+)
 create mode 100644 gcp/modules/monitoring/infra/workloads.json

diff --git a/gcp/modules/monitoring/infra/dashboards.tf b/gcp/modules/monitoring/infra/dashboards.tf
index a5070b30..f7e5f8a0 100644
--- a/gcp/modules/monitoring/infra/dashboards.tf
+++ b/gcp/modules/monitoring/infra/dashboards.tf
@@ -72,3 +72,11 @@ resource "google_monitoring_dashboard" "rekor_v1" {
     rekor_url = var.rekor_url
   })
 }
+
+# Consolidated CPU/memory dashboard for all Sigstore GKE workloads. Loaded with file() so the "${...}" legend templates in the JSON stay literal.
+# See https://github.com/sigstore/public-good-instance/issues/1122
+resource "google_monitoring_dashboard" "workloads" {
+  project = var.project_id
+
+  dashboard_json = file("${path.module}/workloads.json")
+}
diff --git a/gcp/modules/monitoring/infra/workloads.json b/gcp/modules/monitoring/infra/workloads.json
new file mode 100644
index 00000000..5407d41c
--- /dev/null
+++ b/gcp/modules/monitoring/infra/workloads.json
@@ -0,0 +1,350 @@
+{
+  "displayName": "Workloads CPU & Memory",
+  "mosaicLayout": {
+    "columns": 12,
+    "tiles": [
+      {
+        "xPos": 0,
+        "yPos": 0,
+        "width": 12,
+        "height": 4,
+        "widget": {
+          "title": "Overview",
+          "text": {
+            "content": "Consolidated CPU and memory view for all Sigstore GKE workloads (Fulcio, Rekor, CTLog, Trillian, Dex, prober, monitoring, etc.). Workload charts are grouped by `namespace_name` (and `container_name` where available); node charts are grouped by `node_name`. Use this dashboard as the first stop when investigating high resource usage during oncall.\n\nMetric source: GKE system metrics (`kubernetes.io/container/*`, `kubernetes.io/pod/*`, `kubernetes.io/node/*`).",
+            "format": "MARKDOWN",
+            "style": {
+              "fontSize": "FS_LARGE",
+              "padding": "P_EXTRA_SMALL"
+            }
+          }
+        }
+      },
+      {
+        "xPos": 0,
+        "yPos": 4,
+        "width": 6,
+        "height": 16,
+        "widget": {
+          "title": "CPU usage (cores) by container",
+          "xyChart": {
+            "yAxis": {
+              "label": "cores",
+              "scale": "LINEAR"
+            },
+            "dataSets": [
+              {
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "legendTemplate": "${resource.labels.namespace_name}/${resource.labels.container_name}",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"kubernetes.io/container/cpu/core_usage_time\" resource.type=\"k8s_container\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_RATE",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "resource.label.namespace_name",
+                        "resource.label.container_name"
+                      ]
+                    }
+                  },
+                  "unitOverride": "1"
+                }
+              }
+            ]
+          }
+        }
+      },
+      {
+        "xPos": 6,
+        "yPos": 4,
+        "width": 6,
+        "height": 16,
+        "widget": {
+          "title": "Memory used (bytes) by container",
+          "xyChart": {
+            "yAxis": {
+              "label": "bytes",
+              "scale": "LINEAR"
+            },
+            "dataSets": [
+              {
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "legendTemplate": "${resource.labels.namespace_name}/${resource.labels.container_name}",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"kubernetes.io/container/memory/used_bytes\" resource.type=\"k8s_container\" metric.label.\"memory_type\"=\"non-evictable\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_MEAN",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "resource.label.namespace_name",
+                        "resource.label.container_name"
+                      ]
+                    }
+                  },
+                  "unitOverride": "By"
+                }
+              }
+            ]
+          }
+        }
+      },
+      {
+        "xPos": 0,
+        "yPos": 20,
+        "width": 6,
+        "height": 16,
+        "widget": {
+          "title": "CPU limit utilization (% of container limit)",
+          "xyChart": {
+            "yAxis": {
+              "label": "utilization",
+              "scale": "LINEAR"
+            },
+            "thresholds": [
+              {
+                "value": 0.8,
+                "color": "YELLOW",
+                "direction": "ABOVE"
+              },
+              {
+                "value": 0.95,
+                "color": "RED",
+                "direction": "ABOVE"
+              }
+            ],
+            "dataSets": [
+              {
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "legendTemplate": "${resource.labels.namespace_name}/${resource.labels.container_name}",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"kubernetes.io/container/cpu/limit_utilization\" resource.type=\"k8s_container\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_MEAN",
+                      "crossSeriesReducer": "REDUCE_MAX",
+                      "groupByFields": [
+                        "resource.label.namespace_name",
+                        "resource.label.container_name"
+                      ]
+                    }
+                  },
+                  "unitOverride": "10^2.%"
+                }
+              }
+            ]
+          }
+        }
+      },
+      {
+        "xPos": 6,
+        "yPos": 20,
+        "width": 6,
+        "height": 16,
+        "widget": {
+          "title": "Memory limit utilization (% of container limit)",
+          "xyChart": {
+            "yAxis": {
+              "label": "utilization",
+              "scale": "LINEAR"
+            },
+            "thresholds": [
+              {
+                "value": 0.8,
+                "color": "YELLOW",
+                "direction": "ABOVE"
+              },
+              {
+                "value": 0.95,
+                "color": "RED",
+                "direction": "ABOVE"
+              }
+            ],
+            "dataSets": [
+              {
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "legendTemplate": "${resource.labels.namespace_name}/${resource.labels.container_name}",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"kubernetes.io/container/memory/limit_utilization\" resource.type=\"k8s_container\" metric.label.\"memory_type\"=\"non-evictable\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_MEAN",
+                      "crossSeriesReducer": "REDUCE_MAX",
+                      "groupByFields": [
+                        "resource.label.namespace_name",
+                        "resource.label.container_name"
+                      ]
+                    }
+                  },
+                  "unitOverride": "10^2.%"
+                }
+              }
+            ]
+          }
+        }
+      },
+      {
+        "xPos": 0,
+        "yPos": 36,
+        "width": 6,
+        "height": 16,
+        "widget": {
+          "title": "CPU request utilization (% of container request)",
+          "xyChart": {
+            "yAxis": {
+              "label": "utilization",
+              "scale": "LINEAR"
+            },
+            "dataSets": [
+              {
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "legendTemplate": "${resource.labels.namespace_name}/${resource.labels.container_name}",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"kubernetes.io/container/cpu/request_utilization\" resource.type=\"k8s_container\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_MEAN",
+                      "crossSeriesReducer": "REDUCE_MAX",
+                      "groupByFields": [
+                        "resource.label.namespace_name",
+                        "resource.label.container_name"
+                      ]
+                    }
+                  },
+                  "unitOverride": "10^2.%"
+                }
+              }
+            ]
+          }
+        }
+      },
+      {
+        "xPos": 6,
+        "yPos": 36,
+        "width": 6,
+        "height": 16,
+        "widget": {
+          "title": "Memory request utilization (% of container request)",
+          "xyChart": {
+            "yAxis": {
+              "label": "utilization",
+              "scale": "LINEAR"
+            },
+            "dataSets": [
+              {
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "legendTemplate": "${resource.labels.namespace_name}/${resource.labels.container_name}",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"kubernetes.io/container/memory/request_utilization\" resource.type=\"k8s_container\" metric.label.\"memory_type\"=\"non-evictable\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_MEAN",
+                      "crossSeriesReducer": "REDUCE_MAX",
+                      "groupByFields": [
+                        "resource.label.namespace_name",
+                        "resource.label.container_name"
+                      ]
+                    }
+                  },
+                  "unitOverride": "10^2.%"
+                }
+              }
+            ]
+          }
+        }
+      },
+      {
+        "xPos": 0,
+        "yPos": 52,
+        "width": 6,
+        "height": 16,
+        "widget": {
+          "title": "Container restarts (delta, 5m)",
+          "xyChart": {
+            "yAxis": {
+              "label": "restarts",
+              "scale": "LINEAR"
+            },
+            "dataSets": [
+              {
+                "plotType": "STACKED_BAR",
+                "targetAxis": "Y1",
+                "legendTemplate": "${resource.labels.namespace_name}/${resource.labels.container_name}",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"kubernetes.io/container/restart_count\" resource.type=\"k8s_container\"",
+                    "aggregation": {
+                      "alignmentPeriod": "300s",
+                      "perSeriesAligner": "ALIGN_DELTA",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "resource.label.namespace_name",
+                        "resource.label.container_name"
+                      ]
+                    }
+                  }
+                }
+              }
+            ]
+          }
+        }
+      },
+      {
+        "xPos": 6,
+        "yPos": 52,
+        "width": 6,
+        "height": 16,
+        "widget": {
+          "title": "Node CPU allocatable utilization",
+          "xyChart": {
+            "yAxis": {
+              "label": "utilization",
+              "scale": "LINEAR"
+            },
+            "thresholds": [
+              {
+                "value": 0.9,
+                "color": "RED",
+                "direction": "ABOVE"
+              }
+            ],
+            "dataSets": [
+              {
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "legendTemplate": "${resource.labels.node_name}",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"kubernetes.io/node/cpu/allocatable_utilization\" resource.type=\"k8s_node\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_MEAN",
+                      "crossSeriesReducer": "REDUCE_MEAN",
+                      "groupByFields": [
+                        "resource.label.node_name"
+                      ]
+                    }
+                  },
+                  "unitOverride": "10^2.%"
+                }
+              }
+            ]
+          }
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file

From f9ed726d50d4b34d9c60eba15ab0862a82326835 Mon Sep 17 00:00:00 2001
From: Eugene Jahn <ejahn@sigstore.dev>
Date: Wed, 6 May 2026 14:00:26 -0400
Subject: [PATCH 2/5] monitoring: drop unsupported threshold color/direction
 fields

The Threshold `color` and `direction` fields are not allowed in an
xyChart; the dashboard create call rejects them. Keep just the value.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Signed-off-by: Eugene Jahn <ejahn@sigstore.dev>
---
 gcp/modules/monitoring/infra/workloads.json | 22 ++++++---------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/gcp/modules/monitoring/infra/workloads.json b/gcp/modules/monitoring/infra/workloads.json
index 5407d41c..94cfd204 100644
--- a/gcp/modules/monitoring/infra/workloads.json
+++ b/gcp/modules/monitoring/infra/workloads.json
@@ -108,14 +108,10 @@
             },
             "thresholds": [
               {
-                "value": 0.8,
-                "color": "YELLOW",
-                "direction": "ABOVE"
+                "value": 0.8
               },
               {
-                "value": 0.95,
-                "color": "RED",
-                "direction": "ABOVE"
+                "value": 0.95
               }
             ],
             "dataSets": [
@@ -157,14 +153,10 @@
             },
             "thresholds": [
               {
-                "value": 0.8,
-                "color": "YELLOW",
-                "direction": "ABOVE"
+                "value": 0.8
               },
               {
-                "value": 0.95,
-                "color": "RED",
-                "direction": "ABOVE"
+                "value": 0.95
               }
             ],
             "dataSets": [
@@ -316,9 +308,7 @@
             },
             "thresholds": [
               {
-                "value": 0.9,
-                "color": "RED",
-                "direction": "ABOVE"
+                "value": 0.9
               }
             ],
             "dataSets": [
@@ -347,4 +337,4 @@
       }
     ]
   }
-}
\ No newline at end of file
+}

From 939aeede2bcbbd49c3af73c9d1ff53c561fce775 Mon Sep 17 00:00:00 2001
From: Eugene Jahn <ejahn@sigstore.dev>
Date: Wed, 6 May 2026 14:59:41 -0400
Subject: [PATCH 3/5] monitoring: fix workloads dashboard tile sizing

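Heights of 16 in a 12-column mosaic produced very tall narrow tiles.
Use h=4 (standard) for charts, keep h=4 for the overview banner, and
move the chart rows' yPos up (20/36/52 -> 8/12/16) so the rows stay
stacked directly below one another.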

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Signed-off-by: Eugene Jahn <ejahn@sigstore.dev>
---
 gcp/modules/monitoring/infra/workloads.json | 28 ++++++++++-----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/gcp/modules/monitoring/infra/workloads.json b/gcp/modules/monitoring/infra/workloads.json
index 94cfd204..707151ff 100644
--- a/gcp/modules/monitoring/infra/workloads.json
+++ b/gcp/modules/monitoring/infra/workloads.json
@@ -24,7 +24,7 @@
         "xPos": 0,
         "yPos": 4,
         "width": 6,
-        "height": 16,
+        "height": 4,
         "widget": {
           "title": "CPU usage (cores) by container",
           "xyChart": {
@@ -61,7 +61,7 @@
         "xPos": 6,
         "yPos": 4,
         "width": 6,
-        "height": 16,
+        "height": 4,
         "widget": {
           "title": "Memory used (bytes) by container",
           "xyChart": {
@@ -96,9 +96,9 @@
       },
       {
         "xPos": 0,
-        "yPos": 20,
+        "yPos": 8,
         "width": 6,
-        "height": 16,
+        "height": 4,
         "widget": {
           "title": "CPU limit utilization (% of container limit)",
           "xyChart": {
@@ -141,9 +141,9 @@
       },
       {
         "xPos": 6,
-        "yPos": 20,
+        "yPos": 8,
         "width": 6,
-        "height": 16,
+        "height": 4,
         "widget": {
           "title": "Memory limit utilization (% of container limit)",
           "xyChart": {
@@ -186,9 +186,9 @@
       },
       {
         "xPos": 0,
-        "yPos": 36,
+        "yPos": 12,
         "width": 6,
-        "height": 16,
+        "height": 4,
         "widget": {
           "title": "CPU request utilization (% of container request)",
           "xyChart": {
@@ -223,9 +223,9 @@
       },
       {
         "xPos": 6,
-        "yPos": 36,
+        "yPos": 12,
         "width": 6,
-        "height": 16,
+        "height": 4,
         "widget": {
           "title": "Memory request utilization (% of container request)",
           "xyChart": {
@@ -260,9 +260,9 @@
       },
       {
         "xPos": 0,
-        "yPos": 52,
+        "yPos": 16,
         "width": 6,
-        "height": 16,
+        "height": 4,
         "widget": {
           "title": "Container restarts (delta, 5m)",
           "xyChart": {
@@ -296,9 +296,9 @@
       },
       {
         "xPos": 6,
-        "yPos": 52,
+        "yPos": 16,
         "width": 6,
-        "height": 16,
+        "height": 4,
         "widget": {
           "title": "Node CPU allocatable utilization",
           "xyChart": {

From bc09031151c12034e660c067be37c02d0d17707c Mon Sep 17 00:00:00 2001
From: Eugene Jahn <ejahn@sigstore.dev>
Date: Thu, 7 May 2026 10:46:49 -0400
Subject: [PATCH 4/5] monitoring: add network, ephemeral storage, node memory,
 and uptime tiles to workloads dashboard

Mirrors the standard GKE Workloads dashboard so oncall does not have
to navigate to multiple pages to find resource usage charts:

  - Pod network received / sent (per namespace)
  - Ephemeral storage used (per container)
  - Node memory allocatable utilization (sibling of node CPU)
  - Running containers per namespace (uptime count)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Signed-off-by: Eugene Jahn <ejahn@sigstore.dev>
---
 gcp/modules/monitoring/infra/workloads.json | 185 ++++++++++++++++++++
 1 file changed, 185 insertions(+)

diff --git a/gcp/modules/monitoring/infra/workloads.json b/gcp/modules/monitoring/infra/workloads.json
index 707151ff..311dd1b4 100644
--- a/gcp/modules/monitoring/infra/workloads.json
+++ b/gcp/modules/monitoring/infra/workloads.json
@@ -334,6 +334,191 @@
             ]
           }
         }
+      },
+      {
+        "xPos": 0,
+        "yPos": 20,
+        "width": 6,
+        "height": 4,
+        "widget": {
+          "title": "Node memory allocatable utilization",
+          "xyChart": {
+            "yAxis": {
+              "label": "utilization",
+              "scale": "LINEAR"
+            },
+            "thresholds": [
+              {
+                "value": 0.9
+              }
+            ],
+            "dataSets": [
+              {
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "legendTemplate": "${resource.labels.node_name}",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"kubernetes.io/node/memory/allocatable_utilization\" resource.type=\"k8s_node\" metric.label.\"memory_type\"=\"non-evictable\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_MEAN",
+                      "crossSeriesReducer": "REDUCE_MEAN",
+                      "groupByFields": [
+                        "resource.label.node_name"
+                      ]
+                    }
+                  },
+                  "unitOverride": "10^2.%"
+                }
+              }
+            ]
+          }
+        }
+      },
+      {
+        "xPos": 6,
+        "yPos": 20,
+        "width": 6,
+        "height": 4,
+        "widget": {
+          "title": "Ephemeral storage used (bytes) by container",
+          "xyChart": {
+            "yAxis": {
+              "label": "bytes",
+              "scale": "LINEAR"
+            },
+            "dataSets": [
+              {
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "legendTemplate": "${resource.labels.namespace_name}/${resource.labels.container_name}",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"kubernetes.io/container/ephemeral_storage/used_bytes\" resource.type=\"k8s_container\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_MEAN",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "resource.label.namespace_name",
+                        "resource.label.container_name"
+                      ]
+                    }
+                  },
+                  "unitOverride": "By"
+                }
+              }
+            ]
+          }
+        }
+      },
+      {
+        "xPos": 0,
+        "yPos": 24,
+        "width": 6,
+        "height": 4,
+        "widget": {
+          "title": "Pod network received (bytes/s) by namespace",
+          "xyChart": {
+            "yAxis": {
+              "label": "bytes/s",
+              "scale": "LINEAR"
+            },
+            "dataSets": [
+              {
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "legendTemplate": "${resource.labels.namespace_name}",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"kubernetes.io/pod/network/received_bytes_count\" resource.type=\"k8s_pod\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_RATE",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "resource.label.namespace_name"
+                      ]
+                    }
+                  },
+                  "unitOverride": "By/s"
+                }
+              }
+            ]
+          }
+        }
+      },
+      {
+        "xPos": 6,
+        "yPos": 24,
+        "width": 6,
+        "height": 4,
+        "widget": {
+          "title": "Pod network sent (bytes/s) by namespace",
+          "xyChart": {
+            "yAxis": {
+              "label": "bytes/s",
+              "scale": "LINEAR"
+            },
+            "dataSets": [
+              {
+                "plotType": "LINE",
+                "targetAxis": "Y1",
+                "legendTemplate": "${resource.labels.namespace_name}",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"kubernetes.io/pod/network/sent_bytes_count\" resource.type=\"k8s_pod\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_RATE",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "resource.label.namespace_name"
+                      ]
+                    }
+                  },
+                  "unitOverride": "By/s"
+                }
+              }
+            ]
+          }
+        }
+      },
+      {
+        "xPos": 0,
+        "yPos": 28,
+        "width": 12,
+        "height": 4,
+        "widget": {
+          "title": "Running containers per namespace (uptime samples)",
+          "xyChart": {
+            "yAxis": {
+              "label": "containers",
+              "scale": "LINEAR"
+            },
+            "dataSets": [
+              {
+                "plotType": "STACKED_AREA",
+                "targetAxis": "Y1",
+                "legendTemplate": "${resource.labels.namespace_name}",
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"kubernetes.io/container/uptime\" resource.type=\"k8s_container\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_COUNT",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "resource.label.namespace_name"
+                      ]
+                    }
+                  }
+                }
+              }
+            ]
+          }
+        }
       }
     ]
   }

From 0c4b7c1376da058ee4b3baf68827f5fb22669aec Mon Sep 17 00:00:00 2001
From: Eugene Jahn <ejahn@sigstore.dev>
Date: Thu, 7 May 2026 10:52:42 -0400
Subject: [PATCH 5/5] monitoring: count running containers exactly via
 REDUCE_COUNT

Previous tile used ALIGN_COUNT + REDUCE_SUM, which sums sample counts
within the alignment window and is an approximation of container
count. Switch to ALIGN_MEAN per series + REDUCE_COUNT across series
so the y-axis is the exact number of running containers per
namespace.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Signed-off-by: Eugene Jahn <ejahn@sigstore.dev>
---
 gcp/modules/monitoring/infra/workloads.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcp/modules/monitoring/infra/workloads.json b/gcp/modules/monitoring/infra/workloads.json
index 311dd1b4..3c99fd54 100644
--- a/gcp/modules/monitoring/infra/workloads.json
+++ b/gcp/modules/monitoring/infra/workloads.json
@@ -491,7 +491,7 @@
         "width": 12,
         "height": 4,
         "widget": {
-          "title": "Running containers per namespace (uptime samples)",
+          "title": "Running containers per namespace",
           "xyChart": {
             "yAxis": {
               "label": "containers",
@@ -507,8 +507,8 @@
                     "filter": "metric.type=\"kubernetes.io/container/uptime\" resource.type=\"k8s_container\"",
                     "aggregation": {
                       "alignmentPeriod": "60s",
-                      "perSeriesAligner": "ALIGN_COUNT",
-                      "crossSeriesReducer": "REDUCE_SUM",
+                      "perSeriesAligner": "ALIGN_MEAN",
+                      "crossSeriesReducer": "REDUCE_COUNT",
                       "groupByFields": [
                         "resource.label.namespace_name"
                       ]