From 7478a919a2f8ef57f08b7b88170957a2858fb3f3 Mon Sep 17 00:00:00 2001 From: Matt Pryor Date: Tue, 11 Jun 2024 15:15:56 +0100 Subject: [PATCH] Replace KSM and helm-exporter with built-in metrics (#462) --- .github/workflows/update-dependencies.yaml | 7 - charts/server/Chart.yaml | 8 -- .../templates/{metrics => }/configmap.yaml | 0 .../metrics/helm-exporter/deployment.yaml | 69 ---------- .../templates/metrics/helm-exporter/role.yaml | 18 --- .../metrics/helm-exporter/rolebinding.yaml | 18 --- .../metrics/helm-exporter/service.yaml | 15 -- .../metrics/helm-exporter/serviceaccount.yaml | 7 - .../metrics/helm-exporter/servicemonitor.yaml | 16 --- charts/server/templates/sync/deployment.yaml | 4 + .../{metrics => sync}/prometheusrule.yaml | 16 +-- charts/server/templates/sync/service.yaml | 15 ++ .../server/templates/sync/servicemonitor.yaml | 19 +++ charts/server/values.yaml | 96 ------------- sync/requirements.txt | 7 + sync/setup.cfg | 1 + sync/zenith/sync/main.py | 3 +- sync/zenith/sync/metrics.py | 128 ++++++++++++++++++ sync/zenith/sync/processor/base.py | 8 +- sync/zenith/sync/processor/helm.py | 26 +++- sync/zenith/sync/store/base.py | 8 +- sync/zenith/sync/store/crd/store.py | 55 +++++++- 22 files changed, 277 insertions(+), 267 deletions(-) rename charts/server/templates/{metrics => }/configmap.yaml (100%) delete mode 100644 charts/server/templates/metrics/helm-exporter/deployment.yaml delete mode 100644 charts/server/templates/metrics/helm-exporter/role.yaml delete mode 100644 charts/server/templates/metrics/helm-exporter/rolebinding.yaml delete mode 100644 charts/server/templates/metrics/helm-exporter/service.yaml delete mode 100644 charts/server/templates/metrics/helm-exporter/serviceaccount.yaml delete mode 100644 charts/server/templates/metrics/helm-exporter/servicemonitor.yaml rename charts/server/templates/{metrics => sync}/prometheusrule.yaml (88%) create mode 100644 charts/server/templates/sync/service.yaml create mode 100644 
charts/server/templates/sync/servicemonitor.yaml create mode 100644 sync/zenith/sync/metrics.py diff --git a/.github/workflows/update-dependencies.yaml b/.github/workflows/update-dependencies.yaml index 616ec527..18bbeedf 100644 --- a/.github/workflows/update-dependencies.yaml +++ b/.github/workflows/update-dependencies.yaml @@ -91,13 +91,6 @@ jobs: chart_repo_jsonpath: dependencies[0].repository chart_version_jsonpath: dependencies[0].version - - key: kube-state-metrics - component: metrics - path: ./charts/server/Chart.yaml - chart_name_jsonpath: dependencies[0].name - chart_repo_jsonpath: dependencies[0].repository - chart_version_jsonpath: dependencies[0].version - name: ${{ matrix.key }} steps: - name: Checkout diff --git a/charts/server/Chart.yaml b/charts/server/Chart.yaml index bf2e768b..3eb798e2 100644 --- a/charts/server/Chart.yaml +++ b/charts/server/Chart.yaml @@ -6,11 +6,3 @@ type: application # The version and appVersion are updated by the chart build script version: 0.1.0 appVersion: main - -dependencies: - # prometheus-community/kube-state-metrics to produce metrics on clusters - - name: kube-state-metrics - repository: https://prometheus-community.github.io/helm-charts - version: 5.19.1 - alias: metrics - condition: metrics.enabled diff --git a/charts/server/templates/metrics/configmap.yaml b/charts/server/templates/configmap.yaml similarity index 100% rename from charts/server/templates/metrics/configmap.yaml rename to charts/server/templates/configmap.yaml diff --git a/charts/server/templates/metrics/helm-exporter/deployment.yaml b/charts/server/templates/metrics/helm-exporter/deployment.yaml deleted file mode 100644 index 97578f6f..00000000 --- a/charts/server/templates/metrics/helm-exporter/deployment.yaml +++ /dev/null @@ -1,69 +0,0 @@ -{{- if .Values.metrics.enabled -}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "zenith.componentname" (list . "helm-exporter") }} - labels: {{ include "zenith.componentLabels" (list . 
"helm-exporter") | nindent 4 }} -spec: - replicas: 1 - selector: - matchLabels: {{ include "zenith.componentSelectorLabels" (list . "helm-exporter") | nindent 6 }} - template: - metadata: - labels: {{ include "zenith.componentSelectorLabels" (list . "helm-exporter") | nindent 8 }} - annotations: - {{- with .Values.metrics.helmExporter.podAnnotations }} - {{- toYaml . | nindent 8 }} - {{- end }} - spec: - serviceAccountName: {{ include "zenith.componentname" (list . "helm-exporter") }} - {{- with .Values.metrics.helmExporter.imagePullSecrets }} - imagePullSecrets: {{ toYaml . | nindent 8 }} - {{- end }} - securityContext: {{ toYaml .Values.metrics.helmExporter.podSecurityContext | nindent 8 }} - containers: - - name: helm-exporter - securityContext: {{ toYaml .Values.metrics.helmExporter.securityContext | nindent 12 }} - image: {{ - printf - "%s:%s" - .Values.metrics.helmExporter.image.repository - (default .Chart.AppVersion .Values.metrics.helmExporter.image.tag) - }} - imagePullPolicy: {{ .Values.metrics.helmExporter.image.pullPolicy }} - args: - # We are only worried about the info metric for releases in the target namespace - - "-info-metric=true" - - "-outdated-metric=false" - - "-timestamp-metric=false" - - "-latest-chart-version=false" - - "-status-in-metric=true" - - "-namespaces" - - {{ .Values.common.kubernetes.targetNamespace | quote }} - ports: - - name: http - containerPort: 9571 - protocol: TCP - livenessProbe: - httpGet: - path: /healthz - port: http - initialDelaySeconds: 10 - timeoutSeconds: 10 - readinessProbe: - httpGet: - path: /healthz - port: http - initialDelaySeconds: 10 - timeoutSeconds: 10 - resources: {{ toYaml .Values.metrics.helmExporter.resources | nindent 12 }} - {{- with .Values.metrics.helmExporter.nodeSelector }} - nodeSelector: {{ toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.metrics.helmExporter.affinity }} - affinity: {{ toYaml . 
| nindent 8 }} - {{- end }} - {{- with .Values.metrics.helmExporter.tolerations }} - tolerations: {{ toYaml . | nindent 8 }} - {{- end }} -{{- end }} diff --git a/charts/server/templates/metrics/helm-exporter/role.yaml b/charts/server/templates/metrics/helm-exporter/role.yaml deleted file mode 100644 index 5a0a8ef7..00000000 --- a/charts/server/templates/metrics/helm-exporter/role.yaml +++ /dev/null @@ -1,18 +0,0 @@ -{{- if .Values.metrics.enabled -}} -# This role allows the holder to list secrets in the Zenith target namespace -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: {{ include "zenith.componentname" (list . "helm-exporter") }} - labels: {{ include "zenith.componentLabels" (list . "helm-exporter") | nindent 4 }} - namespace: {{ .Values.common.kubernetes.targetNamespace }} -rules: - - apiGroups: - - "" - resources: - - secrets - verbs: - - get - - list - - watch -{{- end }} diff --git a/charts/server/templates/metrics/helm-exporter/rolebinding.yaml b/charts/server/templates/metrics/helm-exporter/rolebinding.yaml deleted file mode 100644 index a1ce9f28..00000000 --- a/charts/server/templates/metrics/helm-exporter/rolebinding.yaml +++ /dev/null @@ -1,18 +0,0 @@ -{{- if .Values.metrics.enabled -}} -# This role binding allows the helm-exporter service account in the release namespace -# to list Helm release secrets in the target namespace -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: {{ include "zenith.componentname" (list . "helm-exporter") }} - labels: {{ include "zenith.componentLabels" (list . "helm-exporter") | nindent 4 }} - namespace: {{ .Values.common.kubernetes.targetNamespace }} -subjects: - - kind: ServiceAccount - namespace: {{ .Release.Namespace }} - name: {{ include "zenith.componentname" (list . "helm-exporter") }} -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: {{ include "zenith.componentname" (list . 
"helm-exporter") }} -{{- end }} diff --git a/charts/server/templates/metrics/helm-exporter/service.yaml b/charts/server/templates/metrics/helm-exporter/service.yaml deleted file mode 100644 index 1a7e7634..00000000 --- a/charts/server/templates/metrics/helm-exporter/service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -{{- if .Values.metrics.enabled -}} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "zenith.componentname" (list . "helm-exporter") }} - labels: {{ include "zenith.componentLabels" (list . "helm-exporter") | nindent 4 }} -spec: - type: ClusterIP - ports: - - name: http - port: 9571 - targetPort: http - protocol: TCP - selector: {{ include "zenith.componentSelectorLabels" (list . "helm-exporter") | nindent 4 }} -{{- end }} diff --git a/charts/server/templates/metrics/helm-exporter/serviceaccount.yaml b/charts/server/templates/metrics/helm-exporter/serviceaccount.yaml deleted file mode 100644 index 125fd083..00000000 --- a/charts/server/templates/metrics/helm-exporter/serviceaccount.yaml +++ /dev/null @@ -1,7 +0,0 @@ -{{- if .Values.metrics.enabled }} -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ include "zenith.componentname" (list . "helm-exporter") }} - labels: {{ include "zenith.componentLabels" (list . "helm-exporter") | nindent 4 }} -{{- end }} diff --git a/charts/server/templates/metrics/helm-exporter/servicemonitor.yaml b/charts/server/templates/metrics/helm-exporter/servicemonitor.yaml deleted file mode 100644 index 7ceb9e70..00000000 --- a/charts/server/templates/metrics/helm-exporter/servicemonitor.yaml +++ /dev/null @@ -1,16 +0,0 @@ -{{- if .Values.metrics.enabled -}} -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: {{ include "zenith.componentname" (list . "helm-exporter") }} - labels: {{ include "zenith.componentLabels" (list . "helm-exporter") | nindent 4 }} -spec: - selector: - matchLabels: {{ include "zenith.componentSelectorLabels" (list . 
"helm-exporter") | nindent 6 }} - endpoints: - - port: http - namespaceSelector: - matchNames: - # Restrict the ServiceMonitor to the relevant namespace - - {{ .Release.Namespace }} -{{- end }} diff --git a/charts/server/templates/sync/deployment.yaml b/charts/server/templates/sync/deployment.yaml index 2596682d..909acc94 100644 --- a/charts/server/templates/sync/deployment.yaml +++ b/charts/server/templates/sync/deployment.yaml @@ -31,6 +31,10 @@ spec: securityContext: {{ toYaml .Values.sync.securityContext | nindent 12 }} image: {{ printf "%s:%s" .Values.sync.image.repository (default .Chart.AppVersion .Values.sync.image.tag) }} imagePullPolicy: {{ .Values.sync.image.pullPolicy }} + ports: + - name: metrics + containerPort: 8080 + protocol: TCP # Tell the sync process which namespace it is running in env: - name: ZENITH_SYNC__KUBERNETES__SELF_NAMESPACE diff --git a/charts/server/templates/metrics/prometheusrule.yaml b/charts/server/templates/sync/prometheusrule.yaml similarity index 88% rename from charts/server/templates/metrics/prometheusrule.yaml rename to charts/server/templates/sync/prometheusrule.yaml index ff629b86..6705e1ff 100644 --- a/charts/server/templates/metrics/prometheusrule.yaml +++ b/charts/server/templates/sync/prometheusrule.yaml @@ -1,9 +1,14 @@ -{{- if and .Values.metrics.enabled .Values.metrics.prometheus.rules.enabled }} +{{- + if and + .Values.sync.enabled + .Values.metrics.enabled + .Values.metrics.prometheus.rules.enabled +}} apiVersion: monitoring.coreos.com/v1 kind: PrometheusRule metadata: - name: {{ include "zenith.componentname" (list . "metrics") }} - labels: {{ include "zenith.componentLabels" (list . "metrics") | nindent 4 }} + name: {{ include "zenith.componentname" (list . "sync") }} + labels: {{ include "zenith.componentLabels" (list . 
"sync") | nindent 4 }} spec: groups: - name: zenith.recording-rules @@ -18,11 +23,6 @@ spec: expr: >- sum(topk(1, label_replace(zenith_service_endpoint_info, "status", "$1", "endpoint_status", "(.*)")) by(service_name, endpoint_id, status)) by(service_name, status) - # This metric indicates the status of the Helm release - - record: zenith_service_helm_status - expr: >- - count(topk(1, label_replace(helm_chart_info, "service_name", "$1", "release", "(.+)")) by(service_name, status)) by(service_name, status) - # Zenith service alerts - name: zenith.alerts rules: diff --git a/charts/server/templates/sync/service.yaml b/charts/server/templates/sync/service.yaml new file mode 100644 index 00000000..daa3ae48 --- /dev/null +++ b/charts/server/templates/sync/service.yaml @@ -0,0 +1,15 @@ +{{- if .Values.sync.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "zenith.componentname" (list . "sync") }} + labels: {{ include "zenith.componentLabels" (list . "sync") | nindent 4 }} +spec: + type: ClusterIP + ports: + - name: metrics + port: 8080 + targetPort: metrics + protocol: TCP + selector: {{ include "zenith.componentSelectorLabels" (list . "sync") | nindent 4 }} +{{- end }} diff --git a/charts/server/templates/sync/servicemonitor.yaml b/charts/server/templates/sync/servicemonitor.yaml new file mode 100644 index 00000000..44d793d7 --- /dev/null +++ b/charts/server/templates/sync/servicemonitor.yaml @@ -0,0 +1,19 @@ +{{- + if and + .Values.sync.enabled + .Values.metrics.enabled + .Values.metrics.prometheus.monitor.enabled +}} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "zenith.componentname" (list . "sync") }} + labels: {{ include "zenith.componentLabels" (list . "sync") | nindent 4 }} +spec: + endpoints: + - honorLabels: true + port: metrics + jobLabel: app.kubernetes.io/name + selector: + matchLabels: {{ include "zenith.componentSelectorLabels" (list . 
"sync") | nindent 6 }} +{{- end }} diff --git a/charts/server/values.yaml b/charts/server/values.yaml index 1788509d..d76bf2b0 100644 --- a/charts/server/values.yaml +++ b/charts/server/values.yaml @@ -156,99 +156,3 @@ metrics: enabled: true monitor: enabled: true - honorLabels: true - # Disable all the default collectors - collectors: [] - # Allow kube-state-metrics read-only access to our CRDs - rbac: - create: true - extraRules: - - apiGroups: - - zenith.stackhpc.com - resources: - - endpoints - - leases - - services - verbs: - - list - - watch - - get - # Configure kube-state-metrics to report only on our custom resources - extraArgs: - - --custom-resource-state-only=true - customResourceState: - enabled: true - config: - kind: CustomResourceStateMetrics - spec: - resources: - - groupVersionKind: - group: zenith.stackhpc.com - version: v1alpha1 - kind: Service - metricNamePrefix: zenith_service - labelsFromPath: - service_namespace: [metadata, namespace] - service_name: [metadata, name] - metrics: - - name: info - help: "Service info" - each: - type: Info - info: - labelsFromPath: - created_at: [metadata, creationTimestamp] - fingerprint: [spec, publicKeyFingerprint] - - - groupVersionKind: - group: zenith.stackhpc.com - version: v1alpha1 - kind: Endpoints - metricNamePrefix: zenith_service - labelsFromPath: - service_namespace: [metadata, namespace] - service_name: [metadata, name] - metrics: - - name: endpoint_info - help: "Metric for the endpoints of a service" - each: - type: Info - info: - path: [spec, endpoints] - labelFromKey: endpoint_id - labelsFromPath: - endpoint_address: [address] - endpoint_port: [port] - endpoint_status: [status] - - # Configuration for the Helm exporter - helmExporter: - # The image to use for the Helm exporter - # We use a synced version to avoid a dependency on Docker Hub - image: - repository: ghcr.io/stackhpc/zenith/helm-exporter - pullPolicy: IfNotPresent - tag: 1.2.16 - imagePullSecrets: [] - # Customise pod-level security 
context for helm exporter pods - podSecurityContext: - runAsNonRoot: true - runAsUser: 1000 - runAsGroup: 1000 - fsGroup: 1000 - # Customise container-level security context for helm exporter pods - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: [ALL] - readOnlyRootFilesystem: true - # Resources for helm exporter containers - resources: {} - # Customise annotations for helm exporter pods - podAnnotations: {} - # Customise node selector for helm exporter pods - nodeSelector: {} - # Customise tolerations for helm exporter pods - tolerations: [] - # Customise affinity rules for helm exporter pods - affinity: {} diff --git a/sync/requirements.txt b/sync/requirements.txt index d7ac428f..33df98f4 100644 --- a/sync/requirements.txt +++ b/sync/requirements.txt @@ -1,19 +1,26 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 annotated-types==0.7.0 anyio==4.4.0 +async-timeout==4.0.3 +attrs==23.2.0 certifi==2024.6.2 click==8.1.7 configomatic==0.2.0 easykube==0.3.2 easysemver==0.1.0 exceptiongroup==1.2.1 +frozenlist==1.4.1 h11==0.14.0 httpcore==1.0.5 httpx==0.27.0 idna==3.7 kube-custom-resource==0.3.0 +multidict==6.0.5 pydantic==2.7.3 pydantic_core==2.18.4 pyhelm3==0.3.3 PyYAML==6.0.1 sniffio==1.3.1 typing_extensions==4.12.1 +yarl==1.9.4 diff --git a/sync/setup.cfg b/sync/setup.cfg index cfaa85f9..4a8ed6e3 100755 --- a/sync/setup.cfg +++ b/sync/setup.cfg @@ -11,6 +11,7 @@ zip_safe = False include_package_data = True packages = find_namespace: install_requires = + aiohttp click configomatic[yaml] easykube diff --git a/sync/zenith/sync/main.py b/sync/zenith/sync/main.py index b5efe0db..57b17c37 100644 --- a/sync/zenith/sync/main.py +++ b/sync/zenith/sync/main.py @@ -2,6 +2,7 @@ import contextlib from . 
import config +from .metrics import metrics_server from .processor import load as load_processor from .store import load as load_store from .util import task_cancel_and_wait @@ -18,7 +19,7 @@ async def run(config_obj: config.SyncConfig): # We can't use gather because we want the entire command to exit if one # of the coroutines exits, even if that exit is clean done, not_done = await asyncio.wait( - [processor.run(store), store.run()], + [processor.run(store), store.run(), metrics_server(store, processor)], return_when = asyncio.FIRST_COMPLETED ) # However any exceptions are not raised until we try to fetch the results diff --git a/sync/zenith/sync/metrics.py b/sync/zenith/sync/metrics.py new file mode 100644 index 00000000..731886af --- /dev/null +++ b/sync/zenith/sync/metrics.py @@ -0,0 +1,128 @@ +import asyncio +import functools +import typing + +from aiohttp import web + +if typing.TYPE_CHECKING: + from .processor import Processor + from .store import Store + + +class Metric: + """ + Base class for metrics. + """ + # The prefix for the metric + prefix = None + # The suffix for the metric + suffix = None + # The type of the metric - info or gauge + type = "info" + # The description of the metric + description = None + + def __init__(self): + self._objs = [] + + def add_obj(self, obj): + self._objs.append(obj) + + @property + def name(self): + return f"{self.prefix}_{self.suffix}" + + def labels(self, obj): + """ + The labels for the given object. + """ + return {} + + def value(self, obj): + """ + The value for the given object. + """ + return 1 + + def samples(self): + """ + Returns the samples for the metric, i.e. a list of (labels, value) tuples. + """ + for obj in self._objs: + yield self.labels(obj), self.value(obj) + + +def escape(content): + """ + Escape the given content for use in metric output. + """ + return str(content).replace("\\", r"\\").replace("\n", r"\n").replace('"', r"\"") + + +def format_value(value): + """ + Formats a value for output, e.g. 
using Go formatting. + """ + formatted = repr(value) + dot = formatted.find('.') + if value > 0 and dot > 6: + mantissa = f"{formatted[0]}.{formatted[1:dot]}{formatted[dot + 1:]}".rstrip("0.") + return f"{mantissa}e+0{dot - 1}" + else: + return formatted + + +def render_openmetrics(*metrics: Metric) -> typing.Tuple[str, bytes]: + """ + Renders the metrics using OpenMetrics text format. + """ + output = [] + for metric in metrics: + if metric.description: + output.append(f"# HELP {metric.name} {escape(metric.description)}\n") + output.append(f"# TYPE {metric.name} {metric.type}\n") + + for labels, value in metric.samples(): + if labels: + labelstr = "{{{0}}}".format( + ",".join([f'{k}="{escape(v)}"' for k, v in sorted(labels.items())]) + ) + else: + labelstr = "" + output.append(f"{metric.name}{labelstr} {format_value(value)}\n") + output.append("# EOF\n") + + return ( + "application/openmetrics-text; version=1.0.0; charset=utf-8", + "".join(output).encode("utf-8"), + ) + + +async def metrics_handler(store: 'Store', processor: 'Processor', request): + """ + Produce metrics for the store and processor. + """ + store_metrics = await store.metrics() + processor_metrics = await processor.metrics() + content_type, content = render_openmetrics(*store_metrics, *processor_metrics) + return web.Response(headers = {"Content-Type": content_type}, body = content) + + +async def metrics_server(store: 'Store', processor: 'Processor'): + """ + Launch a lightweight HTTP server to serve the metrics endpoint. 
+ """ + app = web.Application() + app.add_routes([web.get("/metrics", functools.partial(metrics_handler, store, processor))]) + + runner = web.AppRunner(app, handle_signals = False) + await runner.setup() + + site = web.TCPSite(runner, "0.0.0.0", "8080", shutdown_timeout = 1.0) + await site.start() + + # Sleep until we need to clean up + try: + await asyncio.Event().wait() + finally: + await asyncio.shield(runner.cleanup()) diff --git a/sync/zenith/sync/processor/base.py b/sync/zenith/sync/processor/base.py index db4fe97f..0a7b8df6 100644 --- a/sync/zenith/sync/processor/base.py +++ b/sync/zenith/sync/processor/base.py @@ -3,7 +3,7 @@ import random import typing -from .. import config, model, store, util +from .. import config, metrics, model, store, util class EventQueue: @@ -183,6 +183,12 @@ async def service_removed(self, service: model.Service): Called when a service is removed and should reconcile as required. """ raise NotImplementedError + + async def metrics(self) -> typing.Iterable[metrics.Metric]: + """ + Produce metrics for the processor. + """ + return [] async def process_events(self, queue: EventQueue, worker_num: int): """ diff --git a/sync/zenith/sync/processor/helm.py b/sync/zenith/sync/processor/helm.py index e7b30083..4cb5db4e 100644 --- a/sync/zenith/sync/processor/helm.py +++ b/sync/zenith/sync/processor/helm.py @@ -10,11 +10,24 @@ from pyhelm3 import Client as HelmClient -from .. import config, model, store, util +from .. import config, metrics, model, store, util from . import base +class ServiceHelmStatus(metrics.Metric): + prefix = "zenith_service" + suffix = "helm_status" + description = "The Helm status for Zenith services" + + def labels(self, obj): + return { + "service_namespace": obj.release.namespace, + "service_name": obj.release.name, + "status": obj.status.value, + } + + class Processor(base.Processor): """ Reconciles services by using a Helm chart to create resources in Kubernetes. 
@@ -274,6 +287,17 @@ async def service_removed(self, service: model.Service): ) await secrets.delete(secret_name) + async def metrics(self) -> typing.Iterable[metrics.Metric]: + releases = await self.helm_client.list_releases( + all = True, + max_releases = 0, + namespace = self.config.target_namespace + ) + helm_status_metric = ServiceHelmStatus() + for release in releases: + helm_status_metric.add_obj(await release.current_revision()) + return [helm_status_metric] + async def _update_tls_mirror(self, source_object): """ Updates the mirror secret in the target namespace. diff --git a/sync/zenith/sync/store/base.py b/sync/zenith/sync/store/base.py index 02245ee7..70747205 100644 --- a/sync/zenith/sync/store/base.py +++ b/sync/zenith/sync/store/base.py @@ -1,13 +1,19 @@ import asyncio import typing -from .. import config, model +from .. import config, metrics, model class Store: """ Produces events when the underlying representation of a service changes. """ + async def metrics(self) -> typing.Iterable[metrics.Metric]: + """ + Produce metrics for the store. + """ + return [] + async def watch(self) -> typing.Tuple[ typing.Iterable[model.Service], typing.AsyncIterable[model.Event] diff --git a/sync/zenith/sync/store/crd/store.py b/sync/zenith/sync/store/crd/store.py index c0f0f326..b4c6252f 100644 --- a/sync/zenith/sync/store/crd/store.py +++ b/sync/zenith/sync/store/crd/store.py @@ -6,7 +6,7 @@ from easykube import Configuration, ApiError from kube_custom_resource import CustomResourceRegistry -from ... import config, model +from ... import config, metrics, model from .. 
import base @@ -14,6 +14,46 @@ from .models import v1alpha1 as api +class StoreMetric(metrics.Metric): + prefix = "zenith_service" + + def labels(self, obj): + return { + "service_namespace": obj.metadata.namespace, + "service_name": obj.metadata.name, + } + + +class ServiceInfo(StoreMetric): + suffix = "info" + description = "Information about Zenith services" + + def labels(self, obj): + return { + **super().labels(obj), + "created_at": obj.metadata["creationTimestamp"], + "fingerprint": obj.get("spec", {}).get("publicKeyFingerprint", ""), + } + + +class ServiceEndpointInfo(StoreMetric): + suffix = "endpoint_info" + description = "Information about the endpoints for Zenith services" + + def samples(self): + for obj in self._objs: + labels = super().labels(obj) + for name, endpoint in obj.get("spec", {}).get("endpoints", {}).items(): + endpoint_labels = { + **labels, + "endpoint_id": name, + "endpoint_address": endpoint["address"], + "endpoint_port": endpoint["port"], + "endpoint_status": endpoint["status"], + } + yield endpoint_labels, 1 + + class Store(base.Store): """ Store implementation that provides access to services stored in Consul. @@ -156,6 +196,19 @@ async def run(self): # Wait for the configured duration await asyncio.sleep(self.config.crd_endpoint_check_interval) + async def metrics(self) -> typing.Iterable[metrics.Metric]: + ekservices = await self._ekresource_for_model(api.Service) + service_info_metric = ServiceInfo() + async for service in ekservices.list(): + service_info_metric.add_obj(service) + + ekendpoints = await self._ekresource_for_model(api.Endpoints) + endpoints_info_metric = ServiceEndpointInfo() + async for endpoints in ekendpoints.list(): + endpoints_info_metric.add_obj(endpoints) + + return [service_info_metric, endpoints_info_metric] + @classmethod def from_config(cls, config_obj: config.SyncConfig) -> "Store": return cls(config_obj.kubernetes)