From bc3d3c2ac0b60b1f46df5c1256c8b09ae7aeb097 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Mon, 27 Oct 2025 14:53:56 +0100 Subject: [PATCH 1/3] feat: add sentry to k8s watcher --- bases/renku_data_services/k8s_cache/config.py | 4 +++ .../k8s_cache/dependencies.py | 35 ++++++++++--------- bases/renku_data_services/k8s_cache/main.py | 23 ++++++++++-- 3 files changed, 43 insertions(+), 19 deletions(-) diff --git a/bases/renku_data_services/k8s_cache/config.py b/bases/renku_data_services/k8s_cache/config.py index 378900083..dfedf00f3 100644 --- a/bases/renku_data_services/k8s_cache/config.py +++ b/bases/renku_data_services/k8s_cache/config.py @@ -6,6 +6,7 @@ from dataclasses import dataclass from typing import Self +from renku_data_services.app_config.config import SentryConfig from renku_data_services.db_config.config import DBConfig @@ -73,6 +74,7 @@ class Config: metrics: _MetricsConfig image_builders: _ImageBuilderConfig v1_services: _V1ServicesConfig + sentry: SentryConfig @classmethod def from_env(cls) -> Config: @@ -82,10 +84,12 @@ def from_env(cls) -> Config: metrics = _MetricsConfig.from_env() image_builders = _ImageBuilderConfig.from_env() v1_services = _V1ServicesConfig.from_env() + sentry = SentryConfig.from_env() return cls( db=db, k8s=k8s, metrics=metrics, image_builders=image_builders, v1_services=v1_services, + sentry=sentry, ) diff --git a/bases/renku_data_services/k8s_cache/dependencies.py b/bases/renku_data_services/k8s_cache/dependencies.py index f01d706ea..bce4e7e99 100644 --- a/bases/renku_data_services/k8s_cache/dependencies.py +++ b/bases/renku_data_services/k8s_cache/dependencies.py @@ -16,33 +16,33 @@ class DependencyManager: config: Config - quota_repo: QuotaRepository - _k8s_cache: K8sDbCache | None = None + # quota_repo: QuotaRepository + # _k8s_cache: K8sDbCache | None = None + + _quota_repo: QuotaRepository | None = field(default=None, repr=False, init=False) + _k8s_cache: K8sDbCache | None = field(default=None, repr=False, init=False) _metrics_repo: MetricsRepository | None = field(default=None, repr=False, init=False) _metrics: StagingMetricsService | None = field(default=None, repr=False, init=False) _rp_repo: ResourcePoolRepository | None = field(default=None, repr=False, init=False) _cluster_repo: ClusterRepository | None = field(default=None, repr=False, init=False) - @property def metrics_repo(self) -> MetricsRepository: """The DB adapter for metrics.""" if not self._metrics_repo: self._metrics_repo = MetricsRepository(session_maker=self.config.db.async_session_maker) return self._metrics_repo - @property def metrics(self) -> StagingMetricsService: """The metrics service interface.""" if not self._metrics: - self._metrics = StagingMetricsService(enabled=self.config.metrics.enabled, metrics_repo=self.metrics_repo) + self._metrics = StagingMetricsService(enabled=self.config.metrics.enabled, metrics_repo=self.metrics_repo()) return self._metrics - @property def rp_repo(self) -> ResourcePoolRepository: """The resource pool repository.""" if not self._rp_repo: self._rp_repo = ResourcePoolRepository( - session_maker=self.config.db.async_session_maker, quotas_repo=self.quota_repo + session_maker=self.config.db.async_session_maker, quotas_repo=self.quota_repo() ) return self._rp_repo @@ -52,7 +52,6 @@ def cluster_repo(self) -> ClusterRepository: self._cluster_repo = ClusterRepository(session_maker=self.config.db.async_session_maker) return self._cluster_repo - @property def k8s_cache(self) -> K8sDbCache: """The DB adapter for the k8s cache.""" if not self._k8s_cache: @@ -61,19 +60,21 @@ def k8s_cache(self) -> K8sDbCache: ) return self._k8s_cache + def quota_repo(self) -> QuotaRepository: + """The resource quota repository.""" + if not self._quota_repo: + # NOTE: We only need the QuotaRepository to instantiate the ResourcePoolRepository which is used to get + # the resource class and pool information for metrics. We don't need quota information for metrics at all + # so we use the dummy client for quotas here as we don't actually access k8s, just the db. + self._quota_repo = QuotaRepository( + DummyCoreClient({}, {}), DummySchedulingClient({}), namespace=self.config.k8s.renku_namespace + ) + return self._quota_repo + @classmethod def from_env(cls) -> "DependencyManager": """Create a config from environment variables.""" config = Config.from_env() - - # NOTE: We only need the QuotaRepository to instantiate the ResourcePoolRepository which is used to get - # the resource class and pool information for metrics. We don't need quota information for metrics at all - # so we use the dummy client for quotas here as we don't actually access k8s, just the db. - quota_repo = QuotaRepository( - DummyCoreClient({}, {}), DummySchedulingClient({}), namespace=config.k8s.renku_namespace - ) - return cls( config=config, - quota_repo=quota_repo, ) diff --git a/bases/renku_data_services/k8s_cache/main.py b/bases/renku_data_services/k8s_cache/main.py index ffdd43b8f..fb0434361 100644 --- a/bases/renku_data_services/k8s_cache/main.py +++ b/bases/renku_data_services/k8s_cache/main.py @@ -2,6 +2,10 @@ import asyncio +import sentry_sdk +from sentry_sdk.integrations.asyncio import AsyncioIntegration +from sentry_sdk.integrations.grpc import GRPCIntegration + from renku_data_services.app_config import logging from renku_data_services.k8s.clients import K8sClusterClient from renku_data_services.k8s.config import KubeConfigEnv, get_clusters @@ -20,6 +24,21 @@ async def main() -> None: dm = DependencyManager.from_env() default_kubeconfig = KubeConfigEnv() + if dm.config.sentry.enabled: + logger.info("enabling sentry") + sentry_sdk.init( + dsn=dm.config.sentry.dsn, + environment=dm.config.sentry.environment, + release=dm.config.sentry.release or None, + integrations=[ + AsyncioIntegration(), + GRPCIntegration(), + ], + enable_tracing=dm.config.sentry.sample_rate > 0, + traces_sample_rate=dm.config.sentry.sample_rate, + in_app_include=["renku_data_services"], + ) + clusters: dict[ClusterId, K8sClusterClient] = {} async for client in get_clusters( kube_conf_root_dir=dm.config.k8s.kube_config_root, @@ -35,10 +54,10 @@ async def main() -> None: kinds.extend([BUILD_RUN_GVK, TASK_RUN_GVK]) logger.info(f"Resources: {kinds}") watcher = K8sWatcher( - handler=k8s_object_handler(dm.k8s_cache, dm.metrics, rp_repo=dm.rp_repo), + handler=k8s_object_handler(dm.k8s_cache(), dm.metrics(), rp_repo=dm.rp_repo()), clusters=clusters, kinds=kinds, - db_cache=dm.k8s_cache, + db_cache=dm.k8s_cache(), ) await watcher.start() logger.info("started watching resources") From dc9283f5f20d0534ee30c4ff961a303b9fe7ca6f Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Mon, 27 Oct 2025 14:27:45 +0000 Subject: [PATCH 2/3] send test error --- bases/renku_data_services/k8s_cache/main.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bases/renku_data_services/k8s_cache/main.py b/bases/renku_data_services/k8s_cache/main.py index fb0434361..6eb2d50b4 100644 --- a/bases/renku_data_services/k8s_cache/main.py +++ b/bases/renku_data_services/k8s_cache/main.py @@ -64,6 +64,12 @@ async def main() -> None: # create file for liveness probe with open("/tmp/cache_ready", "w") as f: # nosec B108 f.write("ready") + with sentry_sdk.new_scope() as scope: + scope.set_level("debug") + try: + raise RuntimeError("Test error") + except: # noqa: E722 + sentry_sdk.capture_exception() await watcher.wait() From 20f86a1847ab18086d7d9b1402f0fd07f327c677 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Mon, 27 Oct 2025 14:42:34 +0000 Subject: [PATCH 3/3] cleanup and remove test error --- bases/renku_data_services/k8s_cache/dependencies.py | 3 --- bases/renku_data_services/k8s_cache/main.py | 6 ------ 2 files changed, 9 deletions(-) diff --git a/bases/renku_data_services/k8s_cache/dependencies.py b/bases/renku_data_services/k8s_cache/dependencies.py index bce4e7e99..d4d701921 100644 --- a/bases/renku_data_services/k8s_cache/dependencies.py +++ b/bases/renku_data_services/k8s_cache/dependencies.py @@ -16,9 +16,6 @@ class DependencyManager: config: Config - # quota_repo: QuotaRepository - # _k8s_cache: K8sDbCache | None = None - _quota_repo: QuotaRepository | None = field(default=None, repr=False, init=False) _k8s_cache: K8sDbCache | None = field(default=None, repr=False, init=False) _metrics_repo: MetricsRepository | None = field(default=None, repr=False, init=False) diff --git a/bases/renku_data_services/k8s_cache/main.py b/bases/renku_data_services/k8s_cache/main.py index 6eb2d50b4..fb0434361 100644 --- a/bases/renku_data_services/k8s_cache/main.py +++ b/bases/renku_data_services/k8s_cache/main.py @@ -64,12 +64,6 @@ async def main() -> None: # create file for liveness probe with open("/tmp/cache_ready", "w") as f: # nosec B108 f.write("ready") - with sentry_sdk.new_scope() as scope: - scope.set_level("debug") - try: - raise RuntimeError("Test error") - except: # noqa: E722 - sentry_sdk.capture_exception() await watcher.wait()