From 06dcac1cd8a60ef25b274e268721de609716f032 Mon Sep 17 00:00:00 2001
From: Luis Perez
Date: Fri, 11 Jul 2025 08:48:33 -0700
Subject: [PATCH] Enable conditional setting of unhealthyPodEvictionPolicy on PDBs

This will ensure that pods in CrashLoopBackOff don't impact pool scaledowns
(since otherwise, Karpenter will refuse to consolidate nodes due to PDBs).

This is not enabled by default due to our legacy clusters, but will be toggled
at a per-cluster level through SystemPaastaConfig.

This PR is also somewhat spicy due to these legacy clusters, so we'll
definitely need to do some testing to make sure that the upgraded k8s
clientlib doesn't cause chaos there (or we'll have to take more drastic
action). That said, I've upgraded the k8s clientlib version to a somewhat
more modern version to match the oldest non-legacy cluster we have and
therefore had to make a number of changes (e.g., V1Subject was renamed to
RbacV1Subject, etc.).
---
 paasta_tools/instance/hpa_metrics_parser.py |  4 +-
 paasta_tools/kubernetes/remote_run.py       |  4 +-
 paasta_tools/kubernetes_tools.py            | 34 +++++++++------
 paasta_tools/utils.py                       | 10 +++++
 requirements-minimal.txt                    |  3 +-
 requirements.txt                            |  4 +-
 tests/kubernetes/test_remote_run.py         |  4 +-
 tests/test_kubernetes_tools.py              | 48 +++++++++++++++------
 8 files changed, 77 insertions(+), 34 deletions(-)

diff --git a/paasta_tools/instance/hpa_metrics_parser.py b/paasta_tools/instance/hpa_metrics_parser.py
index 47f77e2b6d..5cfeb36413 100644
--- a/paasta_tools/instance/hpa_metrics_parser.py
+++ b/paasta_tools/instance/hpa_metrics_parser.py
@@ -1,8 +1,6 @@
 from typing import Optional
 
-from kubernetes.client.models.v2beta2_object_metric_status import (
-    V2beta2ObjectMetricStatus,
-)
+from kubernetes.client import V2beta2ObjectMetricStatus
 from mypy_extensions import TypedDict
 
 
diff --git a/paasta_tools/kubernetes/remote_run.py b/paasta_tools/kubernetes/remote_run.py
index 24842fca28..4d70dbf25b 100644
--- a/paasta_tools/kubernetes/remote_run.py
+++ b/paasta_tools/kubernetes/remote_run.py
@@ -20,6 +20,7 @@
 from typing import TypedDict
 
 from kubernetes.client import AuthenticationV1TokenRequest
+from kubernetes.client import RbacV1Subject
 from kubernetes.client import V1Job
 from kubernetes.client import V1ObjectMeta
 from kubernetes.client import V1Pod
@@ -28,7 +29,6 @@
 from kubernetes.client import V1RoleBinding
 from kubernetes.client import V1RoleRef
 from kubernetes.client import V1ServiceAccount
-from kubernetes.client import V1Subject
 from kubernetes.client import V1TokenRequestSpec
 from kubernetes.client.exceptions import ApiException
 
@@ -495,7 +495,7 @@ def bind_role_to_service_account(
             name=role,
         ),
         subjects=[
-            V1Subject(
+            RbacV1Subject(
                 kind="ServiceAccount",
                 name=service_account,
             ),
diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py
index 112d5e811d..35c3818d7c 100644
--- a/paasta_tools/kubernetes_tools.py
+++ b/paasta_tools/kubernetes_tools.py
@@ -50,6 +50,7 @@
 from kubernetes import config as kube_config
 from kubernetes.client import CoreV1Event
 from kubernetes.client import models
+from kubernetes.client import RbacV1Subject
 from kubernetes.client import V1Affinity
 from kubernetes.client import V1AWSElasticBlockStoreVolumeSource
 from kubernetes.client import V1Capabilities
@@ -113,7 +114,6 @@
 from kubernetes.client import V1ServiceAccountTokenProjection
 from kubernetes.client import V1StatefulSet
 from kubernetes.client import V1StatefulSetSpec
-from kubernetes.client import V1Subject
 from kubernetes.client import V1TCPSocketAction
 from kubernetes.client import V1TopologySpreadConstraint
 from kubernetes.client import V1Volume
@@ -3010,7 +3010,7 @@ def ensure_paasta_api_rolebinding(kube_client: KubeClient, namespace: str) -> None:
             name="paasta-api-server-per-namespace",
         ),
         subjects=[
-            V1Subject(
+            RbacV1Subject(
                 kind="User",
                 name="yelp.com/paasta-api-server",
             ),
@@ -3393,20 +3393,30 @@ def pod_disruption_budget_for_service_instance(
     max_unavailable: Union[str, int],
     namespace: str,
 ) -> V1PodDisruptionBudget:
+    selector = V1LabelSelector(
+        match_labels={
+            "paasta.yelp.com/service": service,
+            "paasta.yelp.com/instance": instance,
+        }
+    )
+    if load_system_paasta_config().get_enable_unhealthy_pod_eviction():
+        spec = V1PodDisruptionBudgetSpec(
+            max_unavailable=max_unavailable,
+            unhealthy_pod_eviction_policy="AlwaysAllow",  # XXX: should this be configurable?
+            selector=selector,
+        )
+    else:
+        spec = V1PodDisruptionBudgetSpec(
+            max_unavailable=max_unavailable,
+            selector=selector,
+        )
+
     return V1PodDisruptionBudget(
         metadata=V1ObjectMeta(
             name=get_kubernetes_app_name(service, instance),
             namespace=namespace,
         ),
-        spec=V1PodDisruptionBudgetSpec(
-            max_unavailable=max_unavailable,
-            selector=V1LabelSelector(
-                match_labels={
-                    "paasta.yelp.com/service": service,
-                    "paasta.yelp.com/instance": instance,
-                }
-            ),
-        ),
+        spec=spec,
     )
 
 
@@ -4393,7 +4403,7 @@ def ensure_service_account(
             name=k8s_role,
         ),
         subjects=[
-            V1Subject(
+            RbacV1Subject(
                 kind="ServiceAccount",
                 namespace=namespace,
                 name=sa_name,
diff --git a/paasta_tools/utils.py b/paasta_tools/utils.py
index 9dc25d3a2b..171387e708 100644
--- a/paasta_tools/utils.py
+++ b/paasta_tools/utils.py
@@ -2066,6 +2066,7 @@ class SystemPaastaConfigDict(TypedDict, total=False):
     enable_tron_tsc: bool
     default_spark_iam_user: str
     default_spark_driver_pool_override: str
+    enable_unhealthy_pod_eviction: bool
 
 
 def load_system_paasta_config(
@@ -2847,6 +2848,15 @@ def get_ecosystem_for_cluster(self, cluster: str) -> Optional[str]:
         # NOTE: this should never happen unless we've gotten bad data
         return None
 
+    def get_enable_unhealthy_pod_eviction(self) -> bool:
+        """
+        Feature toggle to enable PDBs to evict unhealthy pods.
+
+        Defaults to False (meaning unhealthy pods will not be evicted) as we have legacy clusters where this is
+        unsupported.
+        """
+        return self.config_dict.get("enable_unhealthy_pod_eviction", False)
+
 
 def _run(
     command: Union[str, List[str]],
diff --git a/requirements-minimal.txt b/requirements-minimal.txt
index ac33bf8530..b322aebf4d 100644
--- a/requirements-minimal.txt
+++ b/requirements-minimal.txt
@@ -29,8 +29,9 @@ kazoo >= 2.0.0
 # from our pinned-dependencies. The upper-bound should generally be the latest kubernetes version
 # that we can use across our different clusters (e.g, if X.0.0 removes an API version that we use
 # in any cluster, this upper-bound should be < X.0.0)
+# NOTE: the above is not exactly true anymore due to our legacy clusters...
 # we should probably also be better at setting a correct lower-bound, but that's less likely to cause issues.
-kubernetes >= 18.20.0, < 26.0.0
+kubernetes >= 27.0.0, < 30.0.0
 ldap3
 manhole
 mypy-extensions >= 0.3.0
diff --git a/requirements.txt b/requirements.txt
index e1c3b6e00e..5843b27990 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -46,7 +46,7 @@ jmespath==0.9.3
 jsonref==0.1
 jsonschema==2.5.1
 kazoo==2.8.0
-kubernetes==24.2.0
+kubernetes==29.0.0
 ldap3==2.6
 manhole==1.5.0
 MarkupSafe==1.1.1
@@ -57,7 +57,7 @@ mypy-extensions==0.4.3
 nats-py==2.8.0
 networkx==2.4
 nulltype==2.3.1
-oauthlib==3.1.0
+oauthlib==3.3.1
 objgraph==3.4.0
 PasteDeploy==1.5.2
 plaster==1.1.2
diff --git a/tests/kubernetes/test_remote_run.py b/tests/kubernetes/test_remote_run.py
index c770c8979b..0b9f7468df 100644
--- a/tests/kubernetes/test_remote_run.py
+++ b/tests/kubernetes/test_remote_run.py
@@ -17,13 +17,13 @@
 import pytest
 
 from kubernetes.client import AuthenticationV1TokenRequest
+from kubernetes.client import RbacV1Subject
 from kubernetes.client import V1ObjectMeta
 from kubernetes.client import V1PolicyRule
 from kubernetes.client import V1Role
 from kubernetes.client import V1RoleBinding
 from kubernetes.client import V1RoleRef
 from kubernetes.client import V1ServiceAccount
-from kubernetes.client import V1Subject
 from kubernetes.client import V1TokenRequestSpec
 from kubernetes.client.exceptions import ApiException
 
@@ -406,7 +406,7 @@ def test_bind_role_to_service_account():
             name="somerole",
         ),
         subjects=[
-            V1Subject(
+            RbacV1Subject(
                 kind="ServiceAccount",
                 name="somesa",
             ),
diff --git a/tests/test_kubernetes_tools.py b/tests/test_kubernetes_tools.py
index c85e0ca16e..dd4bc4bc46 100644
--- a/tests/test_kubernetes_tools.py
+++ b/tests/test_kubernetes_tools.py
@@ -13,6 +13,7 @@
 from hypothesis.strategies import floats
 from hypothesis.strategies import integers
 from kubernetes import client as kube_client
+from kubernetes.client import RbacV1Subject
 from kubernetes.client import V1Affinity
 from kubernetes.client import V1AWSElasticBlockStoreVolumeSource
 from kubernetes.client import V1Capabilities
@@ -62,7 +63,6 @@
 from kubernetes.client import V1ServiceAccountTokenProjection
 from kubernetes.client import V1StatefulSet
 from kubernetes.client import V1StatefulSetSpec
-from kubernetes.client import V1Subject
 from kubernetes.client import V1TCPSocketAction
 from kubernetes.client import V1TopologySpreadConstraint
 from kubernetes.client import V1Volume
@@ -3591,14 +3591,37 @@ def test_max_unavailable(instances, bmf):
     assert type(res) is int
 
 
-def test_pod_disruption_budget_for_service_instance():
+@pytest.mark.parametrize(
+    "unhealthy_eviction_enabled,expected_policy",
+    (
+        (False, None),
+        (True, "AlwaysAllow"),
+    ),
+)
+def test_pod_disruption_budget_for_service_instance(
+    unhealthy_eviction_enabled, expected_policy
+):
     mock_namespace = "paasta"
-    x = pod_disruption_budget_for_service_instance(
-        service="foo_1",
-        instance="bar_1",
-        max_unavailable="10%",
-        namespace=mock_namespace,
-    )
+    with mock.patch(
+        "paasta_tools.utils.load_system_paasta_config",
+        return_value=SystemPaastaConfig(
+            {
+                "enable_unhealthy_pod_eviction": unhealthy_eviction_enabled,
+            },
+            "/fake/dir/",
+        ),
+        autospec=True,
+    ) as mock_load_system_paasta_config, mock.patch(
+        "paasta_tools.kubernetes_tools.load_system_paasta_config",
+        new=mock_load_system_paasta_config,
+        autospec=False,
+    ):
+        x = pod_disruption_budget_for_service_instance(
+            service="foo_1",
+            instance="bar_1",
+            max_unavailable="10%",
+            namespace=mock_namespace,
+        )
 
     assert x.metadata.name == "foo--1-bar--1"
     assert x.metadata.namespace == "paasta"
@@ -3607,6 +3630,7 @@ def test_pod_disruption_budget_for_service_instance():
         "paasta.yelp.com/service": "foo_1",
         "paasta.yelp.com/instance": "bar_1",
     }
+    assert x.spec.unhealthy_pod_eviction_policy == expected_policy
 
 
 def test_create_pod_disruption_budget():
@@ -4772,7 +4796,7 @@ def test_ensure_service_account_with_k8s_role_new():
             name=k8s_role,
         ),
         subjects=[
-            V1Subject(
+            RbacV1Subject(
                 kind="ServiceAccount",
                 namespace=namespace,
                 name=expected_sa_name,
@@ -4825,7 +4849,7 @@ def test_ensure_service_account_existing():
             name=k8s_role,
         ),
         subjects=[
-            V1Subject(
+            RbacV1Subject(
                 kind="ServiceAccount",
                 namespace=namespace,
                 name=expected_sa_name,
@@ -4886,7 +4910,7 @@ def test_ensure_service_account_existing_different_role():
             name=k8s_role,
        ),
         subjects=[
-            V1Subject(
+            RbacV1Subject(
                 kind="ServiceAccount",
                 namespace=namespace,
                 name=expected_sa_name,
@@ -5010,7 +5034,7 @@ def test_ensure_service_account_caps_with_k8s():
             name=k8s_role,
         ),
         subjects=[
-            V1Subject(
+            RbacV1Subject(
                 kind="ServiceAccount",
                 namespace=namespace,
                 name=expected_sa_name,
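
For reviewers, a minimal usage sketch (not part of the patch itself) of what the new
toggle changes. It assumes it runs on a cluster whose SystemPaastaConfig is already
readable by paasta_tools; the service/instance names are hypothetical:

    # Sketch only: illustrates the intended effect of enable_unhealthy_pod_eviction,
    # assuming load_system_paasta_config() can read this cluster's config.
    from paasta_tools.kubernetes_tools import pod_disruption_budget_for_service_instance

    pdb = pod_disruption_budget_for_service_instance(
        service="example_service",  # hypothetical service
        instance="main",            # hypothetical instance
        max_unavailable="10%",
        namespace="paasta",
    )

    # With enable_unhealthy_pod_eviction: true in SystemPaastaConfig, the generated spec
    # carries unhealthyPodEvictionPolicy=AlwaysAllow, so pods that never become Ready
    # (e.g. CrashLoopBackOff) no longer block evictions. With the toggle unset, this stays
    # None and the apiserver default (IfHealthyBudget) applies.
    print(pdb.spec.unhealthy_pod_eviction_policy)

Note that "AlwaysAllow" is currently hard-coded when the toggle is on (see the XXX comment
in pod_disruption_budget_for_service_instance); per-service configurability would be a
separate change if we ever need it.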