diff --git a/paasta_tools/instance/hpa_metrics_parser.py b/paasta_tools/instance/hpa_metrics_parser.py index 47f77e2b6d..5cfeb36413 100644 --- a/paasta_tools/instance/hpa_metrics_parser.py +++ b/paasta_tools/instance/hpa_metrics_parser.py @@ -1,8 +1,6 @@ from typing import Optional -from kubernetes.client.models.v2beta2_object_metric_status import ( - V2beta2ObjectMetricStatus, -) +from kubernetes.client import V2beta2ObjectMetricStatus from mypy_extensions import TypedDict diff --git a/paasta_tools/kubernetes/remote_run.py b/paasta_tools/kubernetes/remote_run.py index 24842fca28..4d70dbf25b 100644 --- a/paasta_tools/kubernetes/remote_run.py +++ b/paasta_tools/kubernetes/remote_run.py @@ -20,6 +20,7 @@ from typing import TypedDict from kubernetes.client import AuthenticationV1TokenRequest +from kubernetes.client import RbacV1Subject from kubernetes.client import V1Job from kubernetes.client import V1ObjectMeta from kubernetes.client import V1Pod @@ -28,7 +29,6 @@ from kubernetes.client import V1RoleBinding from kubernetes.client import V1RoleRef from kubernetes.client import V1ServiceAccount -from kubernetes.client import V1Subject from kubernetes.client import V1TokenRequestSpec from kubernetes.client.exceptions import ApiException @@ -495,7 +495,7 @@ def bind_role_to_service_account( name=role, ), subjects=[ - V1Subject( + RbacV1Subject( kind="ServiceAccount", name=service_account, ), diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py index 112d5e811d..35c3818d7c 100644 --- a/paasta_tools/kubernetes_tools.py +++ b/paasta_tools/kubernetes_tools.py @@ -50,6 +50,7 @@ from kubernetes import config as kube_config from kubernetes.client import CoreV1Event from kubernetes.client import models +from kubernetes.client import RbacV1Subject from kubernetes.client import V1Affinity from kubernetes.client import V1AWSElasticBlockStoreVolumeSource from kubernetes.client import V1Capabilities @@ -113,7 +114,6 @@ from kubernetes.client import V1ServiceAccountTokenProjection from kubernetes.client import V1StatefulSet from kubernetes.client import V1StatefulSetSpec -from kubernetes.client import V1Subject from kubernetes.client import V1TCPSocketAction from kubernetes.client import V1TopologySpreadConstraint from kubernetes.client import V1Volume @@ -3010,7 +3010,7 @@ def ensure_paasta_api_rolebinding(kube_client: KubeClient, namespace: str) -> No name="paasta-api-server-per-namespace", ), subjects=[ - V1Subject( + RbacV1Subject( kind="User", name="yelp.com/paasta-api-server", ), @@ -3393,20 +3393,30 @@ def pod_disruption_budget_for_service_instance( max_unavailable: Union[str, int], namespace: str, ) -> V1PodDisruptionBudget: + selector = V1LabelSelector( + match_labels={ + "paasta.yelp.com/service": service, + "paasta.yelp.com/instance": instance, + } + ) + if load_system_paasta_config().get_enable_unhealthy_pod_eviction(): + spec = V1PodDisruptionBudgetSpec( + max_unavailable=max_unavailable, + unhealthy_pod_eviction_policy="AlwaysAllow", # XXX: should this be configurable? + selector=selector, + ) + else: + spec = V1PodDisruptionBudgetSpec( + max_unavailable=max_unavailable, + selector=selector, + ) + return V1PodDisruptionBudget( metadata=V1ObjectMeta( name=get_kubernetes_app_name(service, instance), namespace=namespace, ), - spec=V1PodDisruptionBudgetSpec( - max_unavailable=max_unavailable, - selector=V1LabelSelector( - match_labels={ - "paasta.yelp.com/service": service, - "paasta.yelp.com/instance": instance, - } - ), - ), + spec=spec, ) @@ -4393,7 +4403,7 @@ def ensure_service_account( name=k8s_role, ), subjects=[ - V1Subject( + RbacV1Subject( kind="ServiceAccount", namespace=namespace, name=sa_name, diff --git a/paasta_tools/utils.py b/paasta_tools/utils.py index 9dc25d3a2b..171387e708 100644 --- a/paasta_tools/utils.py +++ b/paasta_tools/utils.py @@ -2066,6 +2066,7 @@ class SystemPaastaConfigDict(TypedDict, total=False): enable_tron_tsc: bool default_spark_iam_user: str default_spark_driver_pool_override: str + enable_unhealthy_pod_eviction: bool def load_system_paasta_config( @@ -2847,6 +2848,15 @@ def get_ecosystem_for_cluster(self, cluster: str) -> Optional[str]: # NOTE: this should never happen unless we've gotten bad data return None + def get_enable_unhealthy_pod_eviction(self) -> bool: + """ + Feature toggle to enable PDBs to evict unhealthy pods. + + Defaults to False (meaning unhealthy pods will not be evicted) as we have legacy clusters where this is + unsupported. + """ + return self.config_dict.get("enable_unhealthy_pod_eviction", False) + def _run( command: Union[str, List[str]], diff --git a/requirements-minimal.txt b/requirements-minimal.txt index ac33bf8530..b322aebf4d 100644 --- a/requirements-minimal.txt +++ b/requirements-minimal.txt @@ -29,8 +29,9 @@ kazoo >= 2.0.0 # from our pinned-dependencies. The upper-bound should generally be the latest kubernetes version # that we can use across our different clusters (e.g, if X.0.0 removes an API version that we use # in any cluster, this upper-bound should be < X.0.0) +# NOTE: the above is not exactly true anymore due to our legacy clusters... # we should probably also be better at setting a correct lower-bound, but that's less likely to cause issues. -kubernetes >= 18.20.0, < 26.0.0 +kubernetes >= 27.0.0, < 30.0.0 ldap3 manhole mypy-extensions >= 0.3.0 diff --git a/requirements.txt b/requirements.txt index e1c3b6e00e..5843b27990 100644 --- a/requirements.txt +++ b/requirements.txt @@ -46,7 +46,7 @@ jmespath==0.9.3 jsonref==0.1 jsonschema==2.5.1 kazoo==2.8.0 -kubernetes==24.2.0 +kubernetes==29.0.0 ldap3==2.6 manhole==1.5.0 MarkupSafe==1.1.1 @@ -57,7 +57,7 @@ mypy-extensions==0.4.3 nats-py==2.8.0 networkx==2.4 nulltype==2.3.1 -oauthlib==3.1.0 +oauthlib==3.3.1 objgraph==3.4.0 PasteDeploy==1.5.2 plaster==1.1.2 diff --git a/tests/kubernetes/test_remote_run.py b/tests/kubernetes/test_remote_run.py index c770c8979b..0b9f7468df 100644 --- a/tests/kubernetes/test_remote_run.py +++ b/tests/kubernetes/test_remote_run.py @@ -17,13 +17,13 @@ import pytest from kubernetes.client import AuthenticationV1TokenRequest +from kubernetes.client import RbacV1Subject from kubernetes.client import V1ObjectMeta from kubernetes.client import V1PolicyRule from kubernetes.client import V1Role from kubernetes.client import V1RoleBinding from kubernetes.client import V1RoleRef from kubernetes.client import V1ServiceAccount -from kubernetes.client import V1Subject from kubernetes.client import V1TokenRequestSpec from kubernetes.client.exceptions import ApiException @@ -406,7 +406,7 @@ def test_bind_role_to_service_account(): name="somerole", ), subjects=[ - V1Subject( + RbacV1Subject( kind="ServiceAccount", name="somesa", ), diff --git a/tests/test_kubernetes_tools.py b/tests/test_kubernetes_tools.py index c85e0ca16e..dd4bc4bc46 100644 --- a/tests/test_kubernetes_tools.py +++ b/tests/test_kubernetes_tools.py @@ -13,6 +13,7 @@ from hypothesis.strategies import floats from hypothesis.strategies import integers from kubernetes import client as kube_client +from kubernetes.client import RbacV1Subject from kubernetes.client import V1Affinity from kubernetes.client import V1AWSElasticBlockStoreVolumeSource from kubernetes.client import V1Capabilities @@ -62,7 +63,6 @@ from kubernetes.client import V1ServiceAccountTokenProjection from kubernetes.client import V1StatefulSet from kubernetes.client import V1StatefulSetSpec -from kubernetes.client import V1Subject from kubernetes.client import V1TCPSocketAction from kubernetes.client import V1TopologySpreadConstraint from kubernetes.client import V1Volume @@ -3591,14 +3591,37 @@ def test_max_unavailable(instances, bmf): assert type(res) is int -def test_pod_disruption_budget_for_service_instance(): +@pytest.mark.parametrize( + "unhealthy_eviction_enabled,expected_policy", + ( + (False, None), + (True, "AlwaysAllow"), + ), +) +def test_pod_disruption_budget_for_service_instance( + unhealthy_eviction_enabled, expected_policy +): mock_namespace = "paasta" - x = pod_disruption_budget_for_service_instance( - service="foo_1", - instance="bar_1", - max_unavailable="10%", - namespace=mock_namespace, - ) + with mock.patch( + "paasta_tools.utils.load_system_paasta_config", + return_value=SystemPaastaConfig( + { + "enable_unhealthy_pod_eviction": unhealthy_eviction_enabled, + }, + "/fake/dir/", + ), + autospec=True, + ) as mock_load_system_paasta_config, mock.patch( + "paasta_tools.kubernetes_tools.load_system_paasta_config", + new=mock_load_system_paasta_config, + autospec=False, + ): + x = pod_disruption_budget_for_service_instance( + service="foo_1", + instance="bar_1", + max_unavailable="10%", + namespace=mock_namespace, + ) assert x.metadata.name == "foo--1-bar--1" assert x.metadata.namespace == "paasta" @@ -3607,6 +3630,7 @@ def test_pod_disruption_budget_for_service_instance(): "paasta.yelp.com/service": "foo_1", "paasta.yelp.com/instance": "bar_1", } + assert x.spec.unhealthy_pod_eviction_policy == expected_policy def test_create_pod_disruption_budget(): @@ -4772,7 +4796,7 @@ def test_ensure_service_account_with_k8s_role_new(): name=k8s_role, ), subjects=[ - V1Subject( + RbacV1Subject( kind="ServiceAccount", namespace=namespace, name=expected_sa_name, @@ -4825,7 +4849,7 @@ def test_ensure_service_account_existing(): name=k8s_role, ), subjects=[ - V1Subject( + RbacV1Subject( kind="ServiceAccount", namespace=namespace, name=expected_sa_name, @@ -4886,7 +4910,7 @@ def test_ensure_service_account_existing_different_role(): name=k8s_role, ), subjects=[ - V1Subject( + RbacV1Subject( kind="ServiceAccount", namespace=namespace, name=expected_sa_name, @@ -5010,7 +5034,7 @@ def test_ensure_service_account_caps_with_k8s(): name=k8s_role, ), subjects=[ - V1Subject( + RbacV1Subject( kind="ServiceAccount", namespace=namespace, name=expected_sa_name,