From eeb11796596877d671353f82377cba20289719b6 Mon Sep 17 00:00:00 2001 From: Yash Gorana Date: Wed, 7 Feb 2024 09:47:30 +0530 Subject: [PATCH] [k8s] add kr8s wait timeouts --- .../syft/src/syft/custom_worker/builder_docker.py | 5 ++--- packages/syft/src/syft/custom_worker/builder_k8s.py | 12 ++++++++++-- .../syft/src/syft/custom_worker/builder_types.py | 12 +++++++++++- packages/syft/src/syft/custom_worker/runner_k8s.py | 12 ++++++++++-- 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/packages/syft/src/syft/custom_worker/builder_docker.py b/packages/syft/src/syft/custom_worker/builder_docker.py index e1f24520c25..9eda180e08b 100644 --- a/packages/syft/src/syft/custom_worker/builder_docker.py +++ b/packages/syft/src/syft/custom_worker/builder_docker.py @@ -10,6 +10,7 @@ import docker # relative +from .builder_types import BUILD_IMAGE_TIMEOUT_SEC from .builder_types import BuilderBase from .builder_types import ImageBuildResult from .builder_types import ImagePushResult @@ -18,8 +19,6 @@ class DockerBuilder(BuilderBase): - BUILD_MAX_WAIT = 30 * 60 - def build_image( self, tag: str, @@ -40,7 +39,7 @@ def build_image( with contextlib.closing(docker.from_env()) as client: image_result, logs = client.images.build( tag=tag, - timeout=self.BUILD_MAX_WAIT, + timeout=BUILD_IMAGE_TIMEOUT_SEC, buildargs=buildargs, **kwargs, ) diff --git a/packages/syft/src/syft/custom_worker/builder_k8s.py b/packages/syft/src/syft/custom_worker/builder_k8s.py index 395028d69b4..1be16d3c0ac 100644 --- a/packages/syft/src/syft/custom_worker/builder_k8s.py +++ b/packages/syft/src/syft/custom_worker/builder_k8s.py @@ -11,9 +11,11 @@ from kr8s.objects import Secret # relative +from .builder_types import BUILD_IMAGE_TIMEOUT_SEC from .builder_types import BuilderBase from .builder_types import ImageBuildResult from .builder_types import ImagePushResult +from .builder_types import PUSH_IMAGE_TIMEOUT_SEC from .k8s import INTERNAL_REGISTRY_HOST from .k8s import JOB_COMPLETION_TTL from .k8s import KUBERNETES_NAMESPACE @@ -66,7 +68,10 @@ def build_image( ) # wait for job to complete/fail - job.wait(["condition=Complete", "condition=Failed"]) + job.wait( + ["condition=Complete", "condition=Failed"], + timeout=BUILD_IMAGE_TIMEOUT_SEC, + ) # get logs logs = self._get_logs(job) @@ -119,7 +124,10 @@ def push_image( push_secret=push_secret, ) - job.wait(["condition=Complete", "condition=Failed"]) + job.wait( + ["condition=Complete", "condition=Failed"], + timeout=PUSH_IMAGE_TIMEOUT_SEC, + ) exit_code = self._get_exit_code(job)[0] logs = self._get_logs(job) except Exception: diff --git a/packages/syft/src/syft/custom_worker/builder_types.py b/packages/syft/src/syft/custom_worker/builder_types.py index 53c27788791..8007bf476e9 100644 --- a/packages/syft/src/syft/custom_worker/builder_types.py +++ b/packages/syft/src/syft/custom_worker/builder_types.py @@ -7,7 +7,17 @@ # third party from pydantic import BaseModel -__all__ = ["BuilderBase", "ImageBuildResult", "ImagePushResult"] +__all__ = [ + "BuilderBase", + "ImageBuildResult", + "ImagePushResult", + "BUILD_IMAGE_TIMEOUT_SEC", + "PUSH_IMAGE_TIMEOUT_SEC", +] + + +BUILD_IMAGE_TIMEOUT_SEC = 30 * 60 +PUSH_IMAGE_TIMEOUT_SEC = 10 * 60 class ImageBuildResult(BaseModel): diff --git a/packages/syft/src/syft/custom_worker/runner_k8s.py b/packages/syft/src/syft/custom_worker/runner_k8s.py index ff2c3120ebb..9f21102221f 100644 --- a/packages/syft/src/syft/custom_worker/runner_k8s.py +++ b/packages/syft/src/syft/custom_worker/runner_k8s.py @@ -16,6 +16,8 @@ from .k8s import get_kr8s_client JSONPATH_AVAILABLE_REPLICAS = "{.status.availableReplicas}" +CREATE_POOL_TIMEOUT_SEC = 60 +SCALE_POOL_TIMEOUT_SEC = 60 class KubernetesRunner: @@ -57,7 +59,10 @@ def create_pool( ) # wait for replicas to be available and ready - deployment.wait(f"jsonpath='{JSONPATH_AVAILABLE_REPLICAS}'={replicas}") + deployment.wait( + f"jsonpath='{JSONPATH_AVAILABLE_REPLICAS}'={replicas}", + timeout=CREATE_POOL_TIMEOUT_SEC, + ) except Exception: raise finally: @@ -72,7 +77,10 @@ def scale_pool(self, pool_name: str, replicas: int) -> Optional[StatefulSet]: if not deployment: return None deployment.scale(replicas) - deployment.wait(f"jsonpath='{JSONPATH_AVAILABLE_REPLICAS}'={replicas}") + deployment.wait( + f"jsonpath='{JSONPATH_AVAILABLE_REPLICAS}'={replicas}", + timeout=SCALE_POOL_TIMEOUT_SEC, + ) return deployment def get_pool(self, pool_name: str) -> Optional[StatefulSet]: