Skip to content

Commit

Permalink
[k8s] add kr8s wait timeouts
Browse files Browse the repository at this point in the history
  • Loading branch information
yashgorana committed Feb 7, 2024
1 parent b6f563b commit eeb1179
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 8 deletions.
5 changes: 2 additions & 3 deletions packages/syft/src/syft/custom_worker/builder_docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import docker

# relative
from .builder_types import BUILD_IMAGE_TIMEOUT_SEC
from .builder_types import BuilderBase
from .builder_types import ImageBuildResult
from .builder_types import ImagePushResult
Expand All @@ -18,8 +19,6 @@


class DockerBuilder(BuilderBase):
BUILD_MAX_WAIT = 30 * 60

def build_image(
self,
tag: str,
Expand All @@ -40,7 +39,7 @@ def build_image(
with contextlib.closing(docker.from_env()) as client:
image_result, logs = client.images.build(
tag=tag,
timeout=self.BUILD_MAX_WAIT,
timeout=BUILD_IMAGE_TIMEOUT_SEC,
buildargs=buildargs,
**kwargs,
)
Expand Down
12 changes: 10 additions & 2 deletions packages/syft/src/syft/custom_worker/builder_k8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
from kr8s.objects import Secret

# relative
from .builder_types import BUILD_IMAGE_TIMEOUT_SEC
from .builder_types import BuilderBase
from .builder_types import ImageBuildResult
from .builder_types import ImagePushResult
from .builder_types import PUSH_IMAGE_TIMEOUT_SEC
from .k8s import INTERNAL_REGISTRY_HOST
from .k8s import JOB_COMPLETION_TTL
from .k8s import KUBERNETES_NAMESPACE
Expand Down Expand Up @@ -66,7 +68,10 @@ def build_image(
)

# wait for job to complete/fail
job.wait(["condition=Complete", "condition=Failed"])
job.wait(
["condition=Complete", "condition=Failed"],
timeout=BUILD_IMAGE_TIMEOUT_SEC,
)

# get logs
logs = self._get_logs(job)
Expand Down Expand Up @@ -119,7 +124,10 @@ def push_image(
push_secret=push_secret,
)

job.wait(["condition=Complete", "condition=Failed"])
job.wait(
["condition=Complete", "condition=Failed"],
timeout=PUSH_IMAGE_TIMEOUT_SEC,
)
exit_code = self._get_exit_code(job)[0]
logs = self._get_logs(job)
except Exception:
Expand Down
12 changes: 11 additions & 1 deletion packages/syft/src/syft/custom_worker/builder_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,17 @@
# third party
from pydantic import BaseModel

__all__ = ["BuilderBase", "ImageBuildResult", "ImagePushResult"]
__all__ = [
"BuilderBase",
"ImageBuildResult",
"ImagePushResult",
"BUILD_IMAGE_TIMEOUT_SEC",
"PUSH_IMAGE_TIMEOUT_SEC",
]


BUILD_IMAGE_TIMEOUT_SEC = 30 * 60
PUSH_IMAGE_TIMEOUT_SEC = 10 * 60


class ImageBuildResult(BaseModel):
Expand Down
12 changes: 10 additions & 2 deletions packages/syft/src/syft/custom_worker/runner_k8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from .k8s import get_kr8s_client

JSONPATH_AVAILABLE_REPLICAS = "{.status.availableReplicas}"
CREATE_POOL_TIMEOUT_SEC = 60
SCALE_POOL_TIMEOUT_SEC = 60


class KubernetesRunner:
Expand Down Expand Up @@ -57,7 +59,10 @@ def create_pool(
)

# wait for replicas to be available and ready
deployment.wait(f"jsonpath='{JSONPATH_AVAILABLE_REPLICAS}'={replicas}")
deployment.wait(
f"jsonpath='{JSONPATH_AVAILABLE_REPLICAS}'={replicas}",
timeout=CREATE_POOL_TIMEOUT_SEC,
)
except Exception:
raise
finally:
Expand All @@ -72,7 +77,10 @@ def scale_pool(self, pool_name: str, replicas: int) -> Optional[StatefulSet]:
if not deployment:
return None
deployment.scale(replicas)
deployment.wait(f"jsonpath='{JSONPATH_AVAILABLE_REPLICAS}'={replicas}")
deployment.wait(
f"jsonpath='{JSONPATH_AVAILABLE_REPLICAS}'={replicas}",
timeout=SCALE_POOL_TIMEOUT_SEC,
)
return deployment

def get_pool(self, pool_name: str) -> Optional[StatefulSet]:
Expand Down

0 comments on commit eeb1179

Please sign in to comment.