From 2d4059af59cc0fd932eac2d61623035f8898168a Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 10 Sep 2024 14:50:09 -0700 Subject: [PATCH] [Lambda][k8s] k8s deployment guide on lambda cloud (#3929) * Deploy k8s with sky * Deploy k8s with sky * Deploy k8s with sky * Deploy k8s with sky * Deploy k8s with sky * Deploy k8s with sky * Deploy k8s with sky * Deploy k8s with sky * Deploy k8s with sky * Suppress insecure request warnings * Updates * Update installation * update readme * updates * revert urllib warning * Update kubernetes.py * naming updates * comments --- .../kubernetes/kubernetes-deployment.rst | 20 +++- examples/k8s_cloud_deploy/README.md | 96 +++++++++++++++++++ examples/k8s_cloud_deploy/cloud_k8s.yaml | 96 +++++++++++++++++++ examples/k8s_cloud_deploy/launch_k8s.sh | 87 +++++++++++++++++ sky/templates/lambda-ray.yml.j2 | 4 +- 5 files changed, 300 insertions(+), 3 deletions(-) create mode 100644 examples/k8s_cloud_deploy/README.md create mode 100644 examples/k8s_cloud_deploy/cloud_k8s.yaml create mode 100755 examples/k8s_cloud_deploy/launch_k8s.sh diff --git a/docs/source/reference/kubernetes/kubernetes-deployment.rst b/docs/source/reference/kubernetes/kubernetes-deployment.rst index eb5bb31d78d..8384d412ced 100644 --- a/docs/source/reference/kubernetes/kubernetes-deployment.rst +++ b/docs/source/reference/kubernetes/kubernetes-deployment.rst @@ -35,6 +35,13 @@ Below we include minimal guides to set up a new Kubernetes cluster in different Amazon's hosted Kubernetes service. + .. grid-item-card:: On-demand Cloud VMs + :link: kubernetes-setup-ondemand + :link-type: ref + :text-align: center + + We provide scripts to deploy k8s on on-demand cloud VMs. + .. _kubernetes-setup-kind: @@ -267,4 +274,15 @@ After the GPU operator is installed, create the nvidia RuntimeClass required by metadata: name: nvidia handler: nvidia - EOF \ No newline at end of file + EOF + + +.. _kubernetes-setup-ondemand: + +Deploying on cloud VMs +^^^^^^^^^^^^^^^^^^^^^^ + +You can also spin up on-demand cloud VMs and deploy Kubernetes on them. + +We provide scripts to take care of provisioning VMs, installing Kubernetes, setting up GPU support and configuring your local kubeconfig. +Refer to our `Deploying Kubernetes on VMs guide `_ for more details. \ No newline at end of file diff --git a/examples/k8s_cloud_deploy/README.md b/examples/k8s_cloud_deploy/README.md new file mode 100644 index 00000000000..64519e2fa53 --- /dev/null +++ b/examples/k8s_cloud_deploy/README.md @@ -0,0 +1,96 @@ +# Deploying a Kubernetes cluster on the cloud in 1-click with SkyPilot + +This example demonstrates how to deploy a Kubernetes cluster on the cloud with SkyPilot. For the purposes of this guide, we will use lambda cloud as the cloud provider, but you can change cloud providers by editing `cloud_k8s.yaml`. + +## Prerequisites +1. Latest SkyPilot nightly release: +```bash +pip install "skypilot-nightly[lambda,kubernetes]" +``` + +2. Use a cloud which supports opening ports on SkyPilot or manually expose ports 6443 and 443 on the VMs. This is required to expose k8s API server. + + For example, if using lambda cloud, configure the firewall on the lambda cloud dashboard to allow inbound connections on port `443` and `6443`. + +

+*(Screenshot: Lambda Cloud dashboard firewall rules allowing inbound connections on ports 443 and 6443.)*

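+To sanity-check the firewall rules once the VMs are up (i.e., after running `./launch_k8s.sh` below), you can run a quick connectivity test from your local machine. The snippet below is only a sketch: it assumes the cluster was launched with the default name `k8s` (as used by `launch_k8s.sh`) and that `nc` (netcat) is installed locally.
+```bash
+# Fetch the public IP of the head VM (assumes the cluster is named "k8s")
+HEAD_IP=$(sky status --ip k8s)
+# Check that the ports required to reach the Kubernetes API server are open
+nc -zv "$HEAD_IP" 443
+nc -zv "$HEAD_IP" 6443
+```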
+ +## Instructions + +1. Edit `cloud_k8s.yaml` to set the desired number of workers and GPUs per node. If using GCP, AWS or Azure, uncomment the ports line to allow inbound connections to the Kubernetes API server. +```yaml +resources: + cloud: lambda + accelerators: A10:1 + # ports: 6443 + +num_nodes: 2 +``` + +2. Use the convenience script to launch the cluster: +```bash +./launch_k8s.sh +``` + +SkyPilot will do all the heavy lifting for you: provision lambda VMs, deploy the k8s cluster, fetch the kubeconfig, and set up your local kubectl to connect to the cluster. + +3. You should now be able to run `kubectl` and `sky` commands to interact with the cluster: +```console +$ kubectl get nodes +NAME STATUS ROLES AGE VERSION +129-80-133-44 Ready 14m v1.30.4+k3s1 +150-230-191-161 Ready control-plane,master 14m v1.30.4+k3s1 + +$ sky show-gpus --cloud kubernetes +Kubernetes GPUs +GPU QTY_PER_NODE TOTAL_GPUS TOTAL_FREE_GPUS +A10 1 2 2 + +Kubernetes per node GPU availability +NODE_NAME GPU_NAME TOTAL_GPUS FREE_GPUS +129-80-133-44 A10 1 1 +150-230-191-161 A10 1 1 +``` + +## Run AI workloads on your Kubernetes cluster with SkyPilot + +### Development clusters +To launch a [GPU enabled development cluster](https://skypilot.readthedocs.io/en/latest/examples/interactive-development.html), run `sky launch -c mycluster --cloud kubernetes --gpus A10:1`. + +SkyPilot will setup SSH config for you. +* [SSH access](https://skypilot.readthedocs.io/en/latest/examples/interactive-development.html#ssh): `ssh mycluster` +* [VSCode remote development](https://skypilot.readthedocs.io/en/latest/examples/interactive-development.html#vscode): `code --remote ssh-remote+mycluster "/"` + + +### Jobs +To run jobs, use `sky jobs launch --gpus A10:1 --cloud kubernetes -- 'nvidia-smi; sleep 600'` + +You can submit multiple jobs and let SkyPilot handle queuing if the cluster runs out of resources: +```bash +$ sky jobs queue +Fetching managed job statuses... +Managed jobs +In progress tasks: 2 RUNNING, 1 STARTING +ID TASK NAME RESOURCES SUBMITTED TOT. DURATION JOB DURATION #RECOVERIES STATUS +3 - finetune 1x[A10:1] 24 secs ago 24s - 0 STARTING +2 - qlora 1x[A10:1] 2 min ago 2m 18s 12s 0 RUNNING +1 - sky-cmd 1x[A10:1] 4 mins ago 4m 27s 3m 12s 0 RUNNING +``` + +You can also observe the pods created by SkyPilot with `kubectl get pods`: +```bash +$ kubectl get pods +NAME READY STATUS RESTARTS AGE +qlora-2-2ea4-head 1/1 Running 0 5m31s +sky-cmd-1-2ea4-head 1/1 Running 0 8m36s +sky-jobs-controller-2ea485ea-2ea4-head 1/1 Running 0 10m +``` + +Refer to [SkyPilot docs](https://skypilot.readthedocs.io/) for more. + +## Teardown +To teardown the Kubernetes cluster, run: +```bash +sky down k8s +``` diff --git a/examples/k8s_cloud_deploy/cloud_k8s.yaml b/examples/k8s_cloud_deploy/cloud_k8s.yaml new file mode 100644 index 00000000000..2db46fb502b --- /dev/null +++ b/examples/k8s_cloud_deploy/cloud_k8s.yaml @@ -0,0 +1,96 @@ +resources: + cloud: lambda + accelerators: A10:1 +# Uncomment the following line to expose ports on a different cloud +# ports: 6443 + +num_nodes: 2 + +envs: + SKY_K3S_TOKEN: mytoken # Can be any string, used to join worker nodes to the cluster + +run: | + wait_for_gpu_operator_installation() { + echo "Starting wait for GPU operator installation..." + + SECONDS=0 + TIMEOUT=600 # 10 minutes in seconds + + while true; do + if kubectl describe nodes --kubeconfig ~/.kube/config | grep -q 'nvidia.com/gpu:'; then + echo "GPU operator installed." 
+ break + elif [ $SECONDS -ge $TIMEOUT ]; then + echo "Timed out waiting for GPU operator installation." + exit 1 + else + echo "Waiting for GPU operator installation..." + echo "To check status, see Nvidia GPU operator pods:" + echo "kubectl get pods -n gpu-operator --kubeconfig ~/.kube/config" + sleep 5 + fi + done + } + + if [ ${SKYPILOT_NODE_RANK} -ne 0 ]; then + # Worker nodes + MASTER_ADDR=`echo "$SKYPILOT_NODE_IPS" | head -n1` + echo "Worker joining k3s cluster @ ${MASTER_ADDR}" + curl -sfL https://get.k3s.io | K3S_URL=https://${MASTER_ADDR}:6443 K3S_TOKEN=${SKY_K3S_TOKEN} sh - + exit 0 + fi + + # Head node + curl -sfL https://get.k3s.io | K3S_TOKEN=${SKY_K3S_TOKEN} sh - + + # Copy over kubeconfig file + echo "Copying kubeconfig file" + mkdir -p $HOME/.kube + sudo cp /etc/rancher/k3s/k3s.yaml $HOME/.kube/config + sudo chown $(id -u):$(id -g) $HOME/.kube/config + + # Wait for k3s to be ready + echo "Waiting for k3s to be ready" + sleep 5 + kubectl wait --for=condition=ready node --all --timeout=5m --kubeconfig ~/.kube/config + + # =========== GPU support =========== + # Install helm + echo "Installing helm" + curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 + chmod 700 get_helm.sh + ./get_helm.sh + + helm repo add nvidia https://helm.ngc.nvidia.com/nvidia && helm repo update + + # Create namespace if it doesn't exist + echo "Creating namespace gpu-operator" + kubectl create namespace gpu-operator --kubeconfig ~/.kube/config || true + + # Patch ldconfig + echo "Patching ldconfig" + sudo ln -s /sbin/ldconfig /sbin/ldconfig.real + + # Install GPU operator + echo "Installing GPU operator" + helm install gpu-operator -n gpu-operator --create-namespace \ + nvidia/gpu-operator $HELM_OPTIONS \ + --set 'toolkit.env[0].name=CONTAINERD_CONFIG' \ + --set 'toolkit.env[0].value=/var/lib/rancher/k3s/agent/etc/containerd/config.toml' \ + --set 'toolkit.env[1].name=CONTAINERD_SOCKET' \ + --set 'toolkit.env[1].value=/run/k3s/containerd/containerd.sock' \ + --set 'toolkit.env[2].name=CONTAINERD_RUNTIME_CLASS' \ + --set 'toolkit.env[2].value=nvidia' + + wait_for_gpu_operator_installation + + # Create RuntimeClass + sleep 5 + echo "Creating RuntimeClass" + kubectl apply --kubeconfig ~/.kube/config -f - <&1) || true + +# Check if the command was successful and if the output contains a valid IP address +if [[ "$SKY_STATUS_OUTPUT" != *"ValueError"* ]]; then + PRIMARY_ENDPOINT="$SKY_STATUS_OUTPUT" +else + echo "Primary endpoint retrieval failed or unsupported. Falling back to alternate method..." +fi + +# If primary endpoint is empty or invalid, try to fetch from SSH config +if [[ -z "$PRIMARY_ENDPOINT" ]]; then + echo "Using alternate method to fetch endpoint..." + + # Parse the HostName from the SSH config file + SSH_CONFIG_FILE="$HOME/.sky/generated/ssh/${CLUSTER_NAME}" + if [[ -f "$SSH_CONFIG_FILE" ]]; then + ENDPOINT=$(awk '/^ *HostName / { print $2; exit}' "$SSH_CONFIG_FILE") + ENDPOINT="${ENDPOINT}:6443" + fi + + if [[ -z "$ENDPOINT" ]]; then + echo "Failed to retrieve a valid endpoint. Exiting." 
+ exit 1 + fi +else + ENDPOINT="$PRIMARY_ENDPOINT" + echo "Using primary endpoint: $ENDPOINT" +fi + +# Rsync the remote kubeconfig to the local machine +mkdir -p ~/.kube +rsync -av ${CLUSTER_NAME}:'~/.kube/config' ~/.kube/config + +KUBECONFIG_FILE="$HOME/.kube/config" + +# Back up the original kubeconfig file if it exists +if [[ -f "$KUBECONFIG_FILE" ]]; then + echo "Backing up kubeconfig file to ${KUBECONFIG_FILE}.bak" + cp "$KUBECONFIG_FILE" "${KUBECONFIG_FILE}.bak" +fi + +# Temporary file to hold the modified kubeconfig +TEMP_FILE=$(mktemp) + +# Remove the certificate-authority-data, and replace the server with +awk ' + BEGIN { in_cluster = 0 } + /^clusters:/ { in_cluster = 1 } + /^users:/ { in_cluster = 0 } + in_cluster && /^ *certificate-authority-data:/ { next } + in_cluster && /^ *server:/ { + print " server: https://'${ENDPOINT}'" + print " insecure-skip-tls-verify: true" + next + } + { print } +' "$KUBECONFIG_FILE" > "$TEMP_FILE" + +# Replace the original kubeconfig with the modified one +mv "$TEMP_FILE" "$KUBECONFIG_FILE" + +echo "Updated kubeconfig file successfully." + +sleep 5 # Wait for the cluster to be ready +sky check kubernetes + +set +x +echo -e "\033[1m===== Kubernetes cluster deployment complete =====\033[0m" +echo -e "You can now access your k8s cluster with kubectl and skypilot.\n" +echo -e "• View the list of available GPUs on Kubernetes: \033[1msky show-gpus --cloud kubernetes\033[0m" +echo -e "• To launch a SkyPilot job running nvidia-smi on this cluster: \033[1msky launch --cloud kubernetes --gpus -- nvidia-smi\033[0m" + diff --git a/sky/templates/lambda-ray.yml.j2 b/sky/templates/lambda-ray.yml.j2 index 4e8b834503f..6b6d94cfb3c 100644 --- a/sky/templates/lambda-ray.yml.j2 +++ b/sky/templates/lambda-ray.yml.j2 @@ -89,13 +89,13 @@ setup_commands: # Increment the following for catching performance bugs easier: # current num items (num SSH connections): 2 head_start_ray_commands: - - {{ sky_activate_python_env }}; {{ sky_ray_cmd }} stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 {{ sky_ray_cmd }} start --disable-usage-stats --head --port={{ray_port}} --dashboard-port={{ray_dashboard_port}} --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1; + - {{ sky_activate_python_env }}; {{ sky_ray_cmd }} stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 {{ sky_ray_cmd }} start --disable-usage-stats --head --port={{ray_port}} --min-worker-port 11002 --dashboard-port={{ray_dashboard_port}} --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1; which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; {{dump_port_command}}; {{ray_head_wait_initialized_command}} {%- if num_nodes > 1 %} worker_start_ray_commands: - - {{ sky_activate_python_env }}; {{ sky_ray_cmd }} stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 {{ sky_ray_cmd }} start --disable-usage-stats --address=$RAY_HEAD_IP:{{ray_port}} --object-manager-port=8076 {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1; + - {{ sky_activate_python_env }}; {{ sky_ray_cmd }} stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 {{ sky_ray_cmd }} start --disable-usage-stats --address=$RAY_HEAD_IP:{{ray_port}} --min-worker-port 11002 --object-manager-port=8076 {{"--resources='%s'" % custom_resources if 
custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1; which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; {%- else %} worker_start_ray_commands: []