# Dockerfile_k8s_gpu
# Base image: CUDA 12.1.1 runtime on Ubuntu 20.04.
# We use the cuda runtime image instead of devel image to reduce size (1.3GB vs 3.6GB)
FROM nvidia/cuda:12.1.1-runtime-ubuntu20.04
# TARGETARCH is declared so that the target-platform value supplied by the builder
# is visible to RUN steps in this stage. It has no default here, so it is empty when
# the builder does not set it (the conda/kubectl install step then falls back to `uname -m`)
ARG TARGETARCH
# Declared as ARG (not ENV) so non-interactive apt behaviour applies only while building
ARG DEBIAN_FRONTEND=noninteractive
# Install ssh and other local dependencies
# We remove cuda lists to avoid conflicts with the cuda version installed by ray
# apt-get is used instead of apt: apt's command-line interface is not stable for
# scripted use and prints a warning when run non-interactively.
# The apt package lists are removed in the same layer so they do not add to image size.
RUN rm -rf /etc/apt/sources.list.d/cuda* && \
    apt-get update -y && \
    apt-get install -y \
        curl \
        fuse \
        gcc \
        git \
        nano \
        netcat-openbsd \
        openssh-server \
        patch \
        pciutils \
        rsync \
        socat \
        sudo \
        unzip && \
    rm -rf /var/lib/apt/lists/*
# Setup SSH and generate hostkeys
# This step runs as root (the USER switch happens later), so no sudo is needed.
#   - /var/run/sshd is created for the ssh daemon
#   - root login over ssh is switched from prohibit-password to yes
#   - pam_loginuid is downgraded from required to optional in the sshd PAM config
#   - ssh-keygen -A generates any missing host keys at their default paths
RUN mkdir -p /var/run/sshd && \
    sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \
    sed -i 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' /etc/pam.d/sshd && \
    ssh-keygen -A
# Setup new user named sky and add to sudoers.
# Also add /opt/conda/bin to sudo path and give sky user permission to run sudo without password
# This step runs as root, so the files are written directly rather than through sudo.
# The sudoers drop-in is set to mode 0440, the conventional mode for sudoers files.
RUN useradd -m -s /bin/bash sky && \
    echo 'sky ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \
    echo 'Defaults secure_path="/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"' > /etc/sudoers.d/sky && \
    chmod 0440 /etc/sudoers.d/sky
# Switch to sky user
USER sky
# Set HOME environment variable for sky user
ENV HOME=/home/sky
# Set current working directory
WORKDIR /home/sky
# Run subsequent shell-form RUN steps under bash. pipefail makes a pipeline fail
# when any stage fails, so a failed download piped into a shell (`curl ... | sh`)
# aborts the build instead of being masked by the last command's exit status
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Install conda and other dependencies based on architecture
# Keep the conda and Ray versions below in sync with the ones in skylet.constants
# Keep this section in sync with the custom image optimization recommendations in our docs (kubernetes-getting-started.rst)
#
# Steps, in order:
#   1. Resolve ARCH from TARGETARCH, falling back to `uname -m` mapped to
#      amd64/arm64 when TARGETARCH is empty.
#   2. Download the matching Miniconda (py310) installer, install it into
#      $HOME/miniconda3, run `conda init` and enable auto-activation of base.
#   3. Install uv, create the ~/skypilot-runtime venv, install
#      skypilot-nightly[remote,kubernetes], ray and pycryptodome into it, then
#      uninstall the skypilot-nightly package itself.
#   4. Download kubectl for ARCH. It is installed to the user's local bin instead
#      of a system path to avoid sudo-related issues during cross-architecture
#      builds, especially on ARM. $HOME/.local/bin is appended to PATH in ~/.bashrc.
#
# Every curl download uses -f so an HTTP error fails the build instead of the
# error response body being saved (and, for kubectl, installed as the binary).
RUN ARCH=${TARGETARCH:-$(case "$(uname -m)" in \
        "x86_64") echo "amd64" ;; \
        "aarch64") echo "arm64" ;; \
        *) uname -m ;; \
    esac)} && \
    if [ "$ARCH" = "arm64" ]; then \
        MINICONDA_ARCH=aarch64; \
    else \
        MINICONDA_ARCH=x86_64; \
    fi && \
    curl -fsSL "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.11.0-2-Linux-${MINICONDA_ARCH}.sh" -o miniconda.sh && \
    bash miniconda.sh -b -p "$HOME/miniconda3" && \
    rm miniconda.sh && \
    eval "$("$HOME/miniconda3/bin/conda" shell.bash hook)" && \
    conda init && \
    conda config --set auto_activate_base true && \
    export PIP_DISABLE_PIP_VERSION_CHECK=1 && \
    curl -LsSf https://astral.sh/uv/install.sh | sh && \
    "$HOME/.local/bin/uv" venv ~/skypilot-runtime --seed --python=3.10 && \
    source ~/skypilot-runtime/bin/activate && \
    "$HOME/.local/bin/uv" pip install 'skypilot-nightly[remote,kubernetes]' 'ray[default]==2.9.3' 'pycryptodome==3.12.0' && \
    "$HOME/.local/bin/uv" pip uninstall skypilot-nightly && \
    curl -fsSLO "https://dl.k8s.io/release/v1.31.6/bin/linux/$ARCH/kubectl" && \
    chmod +x kubectl && \
    mkdir -p "$HOME/.local/bin" && \
    mv kubectl "$HOME/.local/bin/" && \
    echo 'export PATH="$PATH:$HOME/.local/bin"' >> ~/.bashrc
# Set PYTHONUNBUFFERED=1 to have Python print to stdout/stderr immediately
# (set with ENV, so it also applies in containers started from this image)
ENV PYTHONUNBUFFERED=1