Skip to content

Commit

Permalink
NGC PyTorch 24.12 Dockerfile & requirements.txt (#305)
Browse files Browse the repository at this point in the history
* NGC PyTorch 24.12 and requirements.txt

NGC PyTorch 24.12
 - Python 3.12
 - PyTorch 2.6
 - CUDA 12.6
and requirements.txt for NGC PyTorch 24.12

* modify installing dependencies
 - remove hwloc-like packages (Dependency conflict
  ImportError: /opt/hpcx/ucc/lib/libucc.so.1: undefined symbol issue)
 - kss (Dependency issue; python 3.12 doesn't support pyyaml 6.0,
   changed pyyaml version 6.0 to 6.0.2)

* fix to run Jupyter & Jupyter notebook for ngc-pytorch24.12
 - update Jupyter libraries

* fix jupyter call definition
  • Loading branch information
studioego authored Feb 15, 2025
1 parent 18ec726 commit 0ffbfbd
Show file tree
Hide file tree
Showing 2 changed files with 592 additions and 0 deletions.
312 changes: 312 additions & 0 deletions vendor/ngc-pytorch/Dockerfile.24.12-pytorch2.6-py312-cuda12.6
Original file line number Diff line number Diff line change
@@ -0,0 +1,312 @@
FROM nvcr.io/nvidia/pytorch:24.12-py3
# NVIDIA NGC PyTorch with Python 3.12

ENV CODESERVER=4.96.4
ENV DEBIAN_FRONTEND=noninteractive \
MPLBACKEND=Svg \
PIP_IGNORE_INSTALLED=0 \
PYTHONUNBUFFERED=1 \
LD_LIBRARY_PATH="/usr/local/lib/python3.12/dist-packages/torch/lib:/usr/local/lib/python3.12/dist-packages/torch_tensorrt/lib:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-12.6:/usr/local/cuda-12.6/include:/usr/include/x86_64-linux-gnu:/opt/hpcx/ucc/lib" \
PATH="/usr/local/lib/python3.12/dist-packages/torch_tensorrt/bin:/usr/local/mpi/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/tensorrt/bin" \
BNB_CUDA_VERSION=126 \
CPLUS_INCLUDE_PATH=/usr/include/gdal \
C_INCLUDE_PATH=/usr/include/gdal \
LANG=C.UTF-8

#RUN apt remove libappstream3
#
RUN apt-key adv --refresh-keys --keyserver keyserver.ubuntu.com
RUN apt-get update
RUN apt-get install -y --no-install-recommends \
automake \
bison \
build-essential \
bzip2 \
ca-certificates \
cabextract \
chrpath \
devscripts \
dh-make \
dirmngr \
dkms \
# dpatch \
ethtool \
fakeroot \
ffmpeg \
flex \
fonts-nanum \
fonts-nanum-coding \
fonts-nanum-extra \
g++ \
gcc \
gdal-bin \
gfortran \
gir1.2-gtk-3.0 \
git \
graphviz \
htop \
iputils-ping \
libasound2-dev \
libatlas-base-dev \
libavcodec-dev \
libavformat-dev \
libboost-dev \
libboost-filesystem-dev \
libboost-system-dev \
libcairo2-dev \
libcurl4-openssl-dev \
libdc1394-dev \
libeigen3-dev \
libfaac-dev \
libfftw3-dev \
libfreetype-dev \
libfuse2 \
libgdal-dev \
libgeos++-dev \
libgeos-dev \
libgflags-dev \
libgirepository1.0-dev \
libglib2.0-0 \
libgoogle-glog-dev \
libgphoto2-dev \
libgstreamer-plugins-base1.0-dev \
libgstreamer1.0-dev \
libgtk-3-dev \
libhdf5-dev \
libjemalloc-dev \
libjpeg-dev \
liblapack-dev \
libmnl0 \
libmp3lame-dev \
# libmpdec-dev \
libncurses-dev \
libnl-3-200 \
libnl-3-dev \
libnl-route-3-200 \
libnl-route-3-dev \
libnlopt-dev \
libnuma-dev \
libopenblas-dev \
libopencore-amrnb-dev \
libopencore-amrwb-dev \
libpci3 \
libpng-dev \
libportmidi-dev \
libproj-dev \
libprotobuf-dev \
libsdl2-dev \
libsdl-image1.2-dev \
libsdl-mixer1.2-dev \
libsdl-ttf2.0-dev \
libsm6 \
libsmpeg-dev \
libssl-dev \
libswscale-dev \
libswresample-dev \
libtbb-dev \
libtheora-dev \
libtiff-dev \
libv4l-dev \
libvorbis-dev \
libx264-dev \
libxext6 \
libxine2-dev \
libxml2-dev \
libxrender-dev \
libxrender1 \
libxslt1-dev \
libxvidcore-dev \
libyaml-dev \
libzmq3-dev \
lintian \
lsof \
make \
mercurial \
media-types \
mailcap \
# mpich \
ncurses-term \
net-tools \
openjdk-11-jdk \
openssh-client \
openssh-server \
pbzip2 \
pciutils \
pdsh \
pkg-config \
proj-bin \
protobuf-compiler \
pv \
python-is-python3 \
python3-xkit \
python3-gdal \
screen-resolution-extra \
sudo \
subversion \
swig \
tk \
unzip \
v4l-utils \
vim \
x264 \
xvfb \
xz-utils \
sudo \
yarn \
yasm \
zip \
tcl \
udev
RUN mkdir -p /opt/oracle && \
cd /opt/oracle && \
wget https://download.oracle.com/otn_software/linux/instantclient/instantclient-basiclite-linuxx64.zip && \
unzip instantclient-basiclite-linuxx64.zip && \
rm -f instantclient-basiclite-linuxx64.zip && \
cd /opt/oracle/instantclient* && \
rm -f *jdbc* *occi* *mysql* *README *jar uidrvci genezi adrci && \
echo /opt/oracle/instantclient* > /etc/ld.so.conf.d/oracle-instantclient.conf && \
ldconfig && \
cd /tmp && \
curl -sL https://deb.nodesource.com/setup_20.x -o /tmp/nodesource_setup.sh && \
bash /tmp/nodesource_setup.sh && \
apt update ; apt install nodejs && \
# Install CUDA + cuDNN
mkdir -p /usr/local/nvidia/lib && \
ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.9.6.0 /usr/local/cuda/lib64/libcudnn.so && \
ldconfig && \
update-alternatives --install /usr/bin/python python /usr/bin/python3 2

WORKDIR /tmp
RUN git clone -q --branch=v0.3.27 https://github.com/xianyi/OpenBLAS.git && \
cd OpenBLAS && \
make DYNAMIC_ARCH=1 NO_AFFINITY=1 NUM_THREADS=48 FC=gfortran && \
make install && \
# install git-lfs
cd /tmp && \
curl -sLO https://github.com/git-lfs/git-lfs/releases/download/v3.5.1/git-lfs-linux-amd64-v3.5.1.tar.gz && \
tar -zxf git-lfs-linux-amd64-v3.5.1.tar.gz && \
cd git-lfs-3.5.1 && \
bash install.sh && \
git clone https://github.com/aristocratos/bashtop.git && \
cd bashtop && \
make install && \
rm -rf /tmp/*

RUN curl -fL https://github.com/cdr/code-server/releases/download/v$CODESERVER/code-server-$CODESERVER-linux-amd64.tar.gz \
| tar -C /usr/local/lib -xz && \
mv /usr/local/lib/code-server-$CODESERVER-linux-amd64 /usr/local/lib/code-server-$CODESERVER && \
ln -s /usr/local/lib/code-server-$CODESERVER/bin/code-server /usr/local/bin/code-server

# remove hwloc-like packages (ImportError: /opt/hpcx/ucc/lib/libucc.so.1: undefined symbol issue)
RUN apt-get purge -y hwloc-nox libhwloc-plugins
# install kss (temporary, when kss package's pyyaml version is updated, move to requirements.txt)
WORKDIR /tmp
RUN python3 -m pip install --upgrade pip setuptools wheel Cython
RUN cd /tmp && git clone https://github.com/hyunwoongko/kss.git && \
cd kss && \
sed -i 's/pyyaml==6.0/pyyaml>=6.0.2/' setup.py && \
pip install . && \
rm -rf /tmp/*

COPY ./requirements.24.12.txt /tmp/requirements.txt
RUN python3 -m pip install --disable-pip-version-check --no-cache-dir -r requirements.txt
RUN python3 -m pip install datasets==3.0.1 multiprocess==0.70.16

RUN python3 -m pip install --no-cache-dir \
mpi4py>=3.1.4 \
nni>=2.10.1 \
mlflow>=2.6.0 \
scikit-nni>=0.2.1

RUN python3 -m pip install pytorch-lightning && \
python3 -m pip install --no-cache-dir \
torch-scatter \
torch-sparse \
torch-cluster \
torch-spline-conv \
torch-geometric


WORKDIR /tmp
RUN git clone -q https://github.com/TimDettmers/bitsandbytes.git && \
cd /tmp/bitsandbytes && \
cmake -B build -DBUILD_CUDA=ON -S . && \
python setup.py install

# Install ipython kernelspec
RUN python3 -m ipykernel install --display-name "PyTorch 2.6 (NGC 24.12/Python 3.12) on Backend.AI" && \
cat /usr/local/share/jupyter/kernels/python3/kernel.json
# Backend.AI specifics
COPY ./service-defs /etc/backend.ai/service-defs
COPY ./runner-scripts/bootstrap.sh runner-scripts/setup_multinode.py /opt/container/

LABEL ai.backend.kernelspec="1" \
ai.backend.envs.corecount="OPENBLAS_NUM_THREADS,OMP_NUM_THREADS,NPROC" \
ai.backend.features="batch query uid-match user-input" \
ai.backend.base-distro="ubuntu24.04" \
ai.backend.accelerators="cuda" \
ai.backend.resource.min.cpu="1" \
ai.backend.resource.min.mem="1g" \
ai.backend.resource.min.cuda.device=1 \
ai.backend.resource.min.cuda.shares=0 \
ai.backend.runtime-type="python" \
ai.backend.runtime-path="/usr/bin/python" \
ai.backend.service-ports="ipython:pty:3000,jupyter:http:8091,jupyterlab:http:8090,vscode:http:8180,tensorboard:http:6006,mlflow-ui:preopen:5000,nniboard:preopen:8080"
# Install Jupyterlab extensions

RUN python3 -m pip install --no-cache-dir \
jupyter_nbextensions_configurator>=0.6.3 \
jupyter_contrib_nbextensions>=0.7.0 \
jupyter-contrib-core>=0.4.2 \
jupyter-highlight-selected-word>=0.2.0 \
jupyter-server>=2.14.2 \
jupyter_lsp>=2.2.0
RUN python3 -m pip install --no-cache-dir \
jupyterlab>=4.3.2 \
jupyterlab-code-formatter>=3.0.2 \
jupyterlab-hdf \
jupyterlab-launcher \
notebook

RUN python3 -m pip install --no-cache-dir \
jupyter-dash \
jupyter-js-widgets-nbextension \
jupyter-latex-envs \
jupyter-packaging
RUN python3 -m pip install --no-cache-dir \
jupyter-server-mathjax \
jupyter-server-proxy==1.6.0 \
jupyter-telemetry==0.1.0 \
jupyter==1.0.0 \
jupyter-client==8.4.0 \
jupyter_bokeh==2.0.4

RUN python3 -m pip uninstall -y markupsafe && \
python3 -m pip uninstall -y jupyter_core==5.3.0 && \
python3 -m pip install markupsafe==2.0.1 &&\
python3 -m pip install jsonschema[format,format-nongpl]==4.17.3

RUN python3 -m pip install widgetsnbextension jupyter_contrib_nbextensions jupyter_lsp markupsafe==2.0.1 && \
jupyter lab build --dev-build=False --minimize=False && \
jupyter nbextensions_configurator enable && \
jupyter contrib nbextension install && \
jupyter nbextension enable --py --sys-prefix widgetsnbextension && \
jupyter serverextension enable --py jupyterlab --sys-prefix && \
jupyter labextension install --no-build @jupyter-widgets/jupyterlab-manager && \
jupyter serverextension enable --py jupyter_lsp && \
jupyter labextension install --no-build @jupyterlab/toc && \
jupyter nbextension enable execute_time/ExecuteTime && \
jupyter nbextension enable toc2/main && \
jupyter labextension install @jupyterlab/toc && \
jupyter lab build

RUN apt autoclean && \
sed -i 's/source \/usr\/local\/nvm\/nvm.sh//' /etc/bash.bashrc && \
ln -sf /usr/share/terminfo/x/xterm-color /usr/share/terminfo/x/xterm-256color && \
rm -f /tmp/*.whl /tmp/requirem* && \
rm -rf /var/lib/apt/lists/* && \
rm -rf /root/.cache && \
rm -rf /tmp/*


WORKDIR /home/work
Loading

0 comments on commit 0ffbfbd

Please sign in to comment.