Skip to content

Commit

Permalink
Merge pull request #275 from conda-forge/revert-272-cleanup_old_cuda_…
Browse files Browse the repository at this point in the history
…bits

Revert "Cleanup and consolidate CUDA images"
  • Loading branch information
isuruf authored Jul 16, 2024
2 parents 1e6db00 + f450b55 commit d773d8c
Show file tree
Hide file tree
Showing 8 changed files with 287 additions and 47 deletions.
6 changes: 0 additions & 6 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,28 +36,22 @@ jobs:
SHORT_DESCRIPTION: "conda-forge build image for Cent0S 7 on aarch64"

- DOCKERIMAGE: linux-anvil-cuda
DOCKERFILE: linux-anvil-cuda
DOCKERTAG: "11.8"
CUDA_VER: "11.8.0"
DISTRO_ARCH: "amd64"
DISTRO_NAME: "centos"
DISTRO_VER: "7"
SHORT_DESCRIPTION: "conda-forge build image for Cent0S 7 on x86_64 with CUDA"

- DOCKERIMAGE: linux-anvil-ppc64le-cuda
DOCKERFILE: linux-anvil-cuda
DOCKERTAG: "11.8"
CUDA_VER: "11.8.0"
DISTRO_ARCH: "ppc64le"
DISTRO_NAME: "ubi"
DISTRO_VER: "8"
SHORT_DESCRIPTION: "conda-forge build image for Cent0S 8 on ppc64le with CUDA"

- DOCKERIMAGE: linux-anvil-aarch64-cuda
DOCKERFILE: linux-anvil-cuda
DOCKERTAG: "11.8"
CUDA_VER: "11.8.0"
DISTRO_ARCH: "arm64"
DISTRO_NAME: "ubi"
DISTRO_VER: "8"
SHORT_DESCRIPTION: "conda-forge build image for Cent0S 8 on aarch64 with CUDA"
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ environment variables passed in to be able to build. In this case, you
will want to use a command similar to the following:

```sh
docker build --rm --build-arg DISTRO_NAME=centos --build-arg DISTRO_VER=6 --build-arg CUDA_VER=11.8.0 -f linux-anvil-cuda/Dockerfile .
docker build --rm --build-arg DISTRO_NAME=centos --build-arg DISTRO_VER=6 --build-arg CUDA_VER=10.2 -f linux-anvil-cuda/Dockerfile .
```

## Environment variables

* `CUDA_VER`: This is the cuda & cudatoolkit version that will be used. The
value of this variable should be in major-minor-patch format, e.g. `11.8.0`.
* `DISTRO_ARCH`: This is the Linux architecture that the image should use.
Should match the upstream Docker image, e.g. `amd64`.
* `$CUDA_VER`: This is the cuda & cudatoolkit version that will be used. The
value of this variable should be in major-minor format, e.g. `9.2` for versions
`9.x` and `10.x`. For versions `11.x` the variable should be in
major-minor-patch format, e.g. `11.2.0`.
* `DISTRO_NAME`: This is the Linux distro image name that should be built with.
Should match the upstream Docker image, e.g. `centos`.
* `DISTRO_VER`: This is the version of the Linux distro (typically CentOS) that the image
Expand Down
107 changes: 107 additions & 0 deletions linux-anvil-aarch64-cuda/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Build arguments declared before `FROM` are only usable in the `FROM` line
# itself; here they select the tag of the upstream `nvidia/cuda` devel image.
# The platform is fixed to linux/arm64 for this image.
ARG CUDA_VER
ARG DISTRO_NAME
ARG DISTRO_VER
FROM --platform=linux/arm64 nvidia/cuda:${CUDA_VER}-devel-${DISTRO_NAME}${DISTRO_VER}

# NOTE(review): the maintainer address below looks redacted/obfuscated —
# confirm the real address is present in the repository copy.
LABEL maintainer="conda-forge <[email protected]>"

# Ship the qemu user-mode emulator binary inside the image.
# `COPY` is used instead of `ADD` because this is a plain local file: neither
# archive auto-extraction nor URL fetching is wanted here.
COPY qemu-aarch64-static /usr/bin/qemu-aarch64-static

# Re-declare the build arguments after `FROM` (an `ARG` declared before
# `FROM` is not visible inside the build stage) and export them with `ENV`
# so that CUDA_VER, DISTRO_NAME and DISTRO_VER are also set at runtime.
ARG CUDA_VER
ARG DISTRO_NAME
ARG DISTRO_VER
ENV CUDA_VER=${CUDA_VER} \
DISTRO_NAME=${DISTRO_NAME} \
DISTRO_VER=${DISTRO_VER}

# Set an encoding to make things work smoothly.
# The `key=value` form is used throughout; the space-separated `ENV key value`
# form is a deprecated legacy syntax.
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US.UTF-8

# Set path to CUDA install.
ENV CUDA_HOME=/usr/local/cuda

# Persist the CUDA compat directory in the ldconfig search path (to avoid
# having to always set LD_LIBRARY_PATH), but *after* the existing entries.
# Entries in ld.so.conf.d take precedence over the preconfigured directories,
# so the directories currently reported by `ldconfig -v` (the non-indented
# lines, cut at the colon) are written to the new conf file first.
# The upstream images for 10.x all have libcuda.so under $CUDA_HOME/compat;
# that directory is appended afterwards so it will be found correctly.
# The final `ldconfig` applies the updated settings.
RUN ldconfig -v 2>/dev/null | grep -v ^$'\t' | cut -f1 -d":" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf && \
echo "$CUDA_HOME/compat" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf && \
ldconfig

# Bust the docker cache so that we always rerun the installs below: the
# remote content is fetched at build time and stored as a throw-away file.
# NOTE(review): this makes every build depend on loripsum.net being
# reachable — confirm that is acceptable.
ADD https://loripsum.net/api /opt/docker/etc/gibberish

# Import the distro's RPM signing keys up front; this resolves a nasty NOKEY
# warning that otherwise appears when using yum. The key file names differ
# per distro, and the naming convention changed with CentOS 8 - see:
# * https://lists.centos.org/pipermail/centos-devel/2019-September/017847.html
# * https://www.centos.org/keys/#project-keys
# Any other distro/version combination is left untouched.
RUN case "${DISTRO_NAME}${DISTRO_VER}" in \
        centos7) \
            rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7 \
            && rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-SIG-7-aarch64 \
            ;; \
        centos8) \
            rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-centosofficial \
            ;; \
        ubi8) \
            rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-redhat-release \
            ;; \
    esac

# Add custom `yum_clean_all` script before using `yum`; the `yum` steps in
# this file invoke it as /opt/docker/bin/yum_clean_all after installing.
COPY scripts/yum_clean_all /opt/docker/bin/

# Fallback to CentOS vault for CentOS 8 support.
# Only runs when the base is centos8: in every /etc/yum.repos.d/CentOS-*.repo
# file the `mirrorlist` entries are commented out and the commented-out
# mirror.centos.org `baseurl` is re-enabled pointing at vault.centos.org.
# Packages are then updated (with the `cuda` repo disabled) and the yum
# caches cleaned in the same layer.
RUN if [ "${DISTRO_NAME}${DISTRO_VER}" = "centos8" ]; then \
find /etc/yum.repos.d/ -name "CentOS-*.repo" -exec \
sed -i 's/mirrorlist/#mirrorlist/g;s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' {} \; && \
yum update -y --disablerepo=cuda && \
/opt/docker/bin/yum_clean_all; \
fi

# Update the system packages (with the `cuda` repo disabled), install the
# basic requirements and clean the yum caches, all within a single layer.
RUN yum update -y --disablerepo=cuda \
    && yum install -y \
        bzip2 \
        sudo \
        tar \
        which \
    && /opt/docker/bin/yum_clean_all

# Fix the locale on the version-8 images (centos8 and ubi8) by installing
# the English glibc langpack; other bases are left as they are.
# See https://github.com/CentOS/sig-cloud-instance-images/issues/154
RUN case "${DISTRO_NAME}${DISTRO_VER}" in \
        centos8|ubi8) \
            yum install -y glibc-langpack-en \
            && /opt/docker/bin/yum_clean_all \
            ;; \
    esac

# Remove the preinstalled system compilers (gcc and gcc-c++).
# `--nodeps` erases them without checking for packages that depend on them.
RUN rpm -e --nodeps --verbose gcc gcc-c++

# Run common commands: copy the shared `run_commands` script into the image
# and execute it.
# NOTE(review): presumably this is the step that provides /opt/conda, tini
# and the `lucky` group used further down — confirm in scripts/run_commands.
COPY scripts/run_commands /opt/docker/bin/run_commands
RUN /opt/docker/bin/run_commands

# Download and cache CUDA related packages.
# A throw-away `test` environment is created with `--download-only` for
# conda-forge's cudatoolkit matching ${CUDA_VER}; the environment is then
# removed again and `conda clean` is run. Finally /opt/conda is handed to the
# `lucky` group with group permissions made equal to the owner's.
# NOTE(review): `conda clean -t` removes cached tarballs — confirm that what
# is left in the package cache is what this step intends to keep.
RUN source /opt/conda/etc/profile.d/conda.sh && \
conda activate && \
conda create -n test --yes --quiet --download-only \
conda-forge::cudatoolkit=${CUDA_VER} \
&& \
conda remove --yes --quiet -n test --all && \
conda clean -tiy && \
chgrp -R lucky /opt/conda && \
chmod -R g=u /opt/conda

# Add a file for users to source to activate the `conda`
# environment `base`. Also add a file that wraps that for
# use with the `ENTRYPOINT`.
COPY linux-anvil-aarch64-cuda/entrypoint_source /opt/docker/bin/entrypoint_source
COPY scripts/entrypoint /opt/docker/bin/entrypoint

# Ensure that all containers start with tini and the user selected process.
# Activate the `conda` environment `base`.
# Provide a default command (`bash`), which will start if the user doesn't specify one.
ENTRYPOINT [ "/opt/conda/bin/tini", "--", "/opt/docker/bin/entrypoint" ]
CMD [ "/bin/bash" ]
5 changes: 5 additions & 0 deletions linux-anvil-aarch64-cuda/entrypoint_source
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Append the CUDA toolchain's `bin` directory (under `CUDA_HOME`) to `PATH`
# so its binaries can be found after the existing entries.
PATH="${PATH}:${CUDA_HOME}/bin"
export PATH

# Switch into conda's `base` environment.
conda activate base
71 changes: 37 additions & 34 deletions linux-anvil-cuda/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,72 +1,63 @@
# Set environment variables during runtime.
ARG CUDA_VER
ARG DISTRO_ARCH
ARG DISTRO_NAME
ARG DISTRO_VER
FROM --platform=linux/${DISTRO_ARCH} nvidia/cuda:${CUDA_VER}-devel-${DISTRO_NAME}${DISTRO_VER}
FROM --platform=linux/amd64 nvidia/cuda:${CUDA_VER}-devel-${DISTRO_NAME}${DISTRO_VER}

LABEL maintainer="conda-forge <[email protected]>"

# Set `ARG`s during runtime.
ARG CUDA_VER
ARG DISTRO_ARCH
ARG DISTRO_NAME
ARG DISTRO_VER
ENV CUDA_VER=${CUDA_VER} \
DISTRO_ARCH=${DISTRO_ARCH} \
DISTRO_NAME=${DISTRO_NAME} \
DISTRO_VER=${DISTRO_VER}

# Set an encoding to make things work smoothly.
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8

# Set path to CUDA install (this is a symlink to /usr/local/cuda-${CUDA_VER})
ENV CUDA_HOME /usr/local/cuda

# bust the docker cache so that we always rerun the installs below
ADD https://loripsum.net/api /opt/docker/etc/gibberish

# Add qemu in here so that we can use this image on regular linux hosts with qemu user installed
ADD qemu-aarch64-static /usr/bin/qemu-aarch64-static
ADD qemu-ppc64le-static /usr/bin/qemu-ppc64le-static

# we want to persist a path in ldconfig (to avoid having to always set LD_LIBRARY_PATH), but *after* the existing entries;
# since entries in ld.so.conf.d have precedence before the preconfigured directories, we first add the latter to the former
# the upstream images all have libcuda.so under $CUDA_HOME/compat;
# add this to the ldconfig so it will be found correctly.
# don't forget to update settings by running ldconfig
RUN ldconfig -v 2>/dev/null | grep -v ^$'\t' | cut -f1 -d":" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf && \
echo "$CUDA_HOME/compat" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf && \
ldconfig
RUN ldconfig -v 2>/dev/null | grep -v ^$'\t' | cut -f1 -d":" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf \
&& if [ ${CUDA_VER} != "9.2" ]; then \
# the upstream images for 10.x all have libcuda.so under $CUDA_HOME/compat;
# add this to the ldconfig so it will be found correctly.
echo "$CUDA_HOME/compat" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf ; \
else \
# For 9.2, the image nvidia/cuda:9.2-devel-centos6 contains neither
# $CUDA_HOME/compat, nor any (non-stub) libcuda.so. We fix this by
# adding cuda-compat-10.0 (which is not used for building, but to
# test if loading the respective library/package works). However,
# due to licensing reasons, these cannot be part of the conda-forge
# docker images, but are instead added for CI purposes in:
# github.com/conda-forge/conda-forge-ci-setup-feedstock/blob/master/recipe/run_conda_forge_build_setup_linux
# Here we only set the ldconfig accordingly.
echo "/usr/local/cuda-10.0/compat" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf ; \
fi \
# don't forget to update settings by running ldconfig
&& ldconfig

# bust the docker cache so that we always rerun the installs below
ADD https://loripsum.net/api /opt/docker/etc/gibberish

# Add the archived repo URL and fix RPM imports
ADD centos7-repos /tmp/centos7-repos
ADD scripts/fix_rpm /opt/docker/bin/fix_rpm
RUN /opt/docker/bin/fix_rpm

# Add custom `yum_clean_all` script before using `yum`
COPY scripts/yum_clean_all /opt/docker/bin/

# Install basic requirements.
COPY scripts/yum_clean_all /opt/docker/bin/
RUN yum update -y --disablerepo=cuda && \
yum install -y \
bzip2 \
sudo \
tar \
which \
&& \
which && \
/opt/docker/bin/yum_clean_all

# Fix locale in UBI 8 images
# See https://github.com/CentOS/sig-cloud-instance-images/issues/154
RUN if [ "${DISTRO_NAME}${DISTRO_VER}" = "ubi8" ]; then \
yum install -y \
glibc-langpack-en \
&& \
/opt/docker/bin/yum_clean_all; \
fi

# Remove preinstalled system compilers
RUN rpm -e --nodeps --verbose gcc gcc-c++

Expand All @@ -85,14 +76,26 @@ RUN source /opt/conda/etc/profile.d/conda.sh && \
chgrp -R lucky /opt/conda && \
chmod -R g=u /opt/conda

# Symlink CUDA headers that were moved from $CUDA_HOME/include to /usr/include
# in CUDA 10.1.
RUN for HEADER_FILE in cublas_api.h cublas.h cublasLt.h cublas_v2.h cublasXt.h nvblas.h; do \
if [[ ! -f "${CUDA_HOME}/include/${HEADER_FILE}" ]]; \
then ln -s "/usr/include/${HEADER_FILE}" "${CUDA_HOME}/include/${HEADER_FILE}"; \
fi; \
done

# Add qemu in here so that we can use this image on regular linux hosts with qemu user installed
ADD qemu-aarch64-static /usr/bin/qemu-aarch64-static
ADD qemu-ppc64le-static /usr/bin/qemu-ppc64le-static

# Add a file for users to source to activate the `conda`
# environment `base`. Also add a file that wraps that for
# use with the `ENTRYPOINT`.
COPY linux-anvil-cuda/entrypoint_source /opt/docker/bin/entrypoint_source
COPY scripts/entrypoint /opt/docker/bin/entrypoint

# Ensure that all containers start with tini and the user selected process.
# Activate the `conda` environment `base`.
# Activate the `conda` environment `base` and the devtoolset compiler.
# Provide a default command (`bash`), which will start if the user doesn't specify one.
ENTRYPOINT [ "/opt/conda/bin/tini", "--", "/opt/docker/bin/entrypoint" ]
CMD [ "/bin/bash" ]
Loading

0 comments on commit d773d8c

Please sign in to comment.