Merge pull request #275 from conda-forge/revert-272-cleanup_old_cuda_bits

Revert "Cleanup and consolidate CUDA images"
conda-forge · Jul 16, 2024 · d773d8c
2 parents 1e6db00 + f450b55
commit d773d8c
Show file tree

Hide file tree

Showing 8 changed files with 287 additions and 47 deletions.
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -36,28 +36,22 @@ jobs:
             SHORT_DESCRIPTION: "conda-forge build image for Cent0S 7 on aarch64"
 
           - DOCKERIMAGE: linux-anvil-cuda
-            DOCKERFILE: linux-anvil-cuda
             DOCKERTAG: "11.8"
             CUDA_VER: "11.8.0"
-            DISTRO_ARCH: "amd64"
             DISTRO_NAME: "centos"
             DISTRO_VER: "7"
             SHORT_DESCRIPTION: "conda-forge build image for Cent0S 7 on x86_64 with CUDA"
 
           - DOCKERIMAGE: linux-anvil-ppc64le-cuda
-            DOCKERFILE: linux-anvil-cuda
             DOCKERTAG: "11.8"
             CUDA_VER: "11.8.0"
-            DISTRO_ARCH: "ppc64le"
             DISTRO_NAME: "ubi"
             DISTRO_VER: "8"
             SHORT_DESCRIPTION: "conda-forge build image for Cent0S 8 on ppc64le with CUDA"
 
           - DOCKERIMAGE: linux-anvil-aarch64-cuda
-            DOCKERFILE: linux-anvil-cuda
             DOCKERTAG: "11.8"
             CUDA_VER: "11.8.0"
-            DISTRO_ARCH: "arm64"
             DISTRO_NAME: "ubi"
             DISTRO_VER: "8"
             SHORT_DESCRIPTION: "conda-forge build image for Cent0S 8 on aarch64 with CUDA"

diff --git a/README.md b/README.md
@@ -18,15 +18,15 @@ environment variables passed in to be able to build. In this case, you
 will want to use a command similar to the following:
 
 ```sh
-docker build --rm --build-arg DISTRO_NAME=centos --build-arg DISTRO_VER=6 --build-arg CUDA_VER=11.8.0 -f linux-anvil-cuda/Dockerfile .
+docker build --rm --build-arg DISTRO_NAME=centos --build-arg DISTRO_VER=6 --build-arg CUDA_VER=10.2 -f linux-anvil-cuda/Dockerfile .
 ```
 
 ## Environment variables
 
-* `CUDA_VER`: This is the cuda & cudatoolkit version that will be used. The
-  value of this variable should be in major-minor-patch format, e.g. `11.8.0`.
-* `DISTRO_ARCH`: This is the Linux architecture that the image should use.
-  Should match the upstream Docker image, e.g. `amd64`.
+* `$CUDA_VER`: This is the cuda & cudatoolkit version that will be used. The
+  value of this variable should be in major-minor form, e.g. `9.2` for versions
+  `9.x` and `10.x`. For versions `11.x` the variable should be in
+  major-minor-patch format, e.g. `11.2.0`.
 * `DISTRO_NAME`: This is the Linux distro image name that should be built with.
   Should match the upstream Docker image, e.g. `centos`.
 * `DISTRO_VER`: This is version of Linux distro (typical CentOS) that the image

diff --git a/linux-anvil-aarch64-cuda/Dockerfile b/linux-anvil-aarch64-cuda/Dockerfile
@@ -0,0 +1,107 @@
+ARG CUDA_VER
+ARG DISTRO_NAME
+ARG DISTRO_VER
+FROM --platform=linux/arm64 nvidia/cuda:${CUDA_VER}-devel-${DISTRO_NAME}${DISTRO_VER}
+
+LABEL maintainer="conda-forge <[email protected]>"
+
+ADD qemu-aarch64-static /usr/bin/qemu-aarch64-static
+
+# Set CUDA_VER during runtime.
+ARG CUDA_VER
+ARG DISTRO_NAME
+ARG DISTRO_VER
+ENV CUDA_VER=${CUDA_VER} \
+    DISTRO_NAME=${DISTRO_NAME} \
+    DISTRO_VER=${DISTRO_VER}
+
+# Set an encoding to make things work smoothly.
+ENV LANG en_US.UTF-8
+ENV LANGUAGE=en_US.UTF-8
+
+# Set path to CUDA install.
+ENV CUDA_HOME /usr/local/cuda
+
+# we want to persist a path in ldconfig (to avoid having to always set LD_LIBRARY_PATH), but *after* the existing entries;
+# since entries in ld.so.conf.d have precedence before the preconfigured directories, we first add the latter to the former
+# the upstream images for 10.x all have libcuda.so under $CUDA_HOME/compat;
+# add this to the ldconfig so it will be found correctly.
+# don't forget to update settings by running ldconfig
+RUN ldconfig -v 2>/dev/null | grep -v ^$'\t' | cut -f1 -d":" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf && \
+    echo "$CUDA_HOME/compat" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf && \
+    ldconfig
+
+# bust the docker cache so that we always rerun the installs below
+ADD https://loripsum.net/api /opt/docker/etc/gibberish
+
+# Resolves a nasty NOKEY warning that appears when using yum.
+# Naming convention changed with cos8 - see:
+# * https://lists.centos.org/pipermail/centos-devel/2019-September/017847.html
+# * https://www.centos.org/keys/#project-keys
+RUN if [ "${DISTRO_NAME}${DISTRO_VER}" = "centos7" ]; then \
+        rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7 && \
+        rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-SIG-7-aarch64; \
+    elif [ "${DISTRO_NAME}${DISTRO_VER}" = "centos8" ]; then \
+        rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-centosofficial; \
+    elif [ "${DISTRO_NAME}${DISTRO_VER}" = "ubi8" ]; then \
+        rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-redhat-release; \
+    fi
+
+# Add custom `yum_clean_all` script before using `yum`
+COPY scripts/yum_clean_all /opt/docker/bin/
+
+# Fallback to CentOS vault for CentOS 8 support.
+RUN if [ "${DISTRO_NAME}${DISTRO_VER}" = "centos8" ]; then \
+        find /etc/yum.repos.d/ -name "CentOS-*.repo" -exec \
+             sed -i 's/mirrorlist/#mirrorlist/g;s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' {} \; && \
+        yum update -y --disablerepo=cuda && \
+        /opt/docker/bin/yum_clean_all; \
+    fi
+
+# Install basic requirements.
+RUN yum update -y --disablerepo=cuda && \
+    yum install -y \
+        bzip2 \
+        sudo \
+        tar \
+        which \
+        && \
+    /opt/docker/bin/yum_clean_all
+
+# Fix locale in CentOS8 images
+# See https://github.com/CentOS/sig-cloud-instance-images/issues/154
+RUN if [ "${DISTRO_NAME}${DISTRO_VER}" = "centos8" ] || [ "${DISTRO_NAME}${DISTRO_VER}" = "ubi8" ]; then \
+        yum install -y glibc-langpack-en \
+        && \
+        /opt/docker/bin/yum_clean_all; \
+    fi
+
+# Remove preincluded system compilers
+RUN rpm -e --nodeps --verbose gcc gcc-c++
+
+# Run common commands
+COPY scripts/run_commands /opt/docker/bin/run_commands
+RUN /opt/docker/bin/run_commands
+
+# Download and cache CUDA related packages.
+RUN source /opt/conda/etc/profile.d/conda.sh && \
+    conda activate && \
+    conda create -n test --yes --quiet --download-only \
+        conda-forge::cudatoolkit=${CUDA_VER} \
+        && \
+    conda remove --yes --quiet -n test --all && \
+    conda clean -tiy && \
+    chgrp -R lucky /opt/conda && \
+    chmod -R g=u /opt/conda
+
+# Add a file for users to source to activate the `conda`
+# environment `root`. Also add a file that wraps that for
+# use with the `ENTRYPOINT`.
+COPY linux-anvil-aarch64-cuda/entrypoint_source /opt/docker/bin/entrypoint_source
+COPY scripts/entrypoint /opt/docker/bin/entrypoint
+
+# Ensure that all containers start with tini and the user selected process.
+# Activate the `conda` environment `root`.
+# Provide a default command (`bash`), which will start if the user doesn't specify one.
+ENTRYPOINT [ "/opt/conda/bin/tini", "--", "/opt/docker/bin/entrypoint" ]
+CMD [ "/bin/bash" ]
diff --git a/linux-anvil-aarch64-cuda/entrypoint_source b/linux-anvil-aarch64-cuda/entrypoint_source
@@ -0,0 +1,5 @@
+# Add `CUDA_HOME` binaries to `PATH`.
+export PATH="${PATH}:${CUDA_HOME}/bin"
+
+# Activate the `base` conda environment.
+conda activate base
diff --git a/linux-anvil-cuda/Dockerfile b/linux-anvil-cuda/Dockerfile
@@ -1,72 +1,63 @@
 # Set environment variables during runtime.
 ARG CUDA_VER
-ARG DISTRO_ARCH
 ARG DISTRO_NAME
 ARG DISTRO_VER
-FROM --platform=linux/${DISTRO_ARCH} nvidia/cuda:${CUDA_VER}-devel-${DISTRO_NAME}${DISTRO_VER}
+FROM --platform=linux/amd64 nvidia/cuda:${CUDA_VER}-devel-${DISTRO_NAME}${DISTRO_VER}
 
 LABEL maintainer="conda-forge <[email protected]>"
 
-# Set `ARG`s during runtime.
 ARG CUDA_VER
-ARG DISTRO_ARCH
 ARG DISTRO_NAME
 ARG DISTRO_VER
 ENV CUDA_VER=${CUDA_VER} \
-    DISTRO_ARCH=${DISTRO_ARCH} \
     DISTRO_NAME=${DISTRO_NAME} \
     DISTRO_VER=${DISTRO_VER}
 
 # Set an encoding to make things work smoothly.
 ENV LANG en_US.UTF-8
-ENV LANGUAGE en_US.UTF-8
 
 # Set path to CUDA install (this is a symlink to /usr/local/cuda-${CUDA_VER})
 ENV CUDA_HOME /usr/local/cuda
 
-# bust the docker cache so that we always rerun the installs below
-ADD https://loripsum.net/api /opt/docker/etc/gibberish
-
-# Add qemu in here so that we can use this image on regular linux hosts with qemu user installed
-ADD qemu-aarch64-static /usr/bin/qemu-aarch64-static
-ADD qemu-ppc64le-static /usr/bin/qemu-ppc64le-static
-
 # we want to persist a path in ldconfig (to avoid having to always set LD_LIBRARY_PATH), but *after* the existing entries;
 # since entries in ld.so.conf.d have precedence before the preconfigured directories, we first add the latter to the former
-# the upstream images all have libcuda.so under $CUDA_HOME/compat;
-# add this to the ldconfig so it will be found correctly.
-# don't forget to update settings by running ldconfig
-RUN ldconfig -v 2>/dev/null | grep -v ^$'\t' | cut -f1 -d":" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf && \
-    echo "$CUDA_HOME/compat" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf && \
-    ldconfig
+RUN ldconfig -v 2>/dev/null | grep -v ^$'\t' | cut -f1 -d":" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf \
+    && if [ ${CUDA_VER} != "9.2" ]; then \
+        # the upstream images for 10.x all have libcuda.so under $CUDA_HOME/compat;
+        # add this to the ldconfig so it will be found correctly.
+        echo "$CUDA_HOME/compat" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf ; \
+    else \
+        # For 9.2, the image nvidia/cuda:9.2-devel-centos6 contains neither
+        # $CUDA_HOME/compat, nor any (non-stub) libcuda.so. We fix this by
+        # adding cuda-compat-10.0 (which is not used for building, but to
+        # test if loading the respective library/package works). However,
+        # due to licensing reasons, these cannot be part of the conda-forge
+        # docker images, but are instead added for CI purposes in:
+        # github.com/conda-forge/conda-forge-ci-setup-feedstock/blob/master/recipe/run_conda_forge_build_setup_linux
+        # Here we only set the ldconfig accordingly.
+        echo "/usr/local/cuda-10.0/compat" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf ; \
+    fi \
+    # don't forget to update settings by running ldconfig
+    && ldconfig
+
+# bust the docker cache so that we always rerun the installs below
+ADD https://loripsum.net/api /opt/docker/etc/gibberish
 
 # Add the archived repo URL and fix RPM imports
 ADD centos7-repos /tmp/centos7-repos
 ADD scripts/fix_rpm /opt/docker/bin/fix_rpm
 RUN /opt/docker/bin/fix_rpm
 
-# Add custom `yum_clean_all` script before using `yum`
-COPY scripts/yum_clean_all /opt/docker/bin/
-
 # Install basic requirements.
+COPY scripts/yum_clean_all /opt/docker/bin/
 RUN yum update -y --disablerepo=cuda && \
     yum install -y \
         bzip2 \
         sudo \
         tar \
-        which \
-    && \
+        which && \
     /opt/docker/bin/yum_clean_all
 
-# Fix locale in UBI 8 images
-# See https://github.com/CentOS/sig-cloud-instance-images/issues/154
-RUN if [ "${DISTRO_NAME}${DISTRO_VER}" = "ubi8" ]; then \
-        yum install -y \
-            glibc-langpack-en \
-        && \
-        /opt/docker/bin/yum_clean_all; \
-    fi
-
 # Remove preincluded system compilers
 RUN rpm -e --nodeps --verbose gcc gcc-c++
 
@@ -85,14 +76,26 @@ RUN source /opt/conda/etc/profile.d/conda.sh && \
     chgrp -R lucky /opt/conda && \
     chmod -R g=u /opt/conda
 
+# Symlink CUDA headers that were moved from $CUDA_HOME/include to /usr/include
+# in CUDA 10.1.
+RUN for HEADER_FILE in cublas_api.h cublas.h cublasLt.h cublas_v2.h cublasXt.h nvblas.h; do \
+    if [[ ! -f "${CUDA_HOME}/include/${HEADER_FILE}" ]]; \
+      then ln -s "/usr/include/${HEADER_FILE}" "${CUDA_HOME}/include/${HEADER_FILE}"; \
+    fi; \
+    done
+
+# Add qemu in here so that we can use this image on regular linux hosts with qemu user installed
+ADD qemu-aarch64-static /usr/bin/qemu-aarch64-static
+ADD qemu-ppc64le-static /usr/bin/qemu-ppc64le-static
+
 # Add a file for users to source to activate the `conda`
 # environment `base`. Also add a file that wraps that for
 # use with the `ENTRYPOINT`.
 COPY linux-anvil-cuda/entrypoint_source /opt/docker/bin/entrypoint_source
 COPY scripts/entrypoint /opt/docker/bin/entrypoint
 
 # Ensure that all containers start with tini and the user selected process.
-# Activate the `conda` environment `base`.
+# Activate the `conda` environment `base` and the devtoolset compiler.
 # Provide a default command (`bash`), which will start if the user doesn't specify one.
 ENTRYPOINT [ "/opt/conda/bin/tini", "--", "/opt/docker/bin/entrypoint" ]
 CMD [ "/bin/bash" ]