Skip to content

Commit

Permalink
ARROW-14932: [CI] Prefer mamba over conda
Browse files Browse the repository at this point in the history
Mamba should provide quicker image builds compared to conda.

Closes apache#11816 from kszucs/mamba

Authored-by: Krisztián Szűcs <[email protected]>
Signed-off-by: Krisztián Szűcs <[email protected]>
  • Loading branch information
kszucs committed Dec 1, 2021
1 parent bca0681 commit f416111
Show file tree
Hide file tree
Showing 25 changed files with 120 additions and 133 deletions.
45 changes: 28 additions & 17 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -29,44 +29,55 @@ DOCKER_VOLUME_PREFIX=

# Turn on the inline build cache; this is a docker buildx feature documented
# at https://github.com/docker/buildx#--cache-tonametypetypekeyvalue
BUILDKIT_INLINE_CACHE=1
COMPOSE_DOCKER_CLI_BUILD=1
DOCKER_BUILDKIT=1
BUILDKIT_INLINE_CACHE=1

# different architecture notations
ARCH=amd64
ARCH_ALIAS=x86_64
ARCH_SHORT=amd64

ULIMIT_CORE=-1
# Default repository to pull and push images from
REPO=apache/arrow-dev
CUDA=9.1

# The setup attempts to generate coredumps by default; to disable
# coredump generation, set ULIMIT_CORE to 0
ULIMIT_CORE=-1

# Default versions for platforms
DEBIAN=11
UBUNTU=20.04
FEDORA=33
PYTHON=3.6
LLVM=12
UBUNTU=20.04

# Default versions for various dependencies
CLANG_TOOLS=8
CUDA=9.1
DASK=latest
DOTNET=3.1
GCC_VERSION=""
GO=1.15
NODE=14
MAVEN=3.5.4
HDFS=3.2.1
JDK=8
KARTOTHEK=latest
LLVM=12
MAVEN=3.5.4
NODE=14
NUMPY=latest
PANDAS=latest
DASK=latest
TURBODBC=latest
KARTOTHEK=latest
HDFS=3.2.1
SPARK=master
DOTNET=3.1
PYTHON=3.6
R=4.1
ARROW_R_DEV=TRUE
GCC_VERSION=""
SPARK=master
TURBODBC=latest

# These correspond to images on Docker Hub that contain R, e.g. rhub/ubuntu-gcc-release:latest
R_ORG=rhub
ARROW_R_DEV=TRUE
R_IMAGE=ubuntu-gcc-release
R_ORG=rhub
R_PRUNE_DEPS=FALSE
R_TAG=latest
TZ=UTC

# -1 does not attempt to install a devtoolset version; any positive integer n will install devtoolset-n
DEVTOOLSET_VERSION=-1

Expand Down
9 changes: 7 additions & 2 deletions ci/appveyor-cpp-setup.bat
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ conda config --set remote_connect_timeout_secs 12
conda config --append disallowed_packages pypy3
conda info -a

@rem
@rem Install mamba to the base environment
@rem
conda install -q -y -c conda-forge mamba

@rem
@rem Create conda environment for Build and Toolchain jobs
@rem
Expand All @@ -60,7 +65,7 @@ if "%JOB%" == "Toolchain" (
)
if "%JOB%" NEQ "Build_Debug" (
@rem Arrow conda environment is only required for the Build and Toolchain jobs
conda create -n arrow -q -y -c conda-forge ^
mamba create -n arrow -q -y -c conda-forge ^
--file=ci\conda_env_python.txt ^
%CONDA_PACKAGES% ^
"cmake=3.17" ^
Expand All @@ -74,7 +79,7 @@ if "%JOB%" NEQ "Build_Debug" (
@rem On Windows, GTest is always bundled from source instead of using
@rem conda binaries, avoid any interference between the two versions.
if "%JOB%" == "Toolchain" (
conda uninstall -n arrow -q -y -c conda-forge gtest
mamba uninstall -n arrow -q -y -c conda-forge gtest
)
)

Expand Down
10 changes: 8 additions & 2 deletions ci/docker/conda-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,23 @@ ARG repo
ARG arch
FROM ${repo}:${arch}-conda

COPY ci/scripts/install_minio.sh /arrow/ci/scripts
RUN /arrow/ci/scripts/install_minio.sh latest /opt/conda

COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

# install the required conda packages into the test environment
COPY ci/conda_env_cpp.txt \
ci/conda_env_gandiva.txt \
/arrow/ci/
RUN conda install \
RUN mamba install \
--file arrow/ci/conda_env_cpp.txt \
--file arrow/ci/conda_env_gandiva.txt \
compilers \
doxygen \
valgrind && \
conda clean --all
mamba clean --all

ENV ARROW_BUILD_TESTS=ON \
ARROW_DATASET=ON \
Expand Down
4 changes: 2 additions & 2 deletions ci/docker/conda-integration.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ ARG go=1.15

# Install Archery and integration dependencies
COPY ci/conda_env_archery.txt /arrow/ci/
RUN conda install -q \
RUN mamba install -q \
--file arrow/ci/conda_env_archery.txt \
"python>=3.7" \
numpy \
Expand All @@ -36,7 +36,7 @@ RUN conda install -q \
nodejs=${node} \
yarn \
openjdk=${jdk} && \
conda clean --all --force-pkgs-dirs
mamba clean --all --force-pkgs-dirs

# Install Rust with only the needed components
# (rustfmt is needed for tonic-build to compile the protobuf definitions)
Expand Down
4 changes: 2 additions & 2 deletions ci/docker/conda-python-hdfs.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ FROM ${repo}:${arch}-conda-python-${python}

ARG jdk=8
ARG maven=3.5
RUN conda install -q \
RUN mamba install -q \
maven=${maven} \
openjdk=${jdk} \
pandas && \
conda clean --all
mamba clean --all

# installing libhdfs (JNI)
ARG hdfs=3.2.1
Expand Down
4 changes: 2 additions & 2 deletions ci/docker/conda-python-jpype.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ FROM ${repo}:${arch}-conda-python-${python}

ARG jdk=11
ARG maven=3.6
RUN conda install -q \
RUN mamba install -q \
maven=${maven} \
openjdk=${jdk} \
jpype1 && \
conda clean --all
mamba clean --all
4 changes: 2 additions & 2 deletions ci/docker/conda-python-kartothek.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ ARG python=3.6
FROM ${repo}:${arch}-conda-python-${python}

# install kartothek dependencies from conda-forge
RUN conda install -c conda-forge -q \
RUN mamba install -c conda-forge -q \
attrs \
click \
cloudpickle \
Expand All @@ -40,7 +40,7 @@ RUN conda install -c conda-forge -q \
toolz \
urlquote \
zstandard && \
conda clean --all
mamba clean --all

ARG kartothek=latest
COPY ci/scripts/install_kartothek.sh /arrow/ci/scripts/
Expand Down
2 changes: 1 addition & 1 deletion ci/docker/conda-python-pandas.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ FROM ${repo}:${arch}-conda-python-${python}
ARG pandas=latest
ARG numpy=latest
COPY ci/scripts/install_pandas.sh /arrow/ci/scripts/
RUN conda uninstall -q -y numpy && \
RUN mamba uninstall -q -y numpy && \
/arrow/ci/scripts/install_pandas.sh ${pandas} ${numpy}
4 changes: 2 additions & 2 deletions ci/docker/conda-python-spark.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ FROM ${repo}:${arch}-conda-python-${python}
ARG jdk=8
ARG maven=3.5

RUN conda install -q \
RUN mamba install -q \
openjdk=${jdk} \
maven=${maven} \
pandas && \
conda clean --all
mamba clean --all

# installing specific version of spark
ARG spark=master
Expand Down
4 changes: 2 additions & 2 deletions ci/docker/conda-python-turbodbc.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
rm -rf /var/lib/apt/lists/*

# install turbodbc dependencies from conda-forge
RUN conda install -c conda-forge -q \
RUN mamba install -c conda-forge -q \
pybind11 \
pytest-cov \
mock \
unixodbc && \
conda clean --all
mamba clean --all

RUN service postgresql start && \
sudo -u postgres psql -U postgres -c \
Expand Down
4 changes: 2 additions & 2 deletions ci/docker/conda-python.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ FROM ${repo}:${arch}-conda-cpp
# install python specific packages
ARG python=3.6
COPY ci/conda_env_python.txt /arrow/ci/
RUN conda install -q \
RUN mamba install -q \
--file arrow/ci/conda_env_python.txt \
$([ "$python" == "3.6" -o "$python" == "3.7" ] && echo "pickle5") \
python=${python} \
nomkl && \
conda clean --all
mamba clean --all

ENV ARROW_PYTHON=ON \
ARROW_BUILD_STATIC=OFF \
Expand Down
27 changes: 9 additions & 18 deletions ci/docker/conda.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,38 +18,29 @@
ARG arch=amd64
FROM ${arch}/ubuntu:18.04

# arch is unset after the FROM statement, so need to define it again
ARG arch=amd64
ARG prefix=/opt/conda

# install build essentials
RUN export DEBIAN_FRONTEND=noninteractive && \
apt-get update -y -q && \
apt-get install -y -q wget tzdata libc6-dbg gdb \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

ENV PATH=${prefix}/bin:$PATH
# install conda and minio
COPY ci/scripts/install_conda.sh \
ci/scripts/install_minio.sh \
/arrow/ci/scripts/
RUN /arrow/ci/scripts/install_conda.sh ${arch} linux latest ${prefix}
RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest ${prefix}
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
# install conda and mamba via mambaforge
COPY ci/scripts/install_conda.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_conda.sh mambaforge latest /opt/conda
ENV PATH=/opt/conda/bin:$PATH

# create a conda environment
ADD ci/conda_env_unix.txt /arrow/ci/
RUN conda create -n arrow --file arrow/ci/conda_env_unix.txt git && \
conda clean --all
RUN mamba create -n arrow --file arrow/ci/conda_env_unix.txt git && \
mamba clean --all

# activate the created environment by default
RUN echo "conda activate arrow" >> ~/.profile
ENV CONDA_PREFIX=${prefix}/envs/arrow
ENV CONDA_PREFIX=/opt/conda/envs/arrow

# use login shell to activate arrow environment in the RUN commands
SHELL [ "/bin/bash", "-c", "-l" ]
SHELL ["/bin/bash", "-c", "-l"]

# use login shell when running the container
ENTRYPOINT [ "/bin/bash", "-c", "-l" ]
ENTRYPOINT ["/bin/bash", "-c", "-l"]
3 changes: 1 addition & 2 deletions ci/docker/debian-10-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

ARG arch=amd64
FROM ${arch}/debian:10
ARG arch

ENV DEBIAN_FRONTEND noninteractive

Expand Down Expand Up @@ -73,7 +72,7 @@ RUN apt-get update -y -q && \
rm -rf /var/lib/apt/lists/*

COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
RUN /arrow/ci/scripts/install_minio.sh latest /usr/local

ENV ARROW_BUILD_TESTS=ON \
ARROW_DATASET=ON \
Expand Down
5 changes: 3 additions & 2 deletions ci/docker/debian-11-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,10 @@ RUN apt-get update -y -q && \
rm -rf /var/lib/apt/lists/*

COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
RUN /arrow/ci/scripts/install_minio.sh latest /usr/local

COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

ENV ARROW_BUILD_TESTS=ON \
ARROW_DATASET=ON \
Expand Down
5 changes: 3 additions & 2 deletions ci/docker/fedora-33-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,10 @@ RUN dnf update -y && \
zlib-devel

COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
RUN /arrow/ci/scripts/install_minio.sh latest /usr/local

COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

ENV ARROW_BUILD_TESTS=ON \
ARROW_DEPENDENCY_SOURCE=SYSTEM \
Expand Down
6 changes: 3 additions & 3 deletions ci/docker/linux-apt-r.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

ARG base
FROM ${base}
ARG arch

ARG tz="UTC"
ENV TZ=${tz}
Expand Down Expand Up @@ -92,9 +91,10 @@ COPY r/DESCRIPTION /arrow/r/
RUN /arrow/ci/scripts/r_deps.sh /arrow

COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
RUN /arrow/ci/scripts/install_minio.sh latest /usr/local

COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

COPY python/requirements-build.txt /arrow/python/
RUN pip install -r arrow/python/requirements-build.txt
Expand Down
6 changes: 3 additions & 3 deletions ci/docker/ubuntu-20.04-cpp-minimal.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

ARG base=amd64/ubuntu:20.04
FROM ${base}
ARG arch

SHELL ["/bin/bash", "-o", "pipefail", "-c"]

Expand All @@ -38,9 +37,10 @@ RUN apt-get update -y -q && \
rm -rf /var/lib/apt/lists*

COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
RUN /arrow/ci/scripts/install_minio.sh latest /usr/local

COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

ENV ARROW_BUILD_TESTS=ON \
ARROW_DATASET=ON \
Expand Down
7 changes: 4 additions & 3 deletions ci/docker/ubuntu-20.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

ARG base=amd64/ubuntu:20.04
FROM ${base}
ARG arch

SHELL ["/bin/bash", "-o", "pipefail", "-c"]

Expand Down Expand Up @@ -99,9 +98,11 @@ RUN apt-get update -y -q && \
rm -rf /var/lib/apt/lists*

COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
RUN /arrow/ci/scripts/install_minio.sh latest /usr/local

COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

COPY ci/scripts/install_ceph.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_ceph.sh

Expand Down
Loading

0 comments on commit f416111

Please sign in to comment.