Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize gpu-celery container #751

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
Open
202 changes: 48 additions & 154 deletions gpu-celery/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,137 +1,22 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
#
# THIS IS A GENERATED DOCKERFILE.
#
# This file was assembled from multiple pieces, whose use is documented
# throughout. Please refer to the TensorFlow dockerfiles documentation
# for more information.

# Build arguments referenced by the FROM line must be declared before it;
# an ARG that is only declared after FROM expands to an empty string there.
ARG UBUNTU_VERSION=18.04
# ARCH is empty by default. When set (e.g. ppc64le) the expansion below
# selects the "nvidia/cuda-<arch>" image instead of "nvidia/cuda".
ARG ARCH=
ARG CUDA=11.2
FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}.1-base-ubuntu${UBUNTU_VERSION} AS base
# ARCH and CUDA are specified again because the FROM directive resets ARGs
# (but their default value is retained if set previously)
ARG ARCH
ARG CUDA
ARG CUDNN=8.1.0.77-1
ARG CUDNN_MAJOR_VERSION=8
ARG LIB_DIR_PREFIX=x86_64
ARG LIBNVINFER=7.2.2-1
ARG LIBNVINFER_MAJOR_VERSION=7

# The following two arguments are rodan-specific
ARG BRANCHES
ARG VERSION

# Bash is needed for the ${CUDA/./-} string substitution used below, which
# turns a version such as "11.2" into the "11-2" suffix of the CUDA packages.
SHELL ["/bin/bash", "-c"]
# Pick up some TF dependencies
#RUN apt-get update

#RUN rm -rf /etc/apt/sources.list.d/cuda.list

# wget is an addition to the package list; the rest are CUDA/TensorFlow
# runtime and build dependencies. The apt lists are removed in the same
# layer so they do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cuda-command-line-tools-${CUDA/./-} \
        libcublas-${CUDA/./-} \
        cuda-nvrtc-${CUDA/./-} \
        libcufft-${CUDA/./-} \
        libcurand-${CUDA/./-} \
        libcusolver-${CUDA/./-} \
        libcusparse-${CUDA/./-} \
        curl \
        libcudnn8=${CUDNN}+cuda${CUDA} \
        libfreetype6-dev \
        libhdf5-serial-dev \
        libzmq3-dev \
        pkg-config \
        software-properties-common \
        unzip \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install TensorRT if not building for PowerPC
# NOTE: libnvinfer uses cuda11.1 versions
# RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \
# apt-get install -y --no-install-recommends libnvinfer${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda11.1 \
# libnvinfer-plugin${LIBNVINFER_MAJOR_VERSION}=${LIBNVINFER}+cuda11.1 \
# && apt-get clean \
# && rm -rf /var/lib/apt/lists/*; }

# For CUDA profiling, TensorFlow requires CUPTI.
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda-11.1/lib64:$LD_LIBRARY_PATH

# Link the libcuda stub to the location where tensorflow is searching for it and reconfigure
# dynamic linker run-time bindings
RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 \
&& echo "/usr/local/cuda/lib64/stubs" > /etc/ld.so.conf.d/z-cuda-stubs.conf \
&& ldconfig

# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8

# This section differs from the default tensorflow2.5.1 Dockerfile, because we specifically add python 3.7;
ARG PYTHON=python3.7
ARG TF_PACKAGE=tensorflow
ARG TF_PACKAGE_VERSION=2.5.1

RUN apt-get update && apt-get install -y --no-install-recommends \
python3.7 \
python3-pip \
&& \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
&& wget https://bootstrap.pypa.io/get-pip.py \
&& ${PYTHON} get-pip.py \
&& ln -sf /usr/bin/${PYTHON} /usr/local/bin/python3 \
&& ln -sf /usr/local/bin/pip /usr/local/bin/pip3 \
&& pip3 --no-cache-dir install --upgrade pip setuptools==57.0.0 \
# Some TF tools expect a "python" binary
&& ln -s $(which python3) /usr/local/bin/python \
&& python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}

# RUN ln -s $(which python3) /usr/local/bin/python

# Options:
# tensorflow
# tensorflow-gpu
# tf-nightly
# tf-nightly-gpu
# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version.
# Installs the latest version by default.

# COPY bashrc /etc/bash.bashrc
# RUN chmod a+rwx /etc/bash.bashrc

# This ends the material obtained from TensorFlow's Dockerfile. The remainder is Rodan-Docker-specific setup.

# FROM base
RUN set -e \
&& apt-get update \
&& DEBIAN_FRONTEND="noninteractive" apt-get install -yqq \
FROM alpine:3 AS builder
# Builder stage: clones the Rodan repository (with submodules) into /Rodan.
# BRANCH selects the branch to clone and falls back to "develop" when the
# build argument is unset or empty.
# NOTE(review): hooks/build passes --build-arg BRANCHES, not BRANCH, so the
# default is what gets used there -- confirm which name is intended.
ARG BRANCH
ENV BRANCH="${BRANCH:-develop}"
# Install git in a single layer; --no-cache fetches a fresh package index
# and does not store it in the image.
RUN apk add --no-cache git
# Download Rodan
WORKDIR /
RUN git clone --recurse-submodules -b "${BRANCH}" https://github.com/ddmal/Rodan

FROM tensorflow/tensorflow:2.10.0rc1-gpu
RUN apt-get update
RUN DEBIAN_FRONTEND="noninteractive" apt-get install -yqq \
git \
# Python lxml dependencies
python3.7-dev \
python3-opencv \
libxml2-dev \
libxslt1-dev \
zlib1g-dev \
lib32ncurses5-dev \
python3-lxml \
# Psycopg2 dependencies
libpq-dev \
# OpenCV dependencies
libsm6 libxext6 libxrender-dev libglib2.0-data \
python3-opencv \
# For resource identification
libmagic-dev \
unzip \
Expand All @@ -145,41 +30,50 @@ RUN pip uninstall opencv-python-headless && pip install opencv-python-headless==

# Install GPU Rodan Jobs
COPY ./scripts/install_gpu_rodan_jobs /opt/
# Install Rodan
# Runs on both Rodan service, and Rodan-Celery
COPY ./scripts/entrypoint /opt/
COPY ./scripts/start-celery /run/
COPY ./scripts/wait-for-app /run/

# Copying rodan core from build context into container
# Rodan folder MUST be uppercase, otherwise many unittests fail.
#TODO(buser): Swap this line out once pushed to develop.
COPY ./rodan-main/code /code/Rodan
# COPY --from=builder /Rodan/rodan-main/code /code/Rodan

RUN mkdir -p /code/jobs

# Install GPU Jobs
RUN chmod +x /opt/install_gpu_rodan_jobs
RUN chown www-data /opt/install_gpu_rodan_jobs
RUN /opt/install_gpu_rodan_jobs

# Install Rodan
RUN sed -i "s/pip /pip3 /g" /run/install_rodan
RUN sed -i "s/lxml/#lxml/g" /code/Rodan/requirements.txt
RUN sed -i "s/pybagit==1.5.0/-e git:\/\/github.com\/deepio\/pybagit.git@a27c9e0fc3bdf99dab8bd327f3ce9ea884abd6b4#egg=pybagit/g" /code/Rodan/requirements.txt

# necessary for scikit-image > 0.17, or else it will try to make a cache directory
# in a place where the www-data user does not have permissions to do so
ENV SKIMAGE_DATADIR "/tmp/.skimage_cache"

# Change the concurrency for gpu jobs because Calvo is very expensive
RUN sed -i "s/=10/=1/g" /run/start-celery

# Install Rodan
RUN chmod +x /run/install_rodan
RUN chown www-data /run/install_rodan
RUN /run/install_rodan

# Add Celery script
RUN chmod +x /run/start-celery
RUN chown www-data /run/start-celery

RUN set -x \
# Create Folders
&& mkdir -p /code/jobs \
# Install GPU Jobs
&& chmod +x /opt/install_gpu_rodan_jobs \
&& /opt/install_gpu_rodan_jobs \
# Install Rodan
&& sed -i "s/lxml/#lxml/g" /code/Rodan/requirements.txt \
# && sed -i "s/pybagit==1.5.0/-e git:\/\/github.com\/deepio\/pybagit.git@a27c9e0fc3bdf99dab8bd327f3ce9ea884abd6b4#egg=pybagit/g" /code/Rodan/requirements.txt \
# Add Entrypoints
&& sed -i 's/\r//' /opt/entrypoint \
&& chmod +x /opt/entrypoint \
# Add Celery script
&& chmod +x /run/start-celery \
# Change the concurrency for gpu jobs because Calvo is very expensive
&& sed -i "s/=10/=1/g" /run/start-celery \
# Script to wait for postgres and redis to be running before attempting to connect to them.
&& chmod +x /run/wait-for-app
# Change the concurrency for gpu jobs because Calvo is very expensive
RUN sed -i "s/=10/=1/g" /run/start-celery

# Install Rodan.
RUN pip3 install -r /code/Rodan/requirements.txt
# Script to wait for postgres and redis to be running before attempting to connect to them.
RUN chmod +x /run/wait-for-app
RUN chown www-data /run/wait-for-app
RUN chown -R www-data /code/Rodan /code/jobs

ENTRYPOINT ["/opt/entrypoint"]
ENTRYPOINT ["/run/start-celery"]
20 changes: 10 additions & 10 deletions hooks/build
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ docker build \
--no-cache \
--build-arg BRANCHES="develop" \
--build-arg VERSION=${RODAN_TAG} \
--tag ddmal/rodan-python3-celery:nightly \
--tag ddmal/rodan-python3-celery:smaller \
--tag ddmal/rodan-python3-celery:${RODAN_TAG} \
--file ./python3-celery/Dockerfile \
.

echo "[+] Pushing Python3-Celery"
docker push ddmal/rodan-python3-celery:nightly
docker push ddmal/rodan-python3-celery:smaller

echo "[+] Python3-Celery needs to be made and pushed before Rodan/Celery because the Rodan image uses the Python3 image as its base."

Expand All @@ -54,7 +54,7 @@ docker build \
--build-arg BRANCHES="develop" \
--build-arg VERSION=${RODAN_TAG} \
--build-arg build_hash=${BUILD_HASH} \
--tag ddmal/rodan-main:nightly \
--tag ddmal/rodan-main:smaller \
--tag ddmal/rodan-main:${RODAN_TAG} \
--file ./rodan-main/Dockerfile \
.
Expand All @@ -72,7 +72,7 @@ docker build \
--no-cache \
--build-arg BRANCHES="develop" \
--build-arg VERSION=${RODAN_CLIENT_TAG} \
--tag ddmal/rodan-client:nightly \
--tag ddmal/rodan-client:smaller \
--tag ddmal/rodan-client:${RODAN_CLIENT_TAG} \
./rodan-client

Expand All @@ -83,7 +83,7 @@ if [ -z `echo ${RODAN_TAG} | awk -F'-' '{print $3}'` ]; then
docker push ddmal/rodan-main:${RODAN_TAG}
fi

docker push ddmal/rodan-main:nightly
docker push ddmal/rodan-main:smaller

echo "[+] Pushing Rodan-Client"

Expand All @@ -92,7 +92,7 @@ if [ -z `echo ${RODAN_CLIENT_TAG} | awk -F'-' '{print $3}'` ]; then
docker push ddmal/rodan-client:${RODAN_CLIENT_TAG}
fi

docker push ddmal/rodan-client:nightly
docker push ddmal/rodan-client:smaller

###############################################################################
# Stage 3
Expand All @@ -104,7 +104,7 @@ docker build \
--no-cache \
--build-arg BRANCHES="develop" \
--build-arg VERSION=${RODAN_TAG} \
--tag ddmal/rodan-gpu-celery:nightly \
--tag ddmal/rodan-gpu-celery:smaller \
--tag ddmal/rodan-gpu-celery:${RODAN_TAG} \
--file ./gpu-celery/Dockerfile \
.
Expand All @@ -114,7 +114,7 @@ echo "[+] Building Postgres"
docker build \
--no-cache \
--build-arg VERSION=${RODAN_DOCKER_TAG} \
--tag ddmal/postgres-plpython:nightly \
--tag ddmal/postgres-plpython:smaller \
--tag ddmal/postgres-plpython:${RODAN_DOCKER_TAG} \
--file ./postgres/Dockerfile \
.
Expand All @@ -124,7 +124,7 @@ echo "[+] Building Nginx"
docker build \
--no-cache \
--build-arg VERSION=${RODAN_DOCKER_TAG} \
--tag ddmal/nginx:nightly \
--tag ddmal/nginx:smaller \
--tag ddmal/nginx:${RODAN_DOCKER_TAG} \
./nginx

Expand All @@ -133,7 +133,7 @@ echo "[+] Building HPC-RabbitMQ"
docker build \
--no-cache \
--build-arg VERSION=${RODAN_DOCKER_TAG} \
--tag ddmal/hpc-rabbitmq:nightly \
--tag ddmal/hpc-rabbitmq:smaller \
--tag ddmal/hpc-rabbitmq:${RODAN_DOCKER_TAG} \
./hpc-rabbitmq

Expand Down
10 changes: 5 additions & 5 deletions hooks/push
Original file line number Diff line number Diff line change
Expand Up @@ -12,36 +12,36 @@ if [ -z `echo ${RODAN_TAG} | awk -F'-' '{print $3}'` ]; then
docker push ddmal/rodan-python3-celery:${RODAN_TAG}
fi

docker push ddmal/rodan-python3-celery:nightly
docker push ddmal/rodan-python3-celery:smaller

echo "[+] Pushing GPU-Celery"

if [ -z `echo ${RODAN_TAG} | awk -F'-' '{print $3}'` ]; then
docker push ddmal/rodan-gpu-celery:${RODAN_TAG}
fi

docker push ddmal/rodan-gpu-celery:nightly
docker push ddmal/rodan-gpu-celery:smaller

echo "[+] Pushing Postgres"

if [ -z `echo ${RODAN_DOCKER_TAG} | awk -F'-' '{print $3}'` ]; then
docker push ddmal/postgres-plpython:${RODAN_DOCKER_TAG}
fi

docker push ddmal/postgres-plpython:nightly
docker push ddmal/postgres-plpython:smaller

echo "[+] Pushing Nginx"

if [ -z `echo ${RODAN_DOCKER_TAG} | awk -F'-' '{print $3}'` ]; then
docker push ddmal/nginx:${RODAN_DOCKER_TAG}
fi

docker push ddmal/nginx:nightly
docker push ddmal/nginx:smaller

echo "[+] Pushing HPC-RabbitMQ"

if [ -z `echo ${RODAN_DOCKER_TAG} | awk -F'-' '{print $3}'` ]; then
docker push ddmal/hpc-rabbitmq:${RODAN_DOCKER_TAG}
fi

docker push ddmal/hpc-rabbitmq:nightly
docker push ddmal/hpc-rabbitmq:smaller
4 changes: 0 additions & 4 deletions rodan-main/code/rodan/jobs/Calvo_classifier/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,5 @@ html5lib==0.9999999
Keras==2.3.1
numpy==1.16.6; python_version < "3.4"
numpy==1.19.2; python_version > "3.4"
opencv-python==3.2.0.8; python_version < "3.7"
opencv-python==3.4.2.17; python_version >= "3.7"
scipy>=1.0.0
six>=1.12.0
tensorflow==1.14.0; python_version < "3.4"
tensorflow==2.5.1; python_version > "3.4"
10 changes: 0 additions & 10 deletions scripts/entrypoint

This file was deleted.