Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#2039] fix(docker): Make docker build script work for Hadoop3.2 #2040

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions bin/start-coordinator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,15 @@ done
mkdir -p "${RSS_LOG_DIR}"
mkdir -p "${RSS_PID_DIR}"

set +u
if [ $HADOOP_HOME ]; then
if [ -n "${HADOOP_HOME:-}" ]; then
HADOOP_DEPENDENCY="$("$HADOOP_HOME/bin/hadoop" classpath --glob)"
CLASSPATH=$CLASSPATH:$HADOOP_DEPENDENCY
JAVA_LIB_PATH="-Djava.library.path=$HADOOP_HOME/lib/native"
fi

if [ $HADOOP_CONF_DIR ]; then
if [ -n "${HADOOP_CONF_DIR:-}" ]; then
CLASSPATH=$CLASSPATH:$HADOOP_CONF_DIR
fi
set -u

echo "class path is $CLASSPATH"

Expand Down
6 changes: 2 additions & 4 deletions bin/start-shuffle-server.sh
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,15 @@ done
mkdir -p "${RSS_LOG_DIR}"
mkdir -p "${RSS_PID_DIR}"

set +u
if [ $HADOOP_HOME ]; then
if [ -n "${HADOOP_HOME:-}" ]; then
HADOOP_DEPENDENCY="$("$HADOOP_HOME/bin/hadoop" classpath --glob)"
CLASSPATH=$CLASSPATH:$HADOOP_DEPENDENCY
JAVA_LIB_PATH="-Djava.library.path=$HADOOP_HOME/lib/native"
fi

if [ "$HADOOP_CONF_DIR" ]; then
if [ -n "${HADOOP_CONF_DIR:-}" ]; then
CLASSPATH=$CLASSPATH:$HADOOP_CONF_DIR
fi
set -u

echo "class path is $CLASSPATH"

Expand Down
14 changes: 5 additions & 9 deletions bin/utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -185,17 +185,15 @@ function load_rss_env {
fi

# export default value
set +o nounset
if [ -z "$HADOOP_CONF_DIR" ] && [ "$HADOOP_HOME" ]; then
if [ -z "${HADOOP_CONF_DIR:-}" ] && [ -n "${HADOOP_HOME:-}" ]; then
HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
fi
if [ -z "$RSS_LOG_DIR" ]; then
if [ -z "${RSS_LOG_DIR:-}" ]; then
RSS_LOG_DIR="${RSS_HOME}/logs"
fi
if [ -z "$RSS_PID_DIR" ]; then
if [ -z "${RSS_PID_DIR:-}" ]; then
RSS_PID_DIR="${RSS_HOME}"
fi
set -o nounset

RUNNER="${JAVA_HOME}/bin/java"
JPS="${JAVA_HOME}/bin/jps"
Expand All @@ -208,14 +206,12 @@ function load_rss_env {
echo "Using RSS from ${RSS_HOME}"
echo "Using RSS conf from ${RSS_CONF_DIR}"

set +u
if [ $HADOOP_HOME ]; then
if [ -n "${HADOOP_HOME:-}" ]; then
echo "Using Hadoop from ${HADOOP_HOME}"
fi
if [ $HADOOP_CONF_DIR ]; then
if [ -n "${HADOOP_CONF_DIR:-}" ]; then
echo "Using Hadoop conf from ${HADOOP_CONF_DIR}"
fi
set -u

echo "Write log file to ${RSS_LOG_DIR}"
echo "Write pid file to ${RSS_PID_DIR}"
Expand Down
19 changes: 5 additions & 14 deletions deploy/kubernetes/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,12 @@ RUN mkdir -p /data/rssadmin/
RUN chown -R rssadmin:rssadmin /data
USER rssadmin

COPY rss-${RSS_VERSION}-hadoop${HADOOP_SHORT_VERSION}.tgz /data/rssadmin
RUN tar -xvf /data/rssadmin/rss-${RSS_VERSION}-hadoop${HADOOP_SHORT_VERSION}.tgz -C /data/rssadmin
RUN mv /data/rssadmin/rss-${RSS_VERSION}-hadoop${HADOOP_SHORT_VERSION} /data/rssadmin/rss
RUN rm /data/rssadmin/rss/conf/rss-env.sh
RUN rm -rf /data/rssadmin/rss-${RSS_VERSION}-hadoop${HADOOP_SHORT_VERSION}.tgz
COPY rss.tgz /data/rssadmin
RUN tar -xvf /data/rssadmin/rss.tgz -C /data/rssadmin
RUN rm -rf /data/rssadmin/rss.tgz

COPY rss-env.sh /data/rssadmin/rss/conf

COPY start.sh /data/rssadmin/rss/bin

COPY hadoop-${HADOOP_VERSION}.tar.gz /data/rssadmin
RUN tar -zxvf /data/rssadmin/hadoop-${HADOOP_VERSION}.tar.gz -C /data/rssadmin
RUN mv /data/rssadmin/hadoop-${HADOOP_VERSION} /data/rssadmin/hadoop
RUN rm -rf /data/rssadmin/hadoop-${HADOOP_VERSION}.tar.gz
COPY hadoopconfig/ /data/rssadmin/hadoop/etc/hadoop
COPY --chown=rssadmin:rssadmin --chmod=744 rss-env.sh /data/rssadmin/rss/conf
COPY --chown=rssadmin:rssadmin --chmod=744 start.sh /data/rssadmin/rss/bin

ENV RSS_VERSION ${RSS_VERSION}
ENV HADOOP_VERSION ${HADOOP_VERSION}
Expand Down
68 changes: 52 additions & 16 deletions deploy/kubernetes/docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,19 @@ function exit_with_usage() {
echo "./build.sh - Tool for building docker images of Remote Shuffle Service"
echo ""
echo "Usage:"
echo "+------------------------------------------------------------------------------------------------------+"
echo "| ./build.sh [--hadoop-version <hadoop version>] [--registry <registry url>] [--author <author name>] |"
echo "| [--base-os-distribution <os distribution>] [--base-image <base image url>] |"
echo "| [--push-image <true|false>] [--apache-mirror <apache mirror url>] |"
echo "+------------------------------------------------------------------------------------------------------+"
echo "+---------------------------------------------------------------------------------------+"
echo "| ./build.sh [--hadoop-version <hadoop version>] [--hadoop-provided <true|false>] |"
echo "| [--registry <registry url>] [--author <author name>] |"
echo "| [--base-os-distribution <os distribution>] [--base-image <base image url>] |"
echo "| [--push-image <true|false>] [--apache-mirror <apache mirror url>] |"
echo "+---------------------------------------------------------------------------------------+"
exit 1
}

REGISTRY="docker.io/library"
HADOOP_VERSION=2.8.5
HADOOP_SHORT_VERSION=$(echo $HADOOP_VERSION | awk -F "." '{print $1"."$2}')
HADOOP_PROVIDED="true"
AUTHOR=$(whoami)
# If you are based in China, you could pass --apache-mirror <a_mirror_url> when building this.
APACHE_MIRROR="https://dlcdn.apache.org"
Expand All @@ -52,6 +54,11 @@ while (( "$#" )); do
;;
--hadoop-version)
HADOOP_VERSION="$2"
HADOOP_SHORT_VERSION=$(echo $HADOOP_VERSION | awk -F "." '{print $1"."$2}')
shift
;;
--hadoop-provided)
HADOOP_PROVIDED="$2"
shift
;;
--author)
Expand Down Expand Up @@ -101,14 +108,15 @@ else
echo "using base image(${BASE_IMAGE}) to build rss server"
fi


HADOOP_FILE=hadoop-${HADOOP_VERSION}.tar.gz
ARCHIVE_HADOOP_URL=https://archive.apache.org/dist/hadoop/core/hadoop-${HADOOP_VERSION}/${HADOOP_FILE}
HADOOP_URL=${APACHE_MIRROR}/hadoop/core/hadoop-${HADOOP_VERSION}/${HADOOP_FILE}
echo "HADOOP_URL is either ${HADOOP_URL} or ${ARCHIVE_HADOOP_URL}"
if [ ! -e "$HADOOP_FILE" ]; \
then wget "${HADOOP_URL}" || wget "$ARCHIVE_HADOOP_URL"; \
else echo "${HADOOP_FILE} has been downloaded"; \
if [ "$HADOOP_PROVIDED" == "true" ]; then
HADOOP_FILE=hadoop-${HADOOP_VERSION}.tar.gz
ARCHIVE_HADOOP_URL=https://archive.apache.org/dist/hadoop/core/hadoop-${HADOOP_VERSION}/${HADOOP_FILE}
HADOOP_URL=${APACHE_MIRROR}/hadoop/core/hadoop-${HADOOP_VERSION}/${HADOOP_FILE}
echo "HADOOP_URL is either ${HADOOP_URL} or ${ARCHIVE_HADOOP_URL}"
if [ ! -e "$HADOOP_FILE" ]; \
then wget "${HADOOP_URL}" || wget "$ARCHIVE_HADOOP_URL"; \
else echo "${HADOOP_FILE} has been downloaded"; \
fi
fi

RSS_DIR=../../..
Expand All @@ -117,13 +125,40 @@ RSS_VERSION=$(./mvnw help:evaluate -Dexpression=project.version 2>/dev/null | gr
RSS_FILE=rss-${RSS_VERSION}-hadoop${HADOOP_SHORT_VERSION}.tgz
echo "RSS_VERSION: $RSS_VERSION"
echo "RSS_FILE: $RSS_FILE"
if [ ! -e "$RSS_FILE" ]; \
then bash ./build_distribution.sh; \
else echo "$RSS_FILE has been built"; \
if [ ! -e "$RSS_FILE" ]; then
if [ "$HADOOP_PROVIDED" == "true" ]; then
if [ "$HADOOP_SHORT_VERSION" == "3.2" ]; then
HADOOP_PROFILE="-Phadoop-dependencies-provided -Pnetty-4.1.68.Final"
else
HADOOP_PROFILE="-Phadoop-dependencies-provided"
fi
else
HADOOP_PROFILE="-Phadoop-dependencies-included"
fi
bash ./build_distribution.sh --hadoop-profile hadoop${HADOOP_SHORT_VERSION} ${HADOOP_PROFILE:-}
else
echo "$RSS_FILE has been built"
fi
cd "$OLDPWD" || exit
cp "$RSS_DIR/$RSS_FILE" .

# prepare rss.tgz, which will become the content of /data/rssadmin
if [ ! -e rss.tgz ]; then
rm -rf tmp; mkdir -p tmp; cd tmp
tar -xzf "../$RSS_FILE"
mv "${RSS_FILE/%.tgz/}" rss

# add hadoop binaries to tgz
if [ "$HADOOP_PROVIDED" == "true" ]; then
tar -xzf "../hadoop-${HADOOP_VERSION}.tar.gz"
mv "hadoop-${HADOOP_VERSION}" hadoop
cp -r ../hadoopconfig/ hadoop/etc/hadoop
fi
tar -czf ../rss.tgz *
cd "$OLDPWD" || exit
fi


GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
GIT_COMMIT=$(git describe --dirty --always --tags | sed 's/-/./g')
echo "image version: ${IMAGE_VERSION:=$RSS_VERSION-$GIT_COMMIT}"
Expand All @@ -133,6 +168,7 @@ docker build --network=host -t "$IMAGE" \
--build-arg RSS_VERSION="$RSS_VERSION" \
--build-arg HADOOP_VERSION="$HADOOP_VERSION" \
--build-arg HADOOP_SHORT_VERSION="$HADOOP_SHORT_VERSION" \
--build-arg HADOOP_PROVIDED="$HADOOP_PROVIDED" \
--build-arg AUTHOR="$AUTHOR" \
--build-arg GIT_COMMIT="$GIT_COMMIT" \
--build-arg GIT_BRANCH="$GIT_BRANCH" \
Expand Down
5 changes: 4 additions & 1 deletion deploy/kubernetes/docker/rss-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ set -o pipefail
set -o nounset # exit the script if you try to use an uninitialised variable
set -o errexit # exit the script if any statement returns a non-true return value

HADOOP_HOME="/data/rssadmin/hadoop"
if [ -d "/data/rssadmin/hadoop" ]; then
export HADOOP_HOME="/data/rssadmin/hadoop"
fi

RUNNER="${JAVA_HOME}/bin/java"
JPS="${JAVA_HOME}/bin/jps"

Expand Down
4 changes: 4 additions & 0 deletions docs/operator/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ Run the following command:
cd deploy/kubernetes/docker && sh build.sh --registry ${our-registry}
```

This compiles RSS with Hadoop 2.8 support and adds the Hadoop binaries to the Docker image.
Use `--hadoop-version x.y.z` to choose a different Hadoop version. Use `--hadoop-provided false` to **not**
include the Hadoop installation in the image.

## Creating or Updating CRD

We can refer
Expand Down
35 changes: 35 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,14 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
Expand Down Expand Up @@ -523,6 +531,14 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
</exclusions>
</dependency>

Expand Down Expand Up @@ -656,6 +672,10 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
</exclusion>
</exclusions>
</dependency>

Expand Down Expand Up @@ -2293,6 +2313,12 @@
<module>deploy/kubernetes</module>
</modules>
</profile>
<profile>
<id>hadoop-dependencies-provided</id>
<properties>
<hadoop.scope>provided</hadoop.scope>
</properties>
</profile>
<profile>
<id>hadoop-dependencies-included</id>
<properties>
Expand Down Expand Up @@ -2329,6 +2355,15 @@
</dependency>
</dependencies>
</profile>
<profile>
<id>netty-4.1.68.Final</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<properties>
<netty.version>4.1.68.Final</netty.version>
</properties>
</profile>

<profile>
<id>apache-release</id>
Expand Down
Loading