# VERSION 1.0 (apache-docker)
# AUTHOR: Abhishek Sharma <[email protected]>
# DESCRIPTION: Docker container for Apache Airflow
FROM python:3.6
LABEL maintainer="Abhishek Sharma <[email protected]>"
ARG PYTHON_DEPS="boto3 "
ARG AIRFLOW_VERSION
ARG AIRFLOW_PATCH_VERSION
ARG AIRFLOW_DEPS="all,password"
ARG BUILD_DEPS="freetds-dev libkrb5-dev libsasl2-dev libssl-dev libffi-dev libpq-dev git"
ARG OTHER_DEPS="sshpass openssh-server openssh-client less gcc make wget vim curl rsync netcat logrotate"
ARG APT_DEPS="$BUILD_DEPS $OTHER_DEPS libsasl2-dev freetds-bin build-essential default-libmysqlclient-dev apt-utils locales "
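# AIRFLOW_VERSION (required) and AIRFLOW_PATCH_VERSION (optional: 1.8, 1.9, or 1.10) are supplied at build time,
# for example (illustrative values and image tag):
#   docker build --build-arg AIRFLOW_VERSION=1.10.3 --build-arg AIRFLOW_PATCH_VERSION=1.10 -t docker-airflow .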
ENV AIRFLOW_HOME /usr/local/airflow
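# AIRFLOW_GPL_UNIDECODE=yes lets the Airflow installer pull in the GPL-licensed unidecode dependency
# (Airflow 1.10+ requires either this or SLUGIFY_USES_TEXT_UNIDECODE to be set).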
ENV AIRFLOW_GPL_UNIDECODE yes
# Install dependency packages.
RUN set -x \
&& apt-get update \
&& if [ -n "${APT_DEPS}" ]; then apt-get install -y $APT_DEPS; fi
# Install Redis.
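# (Redis presumably serves as the Celery broker when Airflow runs with the CeleryExecutor.)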
RUN apt-get update && apt policy redis-server && apt-get install -y redis-server
# Install Java for Java-based applications.
RUN apt-get update && apt policy openjdk-8-jdk && apt-get install -y openjdk-8-jdk
# For older Airflow versions (1.8.1, 1.8.2), pip must be downgraded.
RUN if [ "${AIRFLOW_VERSION}" = "1.8.1" ] || [ "${AIRFLOW_VERSION}" = "1.8.2" ]; then pip install pip==9.0; \
else python -m pip install --upgrade pip setuptools wheel; fi
# Install Python dependencies.
RUN if [ -n "${PYTHON_DEPS}" ]; then pip install --no-cache-dir ${PYTHON_DEPS}; fi
# Install Airflow with the extras selected via AIRFLOW_DEPS.
RUN pip install apache-airflow[$AIRFLOW_DEPS]==$AIRFLOW_VERSION && apt-get clean
# Install the Google Cloud SDK (gcloud) and GCP Python client libraries.
RUN curl -fsSL https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.tar.gz -o /tmp/google-cloud-sdk.tar.gz \
&& mkdir -p /usr/local/gcloud \
&& tar -C /usr/local/gcloud -xf /tmp/google-cloud-sdk.tar.gz \
&& /usr/local/gcloud/google-cloud-sdk/install.sh \
&& rm /tmp/google-cloud-sdk.tar.gz
RUN pip install --upgrade google-api-python-client google-cloud-storage
# Install the AWS CLI (system-wide, so it is on PATH for the airflow user).
RUN pip3 install --upgrade awscli
# Add an 'airflow' group and user; the password is set via chpasswd.
RUN groupadd -g 5555 airflow \
&& useradd -ms /bin/bash -d ${AIRFLOW_HOME} -u 5555 -g 5555 airflow \
&& echo "airflow:airflow" | chpasswd && adduser airflow sudo
# Create required directories.
RUN mkdir /code-artifacts /data /home/airflow ${AIRFLOW_HOME}/startup_log ${AIRFLOW_HOME}/.ssh ${AIRFLOW_HOME}/.aws ${AIRFLOW_HOME}/.gcp ${AIRFLOW_HOME}/dags ${AIRFLOW_HOME}/logs ${AIRFLOW_HOME}/plugins \
&& mkdir -p /user/airflow \
&& chown -R airflow:airflow ${AIRFLOW_HOME}/* /data /code-artifacts /user/airflow /home/airflow
COPY airflow-config/airflow-${AIRFLOW_VERSION}.cfg ${AIRFLOW_HOME}/airflow.cfg
COPY config/user_add.py ${AIRFLOW_HOME}/user_add.py
COPY config/rbac_user_add.py ${AIRFLOW_HOME}/rbac_user_add.py
COPY config/execute_continous_scheduler.sh ${AIRFLOW_HOME}/execute_continous_scheduler.sh
# Airflow patch files live in the airflowPatch* folders.
RUN mkdir /tmp/airflow_patch
COPY airflowPatch1.8/* /tmp/airflow_patch/airflowPatch1.8/
COPY airflowPatch1.9/* /tmp/airflow_patch/airflowPatch1.9/
COPY airflowPatch1.10/* /tmp/airflow_patch/airflowPatch1.10/
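# If AIRFLOW_PATCH_VERSION is set (1.8, 1.9, or 1.10), overwrite the installed Airflow sources with the patched copies.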
RUN if [ -n "$AIRFLOW_PATCH_VERSION" ] ; then \
cp /tmp/airflow_patch/airflowPatch${AIRFLOW_PATCH_VERSION}/models.py /usr/local/lib/python3.6/site-packages/airflow/models.py; \
cp /tmp/airflow_patch/airflowPatch${AIRFLOW_PATCH_VERSION}/views.py /usr/local/lib/python3.6/site-packages/airflow/www/views.py; \
cp /tmp/airflow_patch/airflowPatch${AIRFLOW_PATCH_VERSION}/password_auth.py /usr/local/lib/python3.6/site-packages/airflow/contrib/auth/backends/password_auth.py; \
cp /tmp/airflow_patch/airflowPatch${AIRFLOW_PATCH_VERSION}/e3a246e0dc1_current_schema.py /usr/local/lib/python3.6/site-packages/airflow/migrations/versions/e3a246e0dc1_current_schema.py; \
chown root:staff /usr/local/lib/python3.6/site-packages/airflow/models.py; \
chown root:staff /usr/local/lib/python3.6/site-packages/airflow/www/views.py; \
chown root:staff /usr/local/lib/python3.6/site-packages/airflow/contrib/auth/backends/password_auth.py; \
chown root:staff /usr/local/lib/python3.6/site-packages/airflow/migrations/versions/e3a246e0dc1_current_schema.py; \
fi
RUN rm -rf /tmp/airflow_patch
# Add the S3 logger.
COPY airflowExtraFeatures/s3_logger.py /usr/local/lib/python3.6/site-packages/airflow/config_templates/s3_logger.py
RUN chown root:staff /usr/local/lib/python3.6/site-packages/airflow/config_templates/s3_logger.py
RUN chown -R airflow:airflow ${AIRFLOW_HOME}/*
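# Volumes mounted at run time: Hortonworks HDP client binaries (see the commented HDP_VERSION hints below),
# code artifacts, GCP/AWS credentials, DAGs, and logs.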
VOLUME /usr/hdp
VOLUME /code-artifacts
VOLUME ${AIRFLOW_HOME}/.gcp
VOLUME ${AIRFLOW_HOME}/.aws
VOLUME ${AIRFLOW_HOME}/dags
VOLUME ${AIRFLOW_HOME}/logs
ENV PATH=$PATH:/usr/local/gcloud/google-cloud-sdk/bin
ENV PYTHONPATH=/usr/local/lib/python3.6
# Log rotation for Airflow processes.
COPY logrotate/airflow /etc/logrotate.d/airflow
COPY logrotate/run_log_rotate.sh ${AIRFLOW_HOME}/run_log_rotate.sh
# Set up the log-rotation cron job.
RUN echo "30 * * * * /bin/sh ${AIRFLOW_HOME}/run_log_rotate.sh >> ${AIRFLOW_HOME}/logRotate_logs.txt" > ${AIRFLOW_HOME}/airflow_cron \
&& crontab ${AIRFLOW_HOME}/airflow_cron \
&& rm ${AIRFLOW_HOME}/airflow_cron
#ENV HDP_VERSION=2.6.1.0-129
#export HDP_VERSION=${HDP_VERSION}
#export HADOOP_CONF_DIR=/etc/hadoop/${HDP_VERSION}/0
#export SPARK_CONF_DIR=/etc/spark/${HDP_VERSION}/0
#export HIVE_CONF_DIR=/etc/hive/${HDP_VERSION}/0
#export TEZ_CONF_DIR=/etc/tez/${HDP_VERSION}/0
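# Exposed ports: 5555 (Flower UI), 8793 (Airflow worker log server), 2222 (likely SSH; also the health endpoint polled by the HEALTHCHECK below), 6379 (Redis).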
EXPOSE 5555 8793 2222 6379
COPY script/docker-entrypoint.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
RUN ln -s /usr/local/bin/docker-entrypoint.sh /entrypoint.sh # backwards compat
WORKDIR ${AIRFLOW_HOME}
USER airflow
# Set the default mode of docker-airflow to 'standalone'; useful when the container is launched from Kitematic.
ENV MODE standalone
HEALTHCHECK CMD ["curl", "-f", "http://localhost:2222/health"]
ENTRYPOINT ["docker-entrypoint.sh"]