ARG TAG_FOR_BASE_MICROMAMBA_IMAGE
FROM mambaorg/micromamba:$TAG_FOR_BASE_MICROMAMBA_IMAGE

ARG CUDA_MAJOR_MINOR_VERSION=''
ARG ENV_IN_FILENAME
ARG ARG_BASED_ENV_IN_FILENAME

ARG AMZN_BASE="/opt/amazon/sagemaker"
ARG DB_ROOT_DIR="/opt/db"
ARG DIRECTORY_TREE_STAGE_DIR="${AMZN_BASE}/dir-staging"

ARG NB_USER="sagemaker-user"
ARG NB_UID=1000
ARG NB_GID=100

# https://www.openssl.org/source/
ARG FIPS_VALIDATED_SSL=3.0.8

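# A minimal sketch of how the build arguments above might be supplied; the base image tag,
# env file names, and CUDA version below are illustrative placeholders, not the official values:
#
#   docker build \
#     --build-arg TAG_FOR_BASE_MICROMAMBA_IMAGE=jammy \
#     --build-arg ENV_IN_FILENAME=gpu.env.in \
#     --build-arg ARG_BASED_ENV_IN_FILENAME=gpu.arg_based_env.in \
#     --build-arg CUDA_MAJOR_MINOR_VERSION=11.8 \
#     -t sagemaker-distribution:local .
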
ENV SAGEMAKER_LOGGING_DIR="/var/log/sagemaker/"
ENV STUDIO_LOGGING_DIR="/var/log/studio/"
ENV EDITOR="nano"

USER root
RUN usermod "--login=${NB_USER}" "--home=/home/${NB_USER}" --move-home "-u ${NB_UID}" "${MAMBA_USER}" && \
    groupmod "--new-name=${NB_USER}" --non-unique "-g ${NB_GID}" "${MAMBA_USER}" && \
    # Update the expected value of MAMBA_USER for the
    # _entrypoint.sh consistency check.
    echo "${NB_USER}" > "/etc/arg_mamba_user" && \
    :
ENV MAMBA_USER=$NB_USER
ENV USER=$NB_USER

RUN apt-get update && apt-get upgrade -y && \
    apt-get install -y --no-install-recommends sudo gettext-base wget curl unzip git rsync build-essential openssh-client nano cron less mandoc && \
    # Install tzdata below but leave the default time zone as UTC; this helps packages like pandas function correctly.
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata krb5-user libkrb5-dev libsasl2-dev libsasl2-modules && \
    chmod g+w /etc/passwd && \
    echo "ALL ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers && \
    touch /etc/krb5.conf.lock && chown ${NB_USER}:${MAMBA_USER} /etc/krb5.conf* && \
    # Note that we do NOT run `rm -rf /var/lib/apt/lists/*` here. If we did, anyone building on top of our images
    # would not be able to run any `apt-get install` commands, which would hamper customizability of the images.
    curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip awscliv2.zip && \
    sudo ./aws/install && \
    rm -rf aws awscliv2.zip && \
    :
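# Source micromamba's activation hook from /etc/profile so that login shells (e.g. `bash -l`)
# start with the base environment activated. The _activate_current_env.sh script ships with
# the mambaorg/micromamba base image.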
RUN echo "source /usr/local/bin/_activate_current_env.sh" | tee --append /etc/profile

# CodeEditor - create server, user data dirs
RUN mkdir -p /opt/amazon/sagemaker/sagemaker-code-editor-server-data /opt/amazon/sagemaker/sagemaker-code-editor-user-data \
    && chown $MAMBA_USER:$MAMBA_USER /opt/amazon/sagemaker/sagemaker-code-editor-server-data /opt/amazon/sagemaker/sagemaker-code-editor-user-data

# Create dir to store user data files
RUN mkdir -p /opt/amazon/sagemaker/user-data \
    && chown $MAMBA_USER:$MAMBA_USER /opt/amazon/sagemaker/user-data

# Merge in OS directory tree contents.
RUN mkdir -p ${DIRECTORY_TREE_STAGE_DIR}
COPY dirs/ ${DIRECTORY_TREE_STAGE_DIR}/
RUN rsync -a ${DIRECTORY_TREE_STAGE_DIR}/ / && \
    rm -rf ${DIRECTORY_TREE_STAGE_DIR}

# CodeEditor - download the extensions
RUN mkdir -p /etc/code-editor/extensions && \
    while IFS= read -r url || [ -n "$url" ]; do \
        echo "Downloading extension from ${url}..." && \
        wget --no-check-certificate -P /etc/code-editor/extensions "${url}"; \
    done < /etc/code-editor/extensions.txt
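# The loop above expects /etc/code-editor/extensions.txt (presumably provided via the dirs/ tree
# copied in earlier) to list one VSIX download URL per line, for example (illustrative URL only):
#   https://open-vsx.org/api/<publisher>/<name>/<version>/file/<publisher>.<name>-<version>.vsix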
USER $MAMBA_USER
COPY --chown=$MAMBA_USER:$MAMBA_USER $ENV_IN_FILENAME *.in /tmp/

# Make sure that $ENV_IN_FILENAME ends with a newline before the `tee` command below runs;
# otherwise the appended content would be merged onto its last line and corrupt the environment spec.
RUN if [[ -z $ARG_BASED_ENV_IN_FILENAME ]] ; \
    then echo 'No ARG_BASED_ENV_IN_FILENAME passed' ; \
    else envsubst < /tmp/$ARG_BASED_ENV_IN_FILENAME | tee --append /tmp/$ENV_IN_FILENAME ; \
    fi
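# A sketch of what the step above does, assuming a hypothetical ARG-based .in file containing
# a line such as:
#   conda-forge::cuda-version=$CUDA_MAJOR_MINOR_VERSION
# envsubst substitutes the build argument and the result, e.g.:
#   conda-forge::cuda-version=11.8
# is appended to /tmp/$ENV_IN_FILENAME so it becomes part of the final environment spec.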
ARG CONDA_OVERRIDE_CUDA=$CUDA_MAJOR_MINOR_VERSION
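# CONDA_OVERRIDE_CUDA (set above from CUDA_MAJOR_MINOR_VERSION) overrides the __cuda virtual
# package, so the solver can pick CUDA-enabled builds for the requested CUDA version even though
# no GPU or driver is visible while the image is being built.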
RUN micromamba install -y --name base --file /tmp/$ENV_IN_FILENAME && \
    micromamba clean --all --yes --force-pkgs-dirs && \
    rm -rf /tmp/*.in

ARG MAMBA_DOCKERFILE_ACTIVATE=1
RUN sudo ln -s $(which python3) /usr/bin/python

# Update npm version
RUN npm i -g npm

# Configure CodeEditor - Install extensions and set preferences
RUN \
    extensionloc=/opt/amazon/sagemaker/sagemaker-code-editor-server-data/extensions && mkdir -p "${extensionloc}" \
    # Loop through all vsix files in /etc/code-editor/extensions and install them
    && for ext in /etc/code-editor/extensions/*.vsix; do \
        echo "Installing extension ${ext}..."; \
        sagemaker-code-editor --install-extension "${ext}" --extensions-dir "${extensionloc}" --server-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-server-data --user-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-user-data; \
    done \
    # Copy the settings
    && cp /etc/code-editor/code_editor_machine_settings.json /opt/amazon/sagemaker/sagemaker-code-editor-server-data/data/Machine/settings.json

# Install Glue kernels and move them to the shared kernel directory.
# Also patch the base kernel so that Studio's background code doesn't silently start a session.
RUN install-glue-kernels && \
    SITE_PACKAGES=$(pip show aws-glue-sessions | grep Location | awk '{print $2}') && \
    jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_pyspark --user && \
    jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_spark --user && \
    mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_pyspark /opt/conda/share/jupyter/kernels && \
    mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_spark /opt/conda/share/jupyter/kernels && \
    sed -i '/if not store_history and (/i\ if "sm_analytics_runtime_check" in code:\n return await self._complete_cell()\n' \
    "$SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_kernel_base/BaseKernel.py"
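# The sed call above inserts (roughly) the following guard into BaseKernel.py, just before the
# `if not store_history and (` line, so that Studio's background "sm_analytics_runtime_check"
# probe returns early instead of silently starting a Glue session:
#
#     if "sm_analytics_runtime_check" in code:
#         return await self._complete_cell()
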
# Patch glue kernels to use kernel wrapper
COPY patch_glue_pyspark.json /opt/conda/share/jupyter/kernels/glue_pyspark/kernel.json
COPY patch_glue_spark.json /opt/conda/share/jupyter/kernels/glue_spark/kernel.json

# Configure RTC - disable jupyter_collaboration by default
RUN jupyter labextension disable @jupyter/collaboration-extension

USER root
RUN HOME_DIR="/home/${NB_USER}/licenses" \
    && mkdir -p ${HOME_DIR} \
    && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
    && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
    && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
    && chmod +x /usr/local/bin/testOSSCompliance \
    && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
    && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python \
    && rm -rf ${HOME_DIR}/oss_compliance*

# Create logging directories for supervisor
RUN mkdir -p $SAGEMAKER_LOGGING_DIR && \
    chmod a+rw $SAGEMAKER_LOGGING_DIR && \
    mkdir -p ${STUDIO_LOGGING_DIR} && \
    chown ${NB_USER}:${MAMBA_USER} ${STUDIO_LOGGING_DIR}

# Clean up CodeEditor artifacts
RUN rm -rf /etc/code-editor

# Create supervisord runtime directory
RUN mkdir -p /var/run/supervisord && \
    chmod a+rw /var/run/supervisord

# Create root directory for DB
RUN mkdir -p $DB_ROOT_DIR && \
    chmod a+rw $DB_ROOT_DIR

USER $MAMBA_USER
ENV PATH="/opt/conda/bin:/opt/conda/condabin:$PATH"
WORKDIR "/home/${NB_USER}"

# Install the FIPS provider for OpenSSL on top of the existing OpenSSL installation.
# v3.0.8 is the latest FIPS-validated provider, so that is the version we install,
# but its tests have to be run against the OpenSSL version that is actually installed.
# See https://github.com/openssl/openssl/blob/master/README-FIPS.md and https://www.openssl.org/source/
RUN INSTALLED_SSL=$(micromamba list | grep openssl | tr -s ' ' | cut -d ' ' -f 3 | head -n 1) && \
    # Download source code for the installed and the FIPS-validated OpenSSL versions
    curl -L https://www.openssl.org/source/openssl-$FIPS_VALIDATED_SSL.tar.gz > openssl-$FIPS_VALIDATED_SSL.tar.gz && \
    curl -L https://www.openssl.org/source/openssl-$INSTALLED_SSL.tar.gz > openssl-$INSTALLED_SSL.tar.gz && \
    tar -xf openssl-$FIPS_VALIDATED_SSL.tar.gz && tar -xf openssl-$INSTALLED_SSL.tar.gz && cd openssl-$FIPS_VALIDATED_SSL && \
    # Configure both versions with FIPS enabled and build them
    ./Configure enable-fips --prefix=/opt/conda --openssldir=/opt/conda/ssl && make && \
    cd ../openssl-$INSTALLED_SSL && \
    ./Configure enable-fips --prefix=/opt/conda --openssldir=/opt/conda/ssl && make && \
    # Copy the validated provider into the installed version's tree for testing
    cp ../openssl-$FIPS_VALIDATED_SSL/providers/fips.so providers/. && \
    cp ../openssl-$FIPS_VALIDATED_SSL/providers/fipsmodule.cnf providers/. && \
    make tests && cd ../openssl-$FIPS_VALIDATED_SSL && \
    # After the tests pass, install the FIPS provider and remove the source code
    make install_fips && cd .. && rm -rf ./openssl-*
# Create a new config file with FIPS enabled; users can then point OPENSSL_CONF at it to enable FIPS,
# e.g. export OPENSSL_CONF=/opt/conda/ssl/openssl-fips.cnf
RUN cp /opt/conda/ssl/openssl.cnf /opt/conda/ssl/openssl-fips.cnf && \
    sed -i "s:# .include fipsmodule.cnf:.include /opt/conda/ssl/fipsmodule.cnf:" /opt/conda/ssl/openssl-fips.cnf && \
    sed -i 's:# fips = fips_sect:fips = fips_sect:' /opt/conda/ssl/openssl-fips.cnf
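# One way to sanity-check the result inside the built image (illustrative, not part of the build):
#   OPENSSL_CONF=/opt/conda/ssl/openssl-fips.cnf openssl list -providers
# should report the fips provider as available.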
ENV OPENSSL_MODULES=/opt/conda/lib64/ossl-modules/

# Install the Kerberos Python bindings and make sure no new dependency is pulled in:
# the check below fails the build if `pip show krb5` reports anything in its Requires/Required-by fields.
RUN pip install "krb5>=0.5.1,<0.6" && \
    pip show krb5 | grep Require | xargs -i sh -c '[ $(echo {} | cut -d: -f2 | wc -w) -eq 0 ] '

# https://stackoverflow.com/questions/122327
RUN SYSTEM_PYTHON_PATH=$(python3 -c "from __future__ import print_function;import sysconfig; print(sysconfig.get_paths().get('purelib'))") && \
    # Remove SparkRKernel as it's not supported \
    jupyter-kernelspec remove -f -y sparkrkernel && \
    # Patch Sparkmagic lib to support Custom Certificates \
    # https://github.com/jupyter-incubator/sparkmagic/pull/435/files \
    cp -a ${SYSTEM_PYTHON_PATH}/sagemaker_studio_analytics_extension/patches/configuration.py ${SYSTEM_PYTHON_PATH}/sparkmagic/utils/ && \
    cp -a ${SYSTEM_PYTHON_PATH}/sagemaker_studio_analytics_extension/patches/reliablehttpclient.py ${SYSTEM_PYTHON_PATH}/sparkmagic/livyclientlib/reliablehttpclient.py && \
    sed -i 's= "python"= "/opt/conda/bin/python"=g' /opt/conda/share/jupyter/kernels/pysparkkernel/kernel.json /opt/conda/share/jupyter/kernels/sparkkernel/kernel.json && \
    sed -i 's="Spark"="SparkMagic Spark"=g' /opt/conda/share/jupyter/kernels/sparkkernel/kernel.json && \
    sed -i 's="PySpark"="SparkMagic PySpark"=g' /opt/conda/share/jupyter/kernels/pysparkkernel/kernel.json

ENV SHELL=/bin/bash