diff --git a/.build/.versions.yml b/.build/.versions.yml
new file mode 100644
index 0000000..b7282a0
--- /dev/null
+++ b/.build/.versions.yml
@@ -0,0 +1,47 @@
+################ Version Compatibility Matrix ######################
+##### SECTION USED TO GET COMPATIBLE VERSIONS ######################
+##### DON'T PUT python_version IN AN ARRAY - JAVA COMPATIBILITY ####
+####### ENRICH WITH NEWER VERSIONS #################################
+compatibility-matrix:
+  # Maximum python version supported by spark-3.2.x: 3.9
+  # Java support: 8/11
+  - python_version: 3.9
+    spark_version: [3.2.1, 3.2.2, 3.2.3, 3.2.4]
+    java_version: [11]
+    scala_version: [2.12, 2.13]
+    hadoop_version: 3.2
+    spark_download_url: https://archive.apache.org/dist/spark/
+  # Maximum python version supported by spark-3.3.x: 3.10
+  # Java support: 8/11/17
+  - python_version: '3.10'
+    spark_version: [3.3.1, 3.3.2, 3.3.3, 3.3.4]
+    java_version: [17]
+    scala_version: [2.12, 2.13]
+    hadoop_version: 3
+    spark_download_url: https://archive.apache.org/dist/spark/
+  # Maximum python version supported by spark-3.4.x: 3.11
+  # Java support: 8/11/17
+  - python_version: 3.11
+    spark_version: [3.4.1, 3.4.2]
+    java_version: [17]
+    scala_version: [2.12, 2.13]
+    hadoop_version: 3
+    spark_download_url: https://archive.apache.org/dist/spark/
+  # https://spark.apache.org/releases/spark-release-3-5-0.html
+  # Minimum supported java version: 17/21
+  - python_version: 3.11
+    spark_version: [3.5.0]
+    java_version: [17]
+    scala_version: [2.12, 2.13]
+    hadoop_version: 3
+    spark_download_url: https://archive.apache.org/dist/spark/
+### Override the matrix above by providing the versions to build
+### 1- The build-matrix is empty: build all possible combinations
+### 2- Override specific versions: build all possible combinations which are compatible with those specific versions
+### 3- Versions not present in the compatibility-matrix are ignored
+build-matrix:
+  python_version: ['3.9', '3.10', '3.11']
+  spark_version: [3.2.4, 3.3.2, 3.3.4, 3.4.2, 3.5.0]
+  java_version: [11, 17]
+  scala_version: [2.12]
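+### Illustrative example (not part of the matrix above): to build only the
+### Spark 3.5.0 combinations, the build-matrix could be narrowed to:
+###   build-matrix:
+###     spark_version: [3.5.0]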
diff --git a/.github/actions/install-patchs-and-extension/action.yml b/.github/actions/install-patchs-and-extension/action.yml
new file mode 100644
index 0000000..3ceed4f
--- /dev/null
+++ b/.github/actions/install-patchs-and-extension/action.yml
@@ -0,0 +1,20 @@
+name: Set up dev env requirements
+description: Set up dev env requirements for build and tests
+
+runs:
+  using: composite
+  steps:
+    - name: Setup dev env requirements 📦
+      run: |
+        cp -f requirements-dev.txt ../
+        cat ../python/requirements-extended.txt >> ../requirements-dev.txt
+      working-directory: ./docker-stacks
+      shell: bash
+
+    - name: Copy patches 📦
+      run: |
+        cp -fr ../python/okdp/patch/* ./
+        cp -fr ../python/okdp ./
+      working-directory: ./docker-stacks
+      shell: bash
+
diff --git a/.github/workflows/auto-rerun.yml b/.github/workflows/auto-rerun.yml
new file mode 100644
index 0000000..6ecc3b4
--- /dev/null
+++ b/.github/workflows/auto-rerun.yml
@@ -0,0 +1,27 @@
+# Credit for the following workaround: https://github.com/orgs/community/discussions/67654#discussioncomment-8038649
+name: Automatic partial rerun on workflow failure
+on:
+  workflow_dispatch:
+    inputs:
+      run_id:
+        required: true
+      workflow_name:
+        required: true
+      workflow_sha:
+        required: true
+jobs:
+  rerun:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      actions: write
+      packages: write
+    steps:
+      - name: Re-run workflow ${{ inputs.workflow_name }}-${{ inputs.run_id }}
+        env:
+          GH_REPO: ${{ github.repository }}
+          GH_TOKEN: ${{ github.token }}
+          GH_DEBUG: api
+        run: |
+          gh run watch ${{ inputs.run_id }} > /dev/null 2>&1
+          gh run rerun ${{ inputs.run_id }} --failed
diff --git a/.github/workflows/build-test-base.yml b/.github/workflows/build-test-base.yml
new file mode 100644
index 0000000..71c8023
--- /dev/null
+++ b/.github/workflows/build-test-base.yml
@@ -0,0 +1,82 @@
+name: Build, test, and push jupyter base images
+
+on:
+  workflow_call:
+    inputs:
+      python_version:
+        description: Python version
+        required: true
+        type: string
+      python_dev_tag:
+        description: Tag to use for latest base images (foundation, minimal, etc)
+        required: true
+        type: string
+      registry:
+        description: The container registry
+        required: true
+        type: string
+      runs-on:
+        description: GitHub Actions Runner image
+        required: true
+        type: string
+    secrets:
+      registry_username:
+        required: true
+      registry_token:
+        required: true
+
+defaults:
+  run:
+    working-directory: ./docker-stacks
+
+jobs:
+
+  docker-stacks-foundation:
+    uses: ./.github/workflows/docker-build-test-push-latest.yml
+    with:
+      parent-image: ""
+      image: docker-stacks-foundation:${{ inputs.python_dev_tag }}
+      registry: ${{ inputs.registry }}
+      build-args:
+        PYTHON_VERSION=${{ inputs.python_version }}
+      runs-on: ubuntu-latest
+    secrets:
+      registry_username: ${{ secrets.registry_username }}
+      registry_token: ${{ secrets.registry_token }}
+
+  base-notebook:
+    uses: ./.github/workflows/docker-build-test-push-latest.yml
+    with:
+      parent-image: docker-stacks-foundation:${{ inputs.python_dev_tag }}
+      image: base-notebook:${{ inputs.python_dev_tag }}
+      registry: ${{ inputs.registry }}
+      runs-on: ubuntu-latest
+    secrets:
+      registry_username: ${{ secrets.registry_username }}
+      registry_token: ${{ secrets.registry_token }}
+    needs: [docker-stacks-foundation]
+
+  minimal-notebook:
+    uses: ./.github/workflows/docker-build-test-push-latest.yml
+    with:
+      parent-image: base-notebook:${{ inputs.python_dev_tag }}
+      image: minimal-notebook:${{ inputs.python_dev_tag }}
+      registry: ${{ inputs.registry }}
+      runs-on: ubuntu-latest
+    secrets:
+      registry_username: ${{ secrets.registry_username }}
+      registry_token: ${{ secrets.registry_token }}
+    needs: [base-notebook]
+
+  scipy-notebook:
+    uses: ./.github/workflows/docker-build-test-push-latest.yml
+    with:
+      parent-image: minimal-notebook:${{ inputs.python_dev_tag }}
+      image: scipy-notebook:${{ inputs.python_dev_tag }}
+      registry: ${{ inputs.registry }}
+      runs-on: ubuntu-latest
+    secrets:
+      registry_username: ${{ secrets.registry_username }}
+      registry_token: ${{ secrets.registry_token }}
+    needs: [minimal-notebook]
diff --git a/.github/workflows/build-test-datascience.yml b/.github/workflows/build-test-datascience.yml
new file mode 100644
index 0000000..0bffa9b
--- /dev/null
+++ b/.github/workflows/build-test-datascience.yml
@@ -0,0 +1,88 @@
+name: Build, test, and push jupyter datascience images
+
+on:
+  workflow_call:
+    inputs:
+      python_dev_tag:
+        description: Tag to use for latest base images (foundation, minimal, etc)
+        required: true
+        type: string
+      registry:
+        description: The container registry
+        required: true
+        type: string
+      runs-on:
+        description: GitHub Actions Runner image
+        required: true
+        type: string
+    secrets:
+      registry_username:
+        required: true
+      registry_token:
+        required: true
+
+defaults:
+  run:
+    working-directory: ./docker-stacks
+
+jobs:
+  r:
+    uses: ./.github/workflows/docker-build-test-push-latest.yml
+    with:
+      parent-image: minimal-notebook:${{ inputs.python_dev_tag }}
+      image: r-notebook:${{ inputs.python_dev_tag }}
+      registry: ${{ inputs.registry }}
+      runs-on: ${{ inputs.runs-on }}
+    secrets:
+      registry_username: ${{ secrets.registry_username }}
+      registry_token: ${{ secrets.registry_token }}
+
+  datascience:
+    uses: ./.github/workflows/docker-build-test-push-latest.yml
+    with:
+      parent-image: scipy-notebook:${{ inputs.python_dev_tag }}
+      image: datascience-notebook:${{ inputs.python_dev_tag }}
+      registry: ${{ inputs.registry }}
+      runs-on: ${{ inputs.runs-on }}
+    secrets:
+      registry_username: ${{ secrets.registry_username }}
+      registry_token: ${{ secrets.registry_token }}
+
+  # julia:
+  #   uses: ./.github/workflows/docker-build-test-push-latest.yml
+  #   with:
+  #     parent-image: minimal-notebook:${{ inputs.python_dev_tag }}
+  #     image: julia-notebook
+  #     registry: ${{ inputs.registry }}
+  #     runs-on: ${{ inputs.runs-on }}
+  #   secrets:
+  #     registry_username: ${{ secrets.registry_username }}
+  #     registry_token: ${{ secrets.registry_token }}
+
+  # tensorflow:
+  #   uses: ./.github/workflows/docker-build-test-push-latest.yml
+  #   with:
+  #     parent-image: scipy-notebook:${{ inputs.python_dev_tag }}
+  #     image: tensorflow-notebook
+  #     registry: ${{ inputs.registry }}
+  #     runs-on: ${{ inputs.runs-on }}
+  #   secrets:
+  #     registry_username: ${{ secrets.registry_username }}
+  #     registry_token: ${{ secrets.registry_token }}
+
+  # pytorch:
+  #   uses: ./.github/workflows/docker-build-test-push-latest.yml
+  #   with:
+  #     parent-image: scipy-notebook:${{ inputs.python_dev_tag }}
+  #     image: pytorch-notebook
+  #     registry: ${{ inputs.registry }}
+  #     runs-on: ${{ inputs.runs-on }}
+  #   secrets:
+  #     registry_username: ${{ secrets.registry_username }}
+  #     registry_token: ${{ secrets.registry_token }}
+
diff --git a/.github/workflows/build-test-spark.yml b/.github/workflows/build-test-spark.yml
new file mode 100644
index 0000000..255e5d1
--- /dev/null
+++ b/.github/workflows/build-test-spark.yml
@@ -0,0 +1,85 @@
+name: Build, test, and push jupyter Spark images
+
+on:
+  workflow_call:
+    inputs:
+      python_version:
+        description: Python version
+        required: true
+        type: string
+      spark_download_url:
+        description: Spark dist download URL
+        required: true
+        type: string
+      spark_version:
+        description: Spark version
+        required: true
+        type: string
+      java_version:
+        description: Java version
+        required: true
+        type: string
+      scala_version:
+        description: Scala version
+        required: true
+        type: string
+      hadoop_version:
+        description: Hadoop version
+        required: true
+        type: string
+      python_dev_tag:
+        description: Tag to use for latest base images (foundation, minimal, etc)
+        required: true
+        type: string
+      spark_dev_tag:
+        description: Tag to use for latest pyspark images (pyspark, all-spark, etc)
+        required: true
+        type: string
+      registry:
+        description: The container registry
+        required: true
+        type: string
+      runs-on:
+        description: GitHub Actions Runner image
+        required: true
+        type: string
+    secrets:
+      registry_username:
+        required: true
+      registry_token:
+        required: true
+
+defaults:
+  run:
+    working-directory: ./docker-stacks
+
+jobs:
+  pyspark:
+    uses: ./.github/workflows/docker-build-test-push-latest.yml
+    with:
+      parent-image: scipy-notebook:${{ inputs.python_dev_tag }}
+      image: pyspark-notebook:${{ inputs.spark_dev_tag }}
+      registry: ${{ inputs.registry }}
+      runs-on: ${{ inputs.runs-on }}
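+      # Note (assumption based on the upstream Dockerfile): the lowercase build-args
+      # below are expected to match the ARG names (spark_download_url, spark_version,
+      # openjdk_version, scala_version, hadoop_version) declared in
+      # docker-stacks/images/pyspark-notebook/Dockerfile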
+      build-args:
+        spark_download_url=${{ inputs.spark_download_url }}
+        spark_version=${{ inputs.spark_version }}
+        openjdk_version=${{ inputs.java_version }}
+        scala_version=${{ inputs.scala_version }}
+        hadoop_version=${{ inputs.hadoop_version }}
+    secrets:
+      registry_username: ${{ secrets.registry_username }}
+      registry_token: ${{ secrets.registry_token }}
+
+  all-spark:
+    uses: ./.github/workflows/docker-build-test-push-latest.yml
+    with:
+      parent-image: pyspark-notebook:${{ inputs.spark_dev_tag }}
+      image: all-spark-notebook:${{ inputs.spark_dev_tag }}
+      registry: ${{ inputs.registry }}
+      runs-on: ${{ inputs.runs-on }}
+    secrets:
+      registry_username: ${{ secrets.registry_username }}
+      registry_token: ${{ secrets.registry_token }}
+    needs: [pyspark]
diff --git a/.github/workflows/docker-build-test-push-latest.yml b/.github/workflows/docker-build-test-push-latest.yml
new file mode 100644
index 0000000..7b58885
--- /dev/null
+++ b/.github/workflows/docker-build-test-push-latest.yml
@@ -0,0 +1,114 @@
+name: Build, test, and tag the image as --latest; then push it to the container registry
+
+on:
+  workflow_call:
+    inputs:
+      parent-image:
+        description: Parent image name
+        required: true
+        type: string
+      image:
+        description: Image name
+        required: true
+        type: string
+      registry:
+        description: The container registry
+        required: true
+        type: string
+      build-args:
+        description: Build args space separated list, ex. PYTHON_VERSION=3.11 ...
+        required: false
+        type: string
+      runs-on:
+        description: GitHub Actions Runner image
+        required: true
+        type: string
+    secrets:
+      registry_username:
+        required: true
+      registry_token:
+        required: true
+
+defaults:
+  run:
+    working-directory: ./docker-stacks
+jobs:
+  build-test-upload:
+    name: ${{ inputs.image }}
+    runs-on: ${{ inputs.runs-on }}
+
+    steps:
+      - name: Checkout Repo ⚡️
+        uses: actions/checkout@v4
+
+      - name: Setup dev env patches 📦
+        uses: ./.github/actions/install-patchs-and-extension
+
+      - name: Create dev environment 📦
+        uses: ./docker-stacks/.github/actions/create-dev-env
+
+      - name: Expose git commit sha as env variable
+        uses: rlespinasse/git-commit-data-action@v1.5.0
+
+      - name: Login to Registry 🔐
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ inputs.registry }}
+          username: ${{ secrets.registry_username }}
+          password: ${{ secrets.registry_token }}
+
+      - name: Pull parent image 📥
+        if: inputs.parent-image != ''
+        run: docker pull ${{ inputs.registry }}/${{ github.repository_owner }}/${{ inputs.parent-image }}
+        shell: bash
+
+      - name: Prepare image build (build args) 📦
+        run: |
+          for build_arg in ${{ inputs.build-args }}
+          do
+            BUILD_ARGS+="--build-arg $build_arg "
+          done
+          if [[ "${{ inputs.parent-image }}" ]]
+          then
+            BUILD_ARGS+="--build-arg BASE_CONTAINER=${{ inputs.registry }}/${{ github.repository_owner }}/${{ inputs.parent-image }} "
+          fi
+          echo "BUILD_ARGS=$BUILD_ARGS" >> $GITHUB_ENV
+          # The short image name is necessary to run the tests (not pushed, local to jobs only)
+          echo "SHORT_IMAGE_NAME=${{ inputs.image }}" | awk -F: '{print $1}' >> $GITHUB_ENV
+        shell: bash
+
+      - name: Patch PySpark Dockerfile to be compatible with Java 11+ 📦
+        if: contains(inputs.image, 'pyspark-notebook:') && contains(inputs.build-args, 'spark_version=3.2.') && ! contains(inputs.build-args, 'openjdk_version=8')
+        run: |
+          cat ../images/patch/pyspark-notebook/Dockerfile.spark3.2.x >> images/$SHORT_IMAGE_NAME/Dockerfile
+        shell: bash
+
+      - name: Build image 🛠
+        run: |
+          #docker buildx build --platform=linux/amd64 --rm --force-rm --tag ...
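+          # Illustrative expansion: with build-args "PYTHON_VERSION=3.11" and a parent
+          # image set, the prepare step above yields roughly:
+          #   BUILD_ARGS="--build-arg PYTHON_VERSION=3.11 --build-arg BASE_CONTAINER=<registry>/<owner>/<parent-image>"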
+          docker build --rm --force-rm --tag ${{ inputs.registry }}/${{ github.repository_owner }}/${{ inputs.image }} \
+            --tag ${{ inputs.registry }}/${{ github.repository_owner }}/$SHORT_IMAGE_NAME:latest images/$SHORT_IMAGE_NAME/ \
+            --build-arg REGISTRY=${{ inputs.registry }} \
+            --build-arg OWNER=${{ github.repository_owner }} $BUILD_ARGS \
+            --label "org.opencontainers.image.source=https://github.com/${{ github.repository }}" \
+            --label "org.opencontainers.image.description=$SHORT_IMAGE_NAME"
+        env:
+          DOCKER_BUILDKIT: 1
+          # Full logs for CI build
+          BUILDKIT_PROGRESS: plain
+        shell: bash
+
+      # Run docker-stacks tests (docker-stacks/tests)
+      - name: Run tests ✅
+        # Skip tests when running with ACT
+        if: env.ACT_SKIP_TESTS == ''
+        run: |
+          python3 -m tests.run_tests --short-image-name $SHORT_IMAGE_NAME --registry ${{ inputs.registry }} --owner ${{ github.repository_owner }}
+        shell: bash
+
+      - name: Push latest tag image to registry 📤
+        run: docker push ${{ inputs.registry }}/${{ github.repository_owner }}/${{ inputs.image }}
+        shell: bash
+
\ No newline at end of file
diff --git a/.github/workflows/docker-tag-push.yml b/.github/workflows/docker-tag-push.yml
new file mode 100644
index 0000000..50cf897
--- /dev/null
+++ b/.github/workflows/docker-tag-push.yml
@@ -0,0 +1,65 @@
+name: Pull the image with its latest tag, apply new tags, and push back to the container registry
+
+on:
+  workflow_call:
+    inputs:
+      image:
+        description: Image name
+        required: true
+        type: string
+      registry:
+        description: The container registry
+        required: true
+        type: string
+      runs-on:
+        description: GitHub Actions Runner image
+        required: true
+        type: string
+    secrets:
+      registry_username:
+        required: true
+      registry_token:
+        required: true
+
+defaults:
+  run:
+    working-directory: ./docker-stacks
+
+jobs:
+  tag-push:
+    runs-on: ${{ inputs.runs-on }}
+    name: ${{ inputs.image }}
+
+    steps:
+      - name: Checkout Repo ⚡️
+        uses: actions/checkout@v4
+
+      - name: Setup dev env patches 📦
+        uses: ./.github/actions/install-patchs-and-extension
+
+      - name: Create dev environment 📦
+        uses: ./docker-stacks/.github/actions/create-dev-env
+
+      - name: Login to Registry 🔐
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ inputs.registry }}
+          username: ${{ secrets.registry_username }}
+          password: ${{ secrets.registry_token }}
+
+      - name: Pull latest tag image 📥
+        run: docker pull ${{ inputs.registry }}/${{ github.repository_owner }}/${{ inputs.image }}
+        shell: bash
+
+      - name: Apply tags to the loaded image 🏷
+        run: python3 -m okdp.extension.tagging.apply_tags --short-image-name ${{ inputs.image }} --registry ${{ inputs.registry }} --owner ${{ github.repository_owner }}
+
+      - name: Prepare image push 📦
+        run: |
+          # The short image name (without tag) is necessary to push to the registry
+          echo "SHORT_IMAGE_NAME=${{ inputs.image }}" | awk -F: '{print $1}' >> $GITHUB_ENV
+        shell: bash
+
+      - name: Push Images to Registry 📤
+        run: docker push --all-tags ${{ inputs.registry }}/${{ github.repository_owner }}/$SHORT_IMAGE_NAME
+        shell: bash
".github/workflows/docker-tag-push.yml" + - ".github/actions/generate-build-matrix/action.yml" + - ".github/actions/install-patchs-and-extension/action.yml" + + - ".build/.versions.yml" + + - "python/okdp/**" + - "docker-stacks/images/**" + - "docker-stacks/tests/**" + - "docker-stacks/tagging/**" + - "images/**" + + - "!python/okdp/patch/README.md" + - "!images/README.md" + + push: + branches: + - main + paths: + - ".github/workflows/main.yml" + - ".github/workflows/build-base.yml" + - ".github/workflows/build-datascience.yml" + - ".github/workflows/build-spark.yml" + - ".github/workflows/docker-build-test-push-latest.yml" + - ".github/workflows/docker-tag-push.yml" + - ".github/actions/generate-build-matrix/action.yml" + - ".github/actions/install-patchs-and-extension/action.yml" + + - ".build/.versions.yml" + + - "python/okdp/**" + - "docker-stacks/images/**" + - "docker-stacks/tests/**" + - "docker-stacks/tagging/**" + - "images/**" + + - "!python/okdp/patch/README.md" + - "!images/README.md" + + workflow_dispatch: + +# https://docs.github.com/en/actions/using-jobs/using-concurrency +concurrency: + # Only cancel in-progress jobs or runs for the current workflow - matches against branch & tags + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +defaults: + run: + working-directory: ./docker-stacks + +jobs: + run-unit-tests: + uses: ./.github/workflows/unit-tests.yml + with: + runs-on: ubuntu-latest + + build-version-compatibility-matrix: + runs-on: ubuntu-latest + outputs: + spark: ${{ steps.set-matrix.outputs.spark }} + python: ${{ steps.set-matrix.outputs.python }} + steps: + - name: Checkout Repo ⚡️ + uses: actions/checkout@v4 + + - name: Setup dev env patchs 📦 + uses: ./.github/actions/install-patchs-and-extension + + - name: Create dev environment 📦 + uses: ./docker-stacks/.github/actions/create-dev-env + + - name: Get current branch 📦 + id: branch + uses: tj-actions/branch-names@v8 + + - name: Build version compatibility matrix 🛠 + id: set-matrix + run: | + python3 -m okdp.extension.matrix.version_compatibility_matrix \ + --versions-matrix-path ../.build/.versions.yml \ + --git-branch ${{ steps.branch.outputs.current_branch || steps.branch.outputs.tag}} >> $GITHUB_OUTPUT + cat $GITHUB_OUTPUT + shell: bash + needs: [run-unit-tests] + + build-base: + name: build-test-base (python-${{ matrix.python.python_version }}) + strategy: + # 3 Jobs in //, the base jobs run in sequential + max-parallel: 3 + matrix: + python: ${{ fromJson(needs.build-version-compatibility-matrix.outputs.python) }} + uses: ./.github/workflows/build-test-base.yml + with: + python_version: ${{ matrix.python.python_version }} + python_dev_tag: ${{ matrix.python.python_dev_tag }} + registry: ${{ vars.REGISTRY || 'ghcr.io' }} + runs-on: ubuntu-latest + secrets: + registry_username: ${{ secrets.REGISTRY_USERNAME }} + registry_token: ${{ secrets.REGISTRY_ROBOT_TOKEN }} + needs: [build-version-compatibility-matrix] + + build-datascience: + name: build-test-datascience (python-${{ matrix.python.python_version }}) + strategy: + # 1 matrix call = +2 jobs in // (check the number here build-datascience.yml) + max-parallel: 1 + matrix: + python: ${{ fromJson(needs.build-version-compatibility-matrix.outputs.python) }} + uses: ./.github/workflows/build-test-datascience.yml + with: + python_dev_tag: ${{ matrix.python.python_dev_tag }} + registry: ${{ vars.REGISTRY || 'ghcr.io' }} + runs-on: ubuntu-latest + secrets: + registry_username: ${{ secrets.REGISTRY_USERNAME }} + registry_token: ${{ 
+  build-base:
+    name: build-test-base (python-${{ matrix.python.python_version }})
+    strategy:
+      # 3 jobs in parallel; within each, the base images build sequentially
+      max-parallel: 3
+      matrix:
+        python: ${{ fromJson(needs.build-version-compatibility-matrix.outputs.python) }}
+    uses: ./.github/workflows/build-test-base.yml
+    with:
+      python_version: ${{ matrix.python.python_version }}
+      python_dev_tag: ${{ matrix.python.python_dev_tag }}
+      registry: ${{ vars.REGISTRY || 'ghcr.io' }}
+      runs-on: ubuntu-latest
+    secrets:
+      registry_username: ${{ secrets.REGISTRY_USERNAME }}
+      registry_token: ${{ secrets.REGISTRY_ROBOT_TOKEN }}
+    needs: [build-version-compatibility-matrix]
+
+  build-datascience:
+    name: build-test-datascience (python-${{ matrix.python.python_version }})
+    strategy:
+      # 1 matrix call = 2 more jobs in parallel (see the number of jobs in build-test-datascience.yml)
+      max-parallel: 1
+      matrix:
+        python: ${{ fromJson(needs.build-version-compatibility-matrix.outputs.python) }}
+    uses: ./.github/workflows/build-test-datascience.yml
+    with:
+      python_dev_tag: ${{ matrix.python.python_dev_tag }}
+      registry: ${{ vars.REGISTRY || 'ghcr.io' }}
+      runs-on: ubuntu-latest
+    secrets:
+      registry_username: ${{ secrets.REGISTRY_USERNAME }}
+      registry_token: ${{ secrets.REGISTRY_ROBOT_TOKEN }}
+    needs: [build-version-compatibility-matrix, build-base]
+
+  build-spark:
+    name: build-test-spark (python-${{ matrix.spark.python_version }})
+    strategy:
+      # 2 jobs in parallel
+      max-parallel: 2
+      matrix:
+        spark: ${{ fromJson(needs.build-version-compatibility-matrix.outputs.spark) }}
+    uses: ./.github/workflows/build-test-spark.yml
+    with:
+      spark_download_url: ${{ matrix.spark.spark_download_url }}
+      python_version: ${{ matrix.spark.python_version }}
+      spark_version: ${{ matrix.spark.spark_version }}
+      java_version: ${{ matrix.spark.java_version }}
+      scala_version: ${{ matrix.spark.scala_version }}
+      hadoop_version: ${{ matrix.spark.hadoop_version }}
+      python_dev_tag: ${{ matrix.spark.python_dev_tag }}
+      spark_dev_tag: ${{ matrix.spark.spark_dev_tag }}
+      registry: ${{ vars.REGISTRY || 'ghcr.io' }}
+      runs-on: ubuntu-latest
+    secrets:
+      registry_username: ${{ secrets.REGISTRY_USERNAME }}
+      registry_token: ${{ secrets.REGISTRY_ROBOT_TOKEN }}
+    needs: [build-version-compatibility-matrix, build-base]
+
+  ### The 3 push jobs below run in parallel with each other
+  push-base:
+    if: github.ref == 'refs/heads/main'
+    name: push-base (python-${{ matrix.python.python_version }})
+    strategy:
+      max-parallel: 1
+      matrix:
+        image: [docker-stacks-foundation, base-notebook, minimal-notebook, scipy-notebook]
+        python: ${{ fromJson(needs.build-version-compatibility-matrix.outputs.python) }}
+    uses: ./.github/workflows/docker-tag-push.yml
+    with:
+      image: "${{ matrix.image }}:${{ matrix.python.python_dev_tag }}"
+      registry: ${{ vars.REGISTRY || 'ghcr.io' }}
+      runs-on: ubuntu-latest
+    secrets:
+      registry_username: ${{ secrets.REGISTRY_USERNAME }}
+      registry_token: ${{ secrets.REGISTRY_ROBOT_TOKEN }}
+    needs: [build-version-compatibility-matrix, build-datascience, build-spark]
+
+  push-datascience:
+    if: github.ref == 'refs/heads/main'
+    name: push-datascience (python-${{ matrix.python.python_version }})
+    strategy:
+      max-parallel: 1
+      matrix:
+        image: [r-notebook, datascience-notebook]
+        python: ${{ fromJson(needs.build-version-compatibility-matrix.outputs.python) }}
+    uses: ./.github/workflows/docker-tag-push.yml
+    with:
+      image: "${{ matrix.image }}:${{ matrix.python.python_dev_tag }}"
+      registry: ${{ vars.REGISTRY || 'ghcr.io' }}
+      runs-on: ubuntu-latest
+    secrets:
+      registry_username: ${{ secrets.REGISTRY_USERNAME }}
+      registry_token: ${{ secrets.REGISTRY_ROBOT_TOKEN }}
+    needs: [build-version-compatibility-matrix, build-datascience, build-spark]
+
+  push-spark:
+    if: github.ref == 'refs/heads/main'
+    strategy:
+      max-parallel: 1
+      matrix:
+        image: [pyspark-notebook, all-spark-notebook]
+        spark: ${{ fromJson(needs.build-version-compatibility-matrix.outputs.spark) }}
+    uses: ./.github/workflows/docker-tag-push.yml
+    with:
+      image: "${{ matrix.image }}:${{ matrix.spark.spark_dev_tag }}"
+      registry: ${{ vars.REGISTRY || 'ghcr.io' }}
+      runs-on: ubuntu-latest
+    secrets:
+      registry_username: ${{ secrets.REGISTRY_USERNAME }}
+      registry_token: ${{ secrets.REGISTRY_ROBOT_TOKEN }}
+    needs: [build-version-compatibility-matrix, build-datascience, build-spark]
+
+  # Credit for the following workaround: https://github.com/orgs/community/discussions/67654#discussioncomment-8038649
+  partial-rerun-on-failure:
+    # Only allow re-runs on the main branch
+    if: failure() && github.ref == 'refs/heads/main' && fromJSON(github.run_attempt) < 3
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      actions: write
+      packages: write
+    steps:
+      - name: "Re-run failed jobs"
+        env:
+          GH_REPO: ${{ github.repository }}
+          GH_TOKEN: ${{ github.token }}
+          GH_DEBUG: api
+        run: |
+          gh workflow run auto-rerun.yml \
+            -F run_id=${{ github.run_id }} \
+            -F workflow_name=${{ github.workflow }} \
+            -F workflow_sha=${{ github.workflow_sha }}
+
+        shell: bash
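+
+    # How the rerun chain works: this job dispatches auto-rerun.yml, which runs
+    # `gh run watch <run_id>` to block until this (parent) run completes, then
+    # `gh run rerun <run_id> --failed` to retry only the failed jobs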
+    needs: [push-base, push-datascience, push-spark]
\ No newline at end of file
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
new file mode 100644
index 0000000..8c07a70
--- /dev/null
+++ b/.github/workflows/unit-tests.yml
@@ -0,0 +1,28 @@
+on:
+  workflow_call:
+    inputs:
+      runs-on:
+        description: GitHub Actions Runner image
+        required: true
+        type: string
+
+jobs:
+
+  unit-tests:
+    runs-on: ${{ inputs.runs-on }}
+
+    steps:
+      - name: Checkout Repo ⚡️
+        uses: actions/checkout@v4
+
+      - name: Setup dev env patches 📦
+        uses: ./.github/actions/install-patchs-and-extension
+
+      - name: Create dev environment 📦
+        uses: ./docker-stacks/.github/actions/create-dev-env
+
+      - name: Run unit tests
+        run: pytest python/tests -v --color=yes
+        shell: bash
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f799eee
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,197 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+##################################################################
+# The content above is copied from                               #
+# https://github.com/github/gitignore/blob/main/Python.gitignore #
+# Please, add the content only below these lines                 #
+##################################################################
+
+### IntelliJ IDEA ###
+.idea
+*.iml
+
+### Eclipse ###
+.apt_generated
+.classpath
+.factorypath
+.project
+.settings
+.springBeans
+.sts4-cache
+bin/
+!**/src/main/**/bin/
+!**/src/test/**/bin/
+
+### NetBeans ###
+/nbproject/private/
+/nbbuild/
+/dist/
+/nbdist/
+/.nb-gradle/
+
+
+### Mac OS ###
+.DS_Store
+
+### vscode ###
+.vscode/
+
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..4029e1c
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,7 @@
+{
+    "python.analysis.extraPaths": [
+        "./python/okdp",
+        "./python/tests",
+        "./docker-stacks"
+    ]
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index 87c5ac3..8bcf1ed 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,131 @@
-# jupyterlab-docker
-okdp jupyterlab docker images
+# OKDP Jupyter Images
+
+[![Build, test, tag, and push jupyter images](https://github.com/OKDP/jupyterlab-docker/actions/workflows/main.yml/badge.svg)](https://github.com/OKDP/jupyterlab-docker/actions/workflows/main.yml)
+
+OKDP Jupyter docker images, based on the [jupyter docker-stacks](https://github.com/jupyter/docker-stacks) source dockerfiles. The repository includes a read-only copy of [jupyter docker-stacks](https://github.com/jupyter/docker-stacks) as a [git-subtree](https://www.atlassian.com/git/tutorials/git-subtree) sub-project.
+
+The project leverages the features provided by [jupyter docker-stacks](https://github.com/jupyter/docker-stacks):
+- Build from the original [source docker files](docker-stacks/images)
+- Customize the images by using docker ```build-arg``` build arguments (see the example after this list)
+- Run the original [tests](docker-stacks/tests) at every pipeline trigger
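+
+For example, a build-arg customization could look like this (hedged sketch: the tag and registry owner are placeholders; `PYTHON_VERSION` is the same build argument the [build-test-base](.github/workflows/build-test-base.yml) workflow passes):
+
+```shell
+docker build \
+  --build-arg PYTHON_VERSION=3.11 \
+  --tag ghcr.io/my-org/docker-stacks-foundation:latest \
+  docker-stacks/images/docker-stacks-foundation/
+```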
+
+The project provides up-to-date JupyterLab images, especially for PySpark.
+
+# Images build workflow
+## Build/Test
+
+The [main](.github/workflows/main.yml) build pipeline contains 6 main reusable workflows:
+
+1. [build-test-base](.github/workflows/build-test-base.yml): docker-stacks-foundation, base-notebook, minimal-notebook, scipy-notebook
+2. [build-test-datascience](.github/workflows/build-test-datascience.yml): r-notebook, julia-notebook, tensorflow-notebook, pytorch-notebook
+3. [build-test-spark](.github/workflows/build-test-spark.yml): pyspark-notebook, all-spark-notebook
+4. [tag-push](.github/workflows/docker-tag-push.yml): push the built images to the container registry (main branch only)
+5. [auto-rerun](.github/workflows/auto-rerun.yml): partially re-run jobs in case of failures (GitHub runner issues; main branch only)
+6. [unit-tests](.github/workflows/unit-tests.yml): run the unit tests (okdp extension) at every pipeline trigger
+
+![build pipeline](doc/_images/build-pipeline.png)
+
+The build is based on the [version compatibility matrix](.build/.versions.yml).
+
+The [build-matrix](.build/.versions.yml#L42) section defines the component versions to build. It behaves like a filter on the parent [version compatibility matrix](.build/.versions.yml), limiting the version combinations to build. The build process ensures that only compatible versions are built.
+
+For example, the following build-matrix:
+
+```yaml
+build-matrix:
+  python_version: ['3.9', '3.10', '3.11']
+  spark_version: [3.2.4, 3.3.4, 3.4.2, 3.5.0]
+  java_version: [11, 17]
+  scala_version: [2.12]
+```
+
+will build the following version combinations, as allowed by the [compatibility-matrix](.build/.versions.yml#5) section:
+- spark3.3.4-python3.10-java17-scala2.12
+- spark3.5.0-python3.11-java17-scala2.12
+- spark3.4.2-python3.11-java17-scala2.12
+- spark3.2.4-python3.9-java11-scala2.12
+
+By default, if no filter is specified:
+
+```yaml
+build-matrix:
+```
+
+all compatible version combinations are built.
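+
+To preview the combinations without triggering the pipeline, the matrix module can be invoked directly (same command the [main](.github/workflows/main.yml) workflow runs; running it from the repository root with `main` as the branch is an assumption about the local setup):
+
+```shell
+python3 -m okdp.extension.matrix.version_compatibility_matrix \
+  --versions-matrix-path .build/.versions.yml \
+  --git-branch main
+```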
+
+Finally, all the images are tested against the original [tests](docker-stacks/tests) at every pipeline trigger.
+
+## Push
+
+Development images with a ```--latest``` tag suffix (ex.: spark3.2.4-python3.9-java11-scala2.12--latest) are produced at every pipeline run, regardless of the git branch (main or not).
+
+The [official images](#tagging) are pushed to the [container registry](https://github.com/orgs/OKDP/packages) when:
+
+1. The workflow is triggered on the main branch only, and
+2. The [tests](#buildtest) are completed successfully
+
+This prevents pull requests or development branches from pushing official images before they are reviewed or tested. It also provides the flexibility to test against the ```--latest``` development images before they are officially pushed.
+
+## Tagging
+
+The project builds the images with long-format tags. Each tag combines multiple compatible version combinations.
+
+There are multiple tag levels, and the format to use depends on your needs in terms of stability and reproducibility.
+
+Here are some examples:
+
+### scipy-notebook:
+- python-3.11-2024-02-06
+- python-3.11.7-2024-02-06
+- python-3.11.7-hub-4.0.2-lab-4.1.0
+- python-3.11.7-hub-4.0.2-lab-4.1.0-2024-02-06
+
+### datascience-notebook:
+- python-3.9-2024-02-06
+- python-3.9.18-2024-02-06
+- python-3.9.18-hub-4.0.2-lab-4.1.0
+- python-3.9.18-hub-4.0.2-lab-4.1.0-2024-02-06
+- python-3.9.18-r-4.3.2-julia-1.10.0-2024-02-06
+- python-3.9.18-r-4.3.2-julia-1.10.0-hub-4.0.2-lab-4.1.0
+- python-3.9.18-r-4.3.2-julia-1.10.0-hub-4.0.2-lab-4.1.0-2024-02-06
+
+### pyspark-notebook:
+- spark-3.5.0-python-3.11-java-17-scala-2.12
+- spark-3.5.0-python-3.11-java-17-scala-2.12-2024-02-06
+- spark-3.5.0-python-3.11.7-java-17.0.9-scala-2.12.18-hub-4.0.2-lab-4.1.0
+- spark-3.5.0-python-3.11.7-java-17.0.9-scala-2.12.18-hub-4.0.2-lab-4.1.0-2024-02-06
+- spark-3.5.0-python-3.11.7-r-4.3.2-java-17.0.9-scala-2.12.18-hub-4.0.2-lab-4.1.0
+- spark-3.5.0-python-3.11.7-r-4.3.2-java-17.0.9-scala-2.12.18-hub-4.0.2-lab-4.1.0-2024-02-06
+
+Please check the [container registry](https://github.com/orgs/OKDP/packages) for more images and tags.
+
+# Build locally with Act
+
+[Act](https://github.com/nektos/act) can be used to build and test locally.
+
+Here is an example command:
+
+```shell
+$ act --container-architecture linux/amd64 \
+   -W .github/workflows/main.yml \
+   --env ACT_SKIP_TESTS= \
+   --var REGISTRY=ghcr.io \
+   --secret REGISTRY_USERNAME= \
+   --secret REGISTRY_ROBOT_TOKEN= \
+   --rm
+```
+
+Set the option ```--container-architecture linux/amd64``` if you are running locally on Apple's M1/M2 chips.
+
+For more information:
+
+```shell
+$ act --help
+```
+
+# OKDP custom extensions
+
+1. The [tagging extension](python/okdp/extension/tagging) is based on the original [jupyter docker-stacks](docker-stacks/tagging) source files
+2. [Patches](python/okdp/patch/README.md) patch the original [jupyter docker-stacks](docker-stacks/tests) in order to run the tests
+3. The [version compatibility matrix](python/okdp/extension/matrix) generates all the compatible version combinations for pyspark
+4. [Unit tests](python/tests) test the okdp extension at every pipeline run (see the command below)
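+
+The unit tests can be run locally with the same command the [unit-tests](.github/workflows/unit-tests.yml) workflow uses (assuming the dev requirements are installed):
+
+```shell
+pytest python/tests -v --color=yes
+```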
\ No newline at end of file
diff --git a/doc/_images/build-pipeline.png b/doc/_images/build-pipeline.png
new file mode 100644
index 0000000..466bde3
Binary files /dev/null and b/doc/_images/build-pipeline.png differ
diff --git a/docker-stacks/.flake8 b/docker-stacks/.flake8
new file mode 100644
index 0000000..a609f12
--- /dev/null
+++ b/docker-stacks/.flake8
@@ -0,0 +1,4 @@
+[flake8]
+max-line-length = 88
+select = C, E, F, W, B, B950
+extend-ignore = E203, E501, W503
diff --git a/docker-stacks/.gitattributes b/docker-stacks/.gitattributes
new file mode 100644
index 0000000..6313b56
--- /dev/null
+++ b/docker-stacks/.gitattributes
@@ -0,0 +1 @@
+* text=auto eol=lf
diff --git a/docker-stacks/.github/ISSUE_TEMPLATE/bug_report.yml b/docker-stacks/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 0000000..064028d
--- /dev/null
+++ b/docker-stacks/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,129 @@
+name: Bug report
+description: Create a report to help us improve
+labels: ["type:Bug"]
+
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Hi! Thanks for using the Jupyter Docker Stacks and taking some time to contribute to this project.
+
+        We'd appreciate it if you could check out the [Troubleshooting common problems](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/troubleshooting.html) section in the documentation,
+        as well as [existing issues](https://github.com/jupyter/docker-stacks/issues?q=is%3Aissue) prior to submitting an issue to avoid duplication.
+
+        Please answer the following sections to help us troubleshoot the problem.
+
+  - type: dropdown
+    attributes:
+      label: What docker image(s) are you using?
+      description: Select as many images as applicable
+      multiple: true
+      options:
+        - all-spark-notebook
+        - base-notebook
+        - datascience-notebook
+        - docker-stacks-foundation
+        - julia-notebook
+        - minimal-notebook
+        - pyspark-notebook
+        - pytorch-notebook
+        - r-notebook
+        - scipy-notebook
+        - tensorflow-notebook
+    validations:
+      required: true
+
+  - type: input
+    attributes:
+      label: Host OS system
+      placeholder: |
+        Example:
+        Ubuntu 22.04
+    validations:
+      required: true
+
+  - type: dropdown
+    attributes:
+      label: Host architecture
+      options:
+        - x86_64
+        - aarch64
+    validations:
+      required: true
+
+  - type: textarea
+    attributes:
+      label: What Docker command are you running?
+      description: |
+        What complete docker command do you run to launch the container (omitting sensitive values)?
+      placeholder: |
+        Example:
+        `docker run -it --rm -p 8888:8888 quay.io/jupyter/base-notebook`
+    validations:
+      required: true
+
+  - type: textarea
+    attributes:
+      label: How to Reproduce the problem?
+      description: Please provide steps to reproduce this bug (once the container is running).
+      placeholder: |
+        Example:
+
+        1. Visit
+
+        2. Start an R notebook
+
+        3. ...
+    validations:
+      required: true
+
+  - type: textarea
+    attributes:
+      label: Command output
+      render: bash session
+      description: |
+        Provide the output of the steps above, including the commands
+        themselves and Docker's output/traceback etc. If you're familiar with
+        Markdown, this block will have triple backticks added automatically
+        around it -- you don't have to add them.
+
+        If you want to present output from multiple commands, please present
+        that as a shell session (commands you run get prefixed with `$ `).
+        Please also ensure that the "How to reproduce" section contains matching
+        instructions for reproducing this.
+
+  - type: textarea
+    attributes:
+      label: Expected behavior
+      description: |
+        A clear and concise description of what you expected to happen.
+      placeholder: |
+        Example: `ggplot` output appears in my notebook.
+
+  - type: textarea
+    attributes:
+      label: Actual behavior
+      description: |
+        A clear and concise description of what the bug is.
+      placeholder: |
+        Example: No output is visible in the notebook and the Server log contains messages about ...
+    validations:
+      required: true
+
+  - type: textarea
+    attributes:
+      label: Anything else?
+      description: |
+        Links? References? Anything that will give us more context about the issue you are encountering!
+
+        Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in.
+    validations:
+      required: false
+
+  - type: checkboxes
+    attributes:
+      label: Latest Docker version
+      description: You should try to use the latest Docker version
+      options:
+        - label: I've updated my Docker version to the latest available, and the issue persists
+          required: true
diff --git a/docker-stacks/.github/ISSUE_TEMPLATE/config.yml b/docker-stacks/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000..91e87b3
--- /dev/null
+++ b/docker-stacks/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,14 @@
+blank_issues_enabled: false
+contact_links:
+  - name: 📖 - Jupyter Docker Stacks documentation
+    url: https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html
+    about: Go to the project's documentation
+  - name: 🔍 - Troubleshooting common problems
+    url: https://jupyter-docker-stacks.readthedocs.io/en/latest/using/troubleshooting.html
+    about: Documentation section on troubleshooting commonly encountered errors
+  - name: 💬 - Jupyter community Discourse
+    url: https://discourse.jupyter.org/
+    about: Interact with the rest of the Jupyter community
+  - name: (maintainers only) Blank issue
+    url: https://github.com/jupyter/docker-stacks/issues/new
+    about: For maintainers only
diff --git a/docker-stacks/.github/ISSUE_TEMPLATE/feature_request.yml b/docker-stacks/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 0000000..f0da406
--- /dev/null
+++ b/docker-stacks/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,67 @@
+name: Feature request
+description: Suggest a new feature for this project
+labels: ["type:Enhancement"]
+
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Hi! Thanks for using the Jupyter Docker Stacks and taking some time to contribute to this project.
+
+        We'd appreciate it if you could check out the [Suggesting a new feature](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/features.html#suggesting-a-new-feature)
+        section in the documentation for our preferred processes before submitting a feature request.
+
+  - type: dropdown
+    attributes:
+      label: What docker image(s) is this feature applicable to?
+      description: Select as many images as applicable
+      multiple: true
+      options:
+        - all-spark-notebook
+        - base-notebook
+        - datascience-notebook
+        - docker-stacks-foundation
+        - julia-notebook
+        - minimal-notebook
+        - pyspark-notebook
+        - pytorch-notebook
+        - r-notebook
+        - scipy-notebook
+        - tensorflow-notebook
+        - new community stack
+    validations:
+      required: true
+
+  - type: textarea
+    attributes:
+      label: What change(s) are you proposing?
+      description: |
+        Be concise and feel free to add supporting links or references.
+      placeholder: |
+        Example:
+        - Add the [altair](https://altair-viz.github.io) package to the image.
+    validations:
+      required: true
+
+  - type: textarea
+    attributes:
+      label: How does this affect the user?
+      description: |
+        How will the proposed feature affect the user's workflow?
+        How will this feature make the image more robust, secure, etc.?
+      placeholder: |
+        Example:
+        - Altair is a declarative statistical visualization library for Python, based on Vega and Vega-Lite, and the source is available on GitHub.
+        - With Altair, you can spend more time understanding your data and its meaning.
+        - Altair's API is simple, friendly, and consistent and built on top of the powerful Vega-Lite visualization grammar.
+        - This elegant simplicity produces beautiful and effective visualizations with a minimal amount of code.
+    validations:
+      required: true
+
+  - type: textarea
+    attributes:
+      label: Anything else?
+      description: |
+        Links? References? Anything that will give us more context about the feature you are proposing.
+    validations:
+      required: false
diff --git a/docker-stacks/.github/actions/create-dev-env/action.yml b/docker-stacks/.github/actions/create-dev-env/action.yml
new file mode 100644
index 0000000..7323494
--- /dev/null
+++ b/docker-stacks/.github/actions/create-dev-env/action.yml
@@ -0,0 +1,20 @@
+name: Build environment
+description: Create a build environment
+
+runs:
+  using: composite
+  steps:
+    # actions/setup-python doesn't support Linux aarch64 runners
+    # See: https://github.com/actions/setup-python/issues/108
+    # python3 is manually preinstalled in the aarch64 VM self-hosted runner
+    - name: Set Up Python 🐍
+      uses: actions/setup-python@v5
+      with:
+        python-version: 3.x
+      if: runner.arch == 'X64'
+
+    - name: Install Dev Dependencies 📦
+      run: |
+        pip install --upgrade pip
+        pip install --upgrade -r requirements-dev.txt
+      shell: bash
diff --git a/docker-stacks/.github/actions/load-image/action.yml b/docker-stacks/.github/actions/load-image/action.yml
new file mode 100644
index 0000000..cbf5a8a
--- /dev/null
+++ b/docker-stacks/.github/actions/load-image/action.yml
@@ -0,0 +1,24 @@
+name: Load Docker image
+description: Download the image tar and load it to Docker
+
+inputs:
+  image:
+    description: Image name
+    required: true
+  platform:
+    description: Image platform
+    required: true
+
+runs:
+  using: composite
+  steps:
+    - name: Download built image 📥
+      uses: actions/download-artifact@v4
+      with:
+        name: ${{ inputs.image }}-${{ inputs.platform }}
+        path: /tmp/jupyter/images/
+    - name: Load downloaded image to docker 📥
+      run: |
+        zstd --uncompress --stdout --rm /tmp/jupyter/images/${{ inputs.image }}-${{ inputs.platform }}.tar.zst | docker load
+        docker image ls --all
+      shell: bash
diff --git a/docker-stacks/.github/dependabot.yml b/docker-stacks/.github/dependabot.yml
new file mode 100644
index 0000000..8f17357
--- /dev/null
+++ b/docker-stacks/.github/dependabot.yml
@@ -0,0 +1,19 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+
+version: 2
+updates:
+  - package-ecosystem: github-actions
+    directory: /
+    schedule:
+      interval: weekly
+  - package-ecosystem: github-actions
+    directory: .github/actions/create-dev-env/
+    schedule:
+      interval: weekly
+  - package-ecosystem: github-actions
+    directory: .github/actions/load-image/
+    schedule:
+      interval: weekly
diff --git a/docker-stacks/.github/pull_request_template.md b/docker-stacks/.github/pull_request_template.md
new file mode 100644
index 0000000..f034359
--- /dev/null
+++ b/docker-stacks/.github/pull_request_template.md
@@ -0,0 +1,14 @@
+## Describe your changes
+
+## Issue ticket if applicable
+
+
+
+## Checklist (especially for first-time contributors)
+
+- [ ] I have performed a self-review of my code
+- [ ] If it is a core feature, I have added thorough tests
+- [ ] I will try not to use force-push to make the review process easier for reviewers
+- [ ] I have updated the documentation for significant changes
+
+
diff --git a/docker-stacks/.github/workflows/aarch64-setup.yml b/docker-stacks/.github/workflows/aarch64-setup.yml
new file mode 100644
index 0000000..441a9aa
--- /dev/null
+++ b/docker-stacks/.github/workflows/aarch64-setup.yml
@@ -0,0 +1,30 @@
+name: Test aarch64-runner setup script
+
+on:
+  schedule:
+    # Weekly, at 03:00 on Monday UTC
+    - cron: "0 3 * * 1"
+  pull_request:
+    paths:
+      - ".github/workflows/aarch64-setup.yml"
+      - "aarch64-runner/setup.sh"
+  push:
+    branches:
+      - main
+    paths:
+      - ".github/workflows/aarch64-setup.yml"
+      - "aarch64-runner/setup.sh"
+  workflow_dispatch:
+
+jobs:
+  test-script:
+    # The script itself is not aarch64-specific
+    # It is easier to test on ubuntu-latest
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout Repo ⚡️
+        uses: actions/checkout@v4
+
+      - name: Run setup script ✅
+        run: sudo ./aarch64-runner/setup.sh
diff --git a/docker-stacks/.github/workflows/contributed-recipes.yml b/docker-stacks/.github/workflows/contributed-recipes.yml
new file mode 100644
index 0000000..100ed11
--- /dev/null
+++ b/docker-stacks/.github/workflows/contributed-recipes.yml
@@ -0,0 +1,55 @@
+name: Test the contributed recipes
+
+on:
+  schedule:
+    # Images are rebuilt at 03:00 on Monday UTC
+    # So we're testing recipes one hour in advance
+    # They will also be tested after building images
+    - cron: "0 2 * * 1"
+  pull_request:
+    paths:
+      - ".github/workflows/contributed-recipes.yml"
+      - "docs/using/recipe_code/**"
+  push:
+    branches:
+      - main
+    paths:
+      - ".github/workflows/contributed-recipes.yml"
+      - "docs/using/recipe_code/**"
+  workflow_dispatch:
+  workflow_call:
+
+jobs:
+  generate-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - name: Checkout Repo ⚡️
+        uses: actions/checkout@v4
+
+      - name: Calculate recipes matrix 🛠
+        id: set-matrix
+        run: docs/using/recipe_code/generate_matrix.py >> "${GITHUB_OUTPUT}"
+
+  test-recipes:
+    runs-on: ${{ matrix.runs-on }}
+    needs: generate-matrix
+    if: github.repository_owner == 'jupyter'
+
+    steps:
+      - name: Checkout Repo ⚡️
+        uses: actions/checkout@v4
+
+      - name: Build recipe 🛠
+        # We're pulling here to avoid accidentally using an image that might be present on aarch64 self-hosted runner
+        run: docker build --pull --rm --force-rm --tag my-custom-image -f ./${{ matrix.dockerfile }} ./
+        env:
+          DOCKER_BUILDKIT: 1
+          # Full logs for CI build
+          BUILDKIT_PROGRESS: plain
+        working-directory: docs/using/recipe_code
+        shell: bash
+
+    strategy:
+      matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
diff --git a/docker-stacks/.github/workflows/docker-build-test-upload.yml b/docker-stacks/.github/workflows/docker-build-test-upload.yml
new file mode 100644
index 0000000..01520d3
--- /dev/null
+++ b/docker-stacks/.github/workflows/docker-build-test-upload.yml
@@ -0,0 +1,110 @@
+name: Download a parent image, build a new one, and test it; then upload the image, tags, and manifests to GitHub artifacts
+
+env:
+  REGISTRY: quay.io
+  OWNER: ${{ github.repository_owner }}
+
+on:
+  workflow_call:
+    inputs:
+      parent-image:
+        description: Parent image name
+        required: true
+        type: string
+      image:
+        description: Image name
+        required: true
+        type: string
+      platform:
+        description: Image platform
+        required: true
+        type: string
+      runs-on:
+        description: GitHub Actions Runner image
+        required: true
+        type: string
+
+jobs:
+  build-test-upload:
+    runs-on: ${{ inputs.runs-on }}
+
+    steps:
+      - name: Checkout Repo ⚡️
+        uses: actions/checkout@v4
+      - name: Create dev environment 📦
+        uses: ./.github/actions/create-dev-env
+
+      # Self-hosted runners share a state (whole VM) between runs
+      # Also, they might have running or stopped containers,
+      # which are not cleaned up by `docker system prune`
+      - name: Reset docker state and cleanup artifacts 🗑️
+        if: inputs.platform != 'x86_64'
+        run: |
+          docker kill $(docker ps --quiet) || true
+          docker rm $(docker ps --all --quiet) || true
+          docker system prune --all --force
+          rm -rf /tmp/jupyter/
+        shell: bash
+
+      - name: Load parent built image to Docker 📥
+        if: inputs.parent-image != ''
+        uses: ./.github/actions/load-image
+        with:
+          image: ${{ inputs.parent-image }}
+          platform: ${{ inputs.platform }}
+
+      - name: Pull ubuntu:22.04 image 📥
+        if: inputs.parent-image == ''
+        run: docker pull ubuntu:22.04
+        shell: bash
+
+      - name: Build image 🛠
+        run: docker build --rm --force-rm --tag ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ inputs.image }} images/${{ inputs.image }}/ --build-arg REGISTRY=${{ env.REGISTRY }} --build-arg OWNER=${{ env.OWNER }}
+        env:
+          DOCKER_BUILDKIT: 1
+          # Full logs for CI build
+          BUILDKIT_PROGRESS: plain
+        shell: bash
+
+      - name: Run tests ✅
+        run: python3 -m tests.run_tests --short-image-name ${{ inputs.image }} --registry ${{ env.REGISTRY }} --owner ${{ env.OWNER }}
+        shell: bash
+
+      - name: Write tags file 🏷
+        run: |
+          python3 -m tagging.write_tags_file --short-image-name ${{ inputs.image }} --tags-dir /tmp/jupyter/tags/ --registry ${{ env.REGISTRY }} --owner ${{ env.OWNER }}
+        shell: bash
+      - name: Upload tags file 💾
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ inputs.image }}-${{ inputs.platform }}-tags
+          path: /tmp/jupyter/tags/${{ inputs.platform }}-${{ inputs.image }}.txt
+          retention-days: 3
+
+      - name: Write manifest and build history file 🏷
+        run: python3 -m tagging.write_manifest --short-image-name ${{ inputs.image }} --hist-lines-dir /tmp/jupyter/hist_lines/ --manifests-dir /tmp/jupyter/manifests/ --registry ${{ env.REGISTRY }} --owner ${{ env.OWNER }}
+        shell: bash
+      - name: Upload manifest file 💾
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ inputs.image }}-${{ inputs.platform }}-manifest
+          path: /tmp/jupyter/manifests/${{ inputs.platform }}-${{ inputs.image }}-*.md
+          retention-days: 3
+      - name: Upload build history line 💾
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ inputs.image }}-${{ inputs.platform }}-history_line
+          path: /tmp/jupyter/hist_lines/${{ inputs.platform }}-${{ inputs.image }}-*.txt
+          retention-days: 3
+
+      - name: Save image as a tar for later use 💾
+        run: |
+          mkdir -p /tmp/jupyter/images/
+          docker save ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ inputs.image }} | zstd > /tmp/jupyter/images/${{ inputs.image }}-${{ inputs.platform }}.tar.zst
+        shell: bash
+      - name: Upload image as artifact 💾
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ inputs.image }}-${{ inputs.platform }}
+          path: /tmp/jupyter/images/${{ inputs.image }}-${{ inputs.platform }}.tar.zst
+          retention-days: 3
diff --git a/docker-stacks/.github/workflows/docker-merge-tags.yml b/docker-stacks/.github/workflows/docker-merge-tags.yml
new file mode 100644
index 0000000..038dbff
--- /dev/null
+++ b/docker-stacks/.github/workflows/docker-merge-tags.yml
@@ -0,0 +1,61 @@
+name: Download all tags from GitHub artifacts and create multi-platform manifests
+
+env:
+  OWNER: ${{ github.repository_owner }}
+  PUSH_TO_REGISTRY: ${{ (github.repository_owner == 'jupyter' || github.repository_owner == 'mathbunnyru') && (github.ref == 'refs/heads/main' || github.event_name == 'schedule') }}
+
+on:
+  workflow_call:
+    inputs:
+      image:
+        description: Image name
+        required: true
+        type: string
+    secrets:
+      REGISTRY_USERNAME:
+        required: true
+      REGISTRY_TOKEN:
+        required: true
+
+jobs:
+  merge-tags:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout Repo ⚡️
+        uses: actions/checkout@v4
+      - name: Create dev environment 📦
+        uses: ./.github/actions/create-dev-env
+
+      - name: Download x86_64 tags file 📥
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ inputs.image }}-x86_64-tags
+          path: /tmp/jupyter/tags/
+      - name: Download aarch64 tags file 📥
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ inputs.image }}-aarch64-tags
+          path: /tmp/jupyter/tags/
+
+      # Docker might be stuck when pulling images
+      # https://github.com/docker/for-mac/issues/2083
+      # https://stackoverflow.com/questions/38087027/docker-compose-stuck-downloading-or-pulling-fs-layer
+      - name: Reset docker state 🗑️
+        run: |
+          docker system prune --all --force
+          sudo systemctl restart docker
+        shell: bash
+
+      - name: Login to Registry 🔐
+        if: env.PUSH_TO_REGISTRY == 'true'
+        uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0
+        with:
+          registry: quay.io
+          username: ${{ secrets.REGISTRY_USERNAME }}
+          password: ${{ secrets.REGISTRY_TOKEN }}
+
+      - name: Merge tags for the images 🔀
+        if: env.PUSH_TO_REGISTRY == 'true'
+        run: python3 -m tagging.merge_tags --short-image-name ${{ inputs.image }} --tags-dir /tmp/jupyter/tags/
+        shell: bash
diff --git a/docker-stacks/.github/workflows/docker-tag-push.yml b/docker-stacks/.github/workflows/docker-tag-push.yml
new file mode 100644
index 0000000..b68a8d1
--- /dev/null
+++ b/docker-stacks/.github/workflows/docker-tag-push.yml
@@ -0,0 +1,62 @@
+name: Download a Docker image and its tags from GitHub artifacts, apply them, and push the image to the Registry
+
+env:
+  REGISTRY: quay.io
+  OWNER: ${{ github.repository_owner }}
+  PUSH_TO_REGISTRY: ${{ (github.repository_owner == 'jupyter' || github.repository_owner == 'mathbunnyru') && (github.ref == 'refs/heads/main' || github.event_name == 'schedule') }}
+
+on:
+  workflow_call:
+    inputs:
+      image:
+        description: Image name
+        required: true
+        type: string
+      platform:
+        description: Image platform
+        required: true
+        type: string
+    secrets:
+      REGISTRY_USERNAME:
+        required: true
+      REGISTRY_TOKEN:
+        required: true
+
+jobs:
+  tag-push:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout Repo ⚡️
+        uses: actions/checkout@v4
+      - name: Create dev environment 📦
+        uses: ./.github/actions/create-dev-env
+      - name: Load image to Docker 📥
+        uses: ./.github/actions/load-image
+        with:
+          image: ${{ inputs.image }}
+          platform: ${{ inputs.platform }}
+
+      - name: Login to Registry 🔐
+        if: env.PUSH_TO_REGISTRY == 'true'
+        uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0
+        with:
+          registry: quay.io
+          username: ${{ secrets.REGISTRY_USERNAME }}
+          password: ${{ secrets.REGISTRY_TOKEN }}
+
+      - name: Download tags file 📥
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ inputs.image }}-${{ inputs.platform }}-tags
+          path: /tmp/jupyter/tags/
+      - name: Apply tags to the loaded image 🏷
+        run: python3 -m tagging.apply_tags --short-image-name ${{ inputs.image }} --tags-dir /tmp/jupyter/tags/ --platform ${{ inputs.platform }} --registry ${{ env.REGISTRY }} --owner ${{ env.OWNER }}
+      # This step is needed to prevent pushing non-multi-arch "latest" tag
+      - name: Remove the "latest" tag from the image 🗑️
+        run: docker image rmi ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ inputs.image }}:latest
+
+      - name: Push Images to Registry 📤
+        if: env.PUSH_TO_REGISTRY == 'true'
+        run: docker push --all-tags ${{ env.REGISTRY }}/${{ env.OWNER }}/${{ inputs.image }}
+        shell: bash
diff --git a/docker-stacks/.github/workflows/docker-wiki-update.yml b/docker-stacks/.github/workflows/docker-wiki-update.yml
new file mode 100644
index 0000000..b5bd0cb
--- /dev/null
+++ b/docker-stacks/.github/workflows/docker-wiki-update.yml
@@ -0,0 +1,48 @@
+name: Download build manifests from GitHub artifacts and push them to GitHub wiki
+# We're doing everything in one workflow on purpose
+# This way we make sure we don't access wiki pages from several jobs simultaneously
+
+env:
+  PUSH_TO_REGISTRY: ${{ (github.repository_owner == 'jupyter' || github.repository_owner == 'mathbunnyru') && (github.ref == 'refs/heads/main' || github.event_name == 'schedule') }}
+
+on:
+  workflow_call:
+
+jobs:
+  wiki-update:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout Repo ⚡️
+        uses: actions/checkout@v4
+      - name: Create dev environment 📦
+        uses: ./.github/actions/create-dev-env
+
+      - name: Download all history lines 📥
+        uses: actions/download-artifact@v4
+        with:
+          pattern: "*-history_line"
+          path: /tmp/jupyter/hist_lines/
+
+      - name: Download all manifests 📥
+        uses: actions/download-artifact@v4
+        with:
+          pattern: "*-manifest"
+          path: /tmp/jupyter/manifests/
+
+      - name: Checkout Wiki Repo 📃
+        uses: actions/checkout@v4
+        with:
+          repository: ${{ github.repository }}.wiki
+          path: wiki/
+
+      - name: Update wiki 🏷
+        run: python3 -m tagging.update_wiki --wiki-dir wiki/ --hist-lines-dir /tmp/jupyter/hist_lines/ --manifests-dir /tmp/jupyter/manifests/
+        shell: bash
+
+      - name: Push Wiki to GitHub 📤
+        if: env.PUSH_TO_REGISTRY == 'true'
+        uses: stefanzweifel/git-auto-commit-action@8756aa072ef5b4a080af5dc8fef36c5d586e521d # v5.0.0
+        with:
+          commit_message: "Automated wiki publish for ${{ github.sha }}"
+          repository: wiki/
diff --git a/docker-stacks/.github/workflows/docker.yml b/docker-stacks/.github/workflows/docker.yml
new file mode 100644
index 0000000..a38058c
--- /dev/null
+++ b/docker-stacks/.github/workflows/docker.yml
@@ -0,0 +1,443 @@
+name: Build, test, and push Docker Images
+
+# [FAST_BUILD] in the PR title makes this workflow only build
+# the `jupyter/docker-stacks-foundation` and `jupyter/base-notebook` images
+# This allows to run CI faster if a full build is not required
+# This only works for a `pull_request` event and does not affect `push` to the `main` branch
`pull_request` event and does not affect `push` to the `main` branch + +on: + schedule: + # Weekly, at 03:00 on Monday UTC + - cron: "0 3 * * 1" + pull_request: + paths: + - ".github/workflows/docker.yml" + # We use local reusable workflows to make architecture clean and simple + # https://docs.github.com/en/actions/using-workflows/reusing-workflows + - ".github/workflows/docker-build-test-upload.yml" + - ".github/workflows/docker-merge-tags.yml" + - ".github/workflows/docker-tag-push.yml" + - ".github/workflows/docker-wiki-update.yml" + + # We use local composite actions to combine multiple workflow steps within one action + # https://docs.github.com/en/actions/creating-actions/about-custom-actions#composite-actions + - ".github/actions/create-dev-env/action.yml" + - ".github/actions/load-image/action.yml" + + - "images/**" + - "!images/*/README.md" + - "tagging/**" + - "!tagging/README.md" + - "tests/**" + - "!tests/README.md" + - "requirements-dev.txt" + push: + branches: + - main + paths: + - ".github/workflows/docker.yml" + - ".github/workflows/docker-build-test-upload.yml" + - ".github/workflows/docker-merge-tags.yml" + - ".github/workflows/docker-tag-push.yml" + - ".github/workflows/docker-wiki-update.yml" + + - ".github/actions/create-dev-env/action.yml" + - ".github/actions/load-image/action.yml" + + - "images/**" + - "!images/*/README.md" + - "tagging/**" + - "!tagging/README.md" + - "tests/**" + - "!tests/README.md" + - "requirements-dev.txt" + workflow_dispatch: + +# https://docs.github.com/en/actions/using-jobs/using-concurrency +concurrency: + # Only cancel in-progress jobs or runs for the current workflow - matches against branch & tags + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + aarch64-foundation: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: "" + image: docker-stacks-foundation + platform: aarch64 + runs-on: ARM64_FAST + if: github.repository_owner == 'jupyter' + + x86_64-foundation: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: "" + image: docker-stacks-foundation + platform: x86_64 + runs-on: ubuntu-latest + + aarch64-base: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: docker-stacks-foundation + image: base-notebook + platform: aarch64 + runs-on: ARM64_FAST + needs: [aarch64-foundation] + if: github.repository_owner == 'jupyter' + + x86_64-base: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: docker-stacks-foundation + image: base-notebook + platform: x86_64 + runs-on: ubuntu-latest + needs: [x86_64-foundation] + + aarch64-minimal: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: base-notebook + image: minimal-notebook + platform: aarch64 + runs-on: ARM64_FAST + needs: [aarch64-base] + if: github.repository_owner == 'jupyter' && !contains(github.event.pull_request.title, '[FAST_BUILD]') + + x86_64-minimal: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: base-notebook + image: minimal-notebook + platform: x86_64 + runs-on: ubuntu-latest + needs: [x86_64-base] + if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} + + aarch64-scipy: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: minimal-notebook + image: scipy-notebook + platform: aarch64 + runs-on: ARM64_FAST + needs: [aarch64-minimal] + if: github.repository_owner == 'jupyter' && !contains(github.event.pull_request.title, 
'[FAST_BUILD]') + + x86_64-scipy: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: minimal-notebook + image: scipy-notebook + platform: x86_64 + runs-on: ubuntu-latest + needs: [x86_64-minimal] + if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} + + aarch64-r: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: minimal-notebook + image: r-notebook + platform: aarch64 + runs-on: ARM64_SLOW + needs: [aarch64-minimal] + if: github.repository_owner == 'jupyter' && !contains(github.event.pull_request.title, '[FAST_BUILD]') + + x86_64-r: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: minimal-notebook + image: r-notebook + platform: x86_64 + runs-on: ubuntu-latest + needs: [x86_64-minimal] + if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} + + aarch64-julia: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: minimal-notebook + image: julia-notebook + platform: aarch64 + runs-on: ARM64_SLOW + needs: [aarch64-minimal] + if: github.repository_owner == 'jupyter' && !contains(github.event.pull_request.title, '[FAST_BUILD]') + + x86_64-julia: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: minimal-notebook + image: julia-notebook + platform: x86_64 + runs-on: ubuntu-latest + needs: [x86_64-minimal] + if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} + + aarch64-tensorflow: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: scipy-notebook + image: tensorflow-notebook + platform: aarch64 + runs-on: ARM64_SLOW + needs: [aarch64-scipy] + if: github.repository_owner == 'jupyter' && !contains(github.event.pull_request.title, '[FAST_BUILD]') + + x86_64-tensorflow: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: scipy-notebook + image: tensorflow-notebook + platform: x86_64 + runs-on: ubuntu-latest + needs: [x86_64-scipy] + if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} + + aarch64-pytorch: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: scipy-notebook + image: pytorch-notebook + platform: aarch64 + runs-on: ARM64_SLOW + needs: [aarch64-scipy] + if: github.repository_owner == 'jupyter' && !contains(github.event.pull_request.title, '[FAST_BUILD]') + + x86_64-pytorch: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: scipy-notebook + image: pytorch-notebook + platform: x86_64 + runs-on: ubuntu-latest + needs: [x86_64-scipy] + if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} + + aarch64-datascience: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: scipy-notebook + image: datascience-notebook + platform: aarch64 + runs-on: ARM64_SLOW + needs: [aarch64-scipy] + if: github.repository_owner == 'jupyter' && !contains(github.event.pull_request.title, '[FAST_BUILD]') + + x86_64-datascience: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: scipy-notebook + image: datascience-notebook + platform: x86_64 + runs-on: ubuntu-latest + needs: [x86_64-scipy] + if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} + + aarch64-pyspark: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: scipy-notebook + image: pyspark-notebook + platform: aarch64 + runs-on: ARM64_FAST + needs: [aarch64-scipy] + if: github.repository_owner == 'jupyter' && 
!contains(github.event.pull_request.title, '[FAST_BUILD]') + + x86_64-pyspark: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: scipy-notebook + image: pyspark-notebook + platform: x86_64 + runs-on: ubuntu-latest + needs: [x86_64-scipy] + if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} + + aarch64-all-spark: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: pyspark-notebook + image: all-spark-notebook + platform: aarch64 + runs-on: ARM64_FAST + needs: [aarch64-pyspark] + if: github.repository_owner == 'jupyter' && !contains(github.event.pull_request.title, '[FAST_BUILD]') + + x86_64-all-spark: + uses: ./.github/workflows/docker-build-test-upload.yml + with: + parent-image: pyspark-notebook + image: all-spark-notebook + platform: x86_64 + runs-on: ubuntu-latest + needs: [x86_64-pyspark] + if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} + + aarch64-images-tag-push: + uses: ./.github/workflows/docker-tag-push.yml + with: + platform: aarch64 + image: ${{ matrix.image }} + secrets: + REGISTRY_USERNAME: ${{ secrets.QUAY_USERNAME }} + REGISTRY_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} + strategy: + matrix: + image: + [ + docker-stacks-foundation, + base-notebook, + minimal-notebook, + scipy-notebook, + r-notebook, + julia-notebook, + tensorflow-notebook, + pytorch-notebook, + datascience-notebook, + pyspark-notebook, + all-spark-notebook, + ] + needs: + [ + aarch64-foundation, + aarch64-base, + aarch64-minimal, + aarch64-scipy, + aarch64-r, + aarch64-julia, + aarch64-tensorflow, + aarch64-pytorch, + aarch64-datascience, + aarch64-pyspark, + aarch64-all-spark, + ] + if: github.repository_owner == 'jupyter' && !contains(github.event.pull_request.title, '[FAST_BUILD]') + + aarch64-images-tag-push-fast: + uses: ./.github/workflows/docker-tag-push.yml + with: + platform: aarch64 + image: ${{ matrix.image }} + secrets: + REGISTRY_USERNAME: ${{ secrets.QUAY_USERNAME }} + REGISTRY_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} + strategy: + matrix: + image: [docker-stacks-foundation, base-notebook] + needs: [aarch64-foundation, aarch64-base] + if: github.repository_owner == 'jupyter' && contains(github.event.pull_request.title, '[FAST_BUILD]') + + x86_64-images-tag-push: + uses: ./.github/workflows/docker-tag-push.yml + with: + platform: x86_64 + image: ${{ matrix.image }} + secrets: + REGISTRY_USERNAME: ${{ secrets.QUAY_USERNAME }} + REGISTRY_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} + strategy: + matrix: + image: + [ + docker-stacks-foundation, + base-notebook, + minimal-notebook, + scipy-notebook, + r-notebook, + julia-notebook, + tensorflow-notebook, + pytorch-notebook, + datascience-notebook, + pyspark-notebook, + all-spark-notebook, + ] + needs: + [ + x86_64-foundation, + x86_64-base, + x86_64-minimal, + x86_64-scipy, + x86_64-r, + x86_64-julia, + x86_64-tensorflow, + x86_64-pytorch, + x86_64-datascience, + x86_64-pyspark, + x86_64-all-spark, + ] + if: ${{ !contains(github.event.pull_request.title, '[FAST_BUILD]') }} + + x86_64-images-tag-push-fast: + uses: ./.github/workflows/docker-tag-push.yml + with: + platform: x86_64 + image: ${{ matrix.image }} + secrets: + REGISTRY_USERNAME: ${{ secrets.QUAY_USERNAME }} + REGISTRY_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} + strategy: + matrix: + image: [docker-stacks-foundation, base-notebook] + needs: [x86_64-foundation, x86_64-base] + if: contains(github.event.pull_request.title, '[FAST_BUILD]') + + merge-tags: + uses: ./.github/workflows/docker-merge-tags.yml + 
with: + image: ${{ matrix.image }} + secrets: + REGISTRY_USERNAME: ${{ secrets.QUAY_USERNAME }} + REGISTRY_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} + strategy: + matrix: + image: + [ + docker-stacks-foundation, + base-notebook, + minimal-notebook, + scipy-notebook, + r-notebook, + julia-notebook, + tensorflow-notebook, + pytorch-notebook, + datascience-notebook, + pyspark-notebook, + all-spark-notebook, + ] + needs: [aarch64-images-tag-push, x86_64-images-tag-push] + if: github.repository_owner == 'jupyter' && !contains(github.event.pull_request.title, '[FAST_BUILD]') + + merge-tags-fast: + uses: ./.github/workflows/docker-merge-tags.yml + with: + image: ${{ matrix.image }} + secrets: + REGISTRY_USERNAME: ${{ secrets.QUAY_USERNAME }} + REGISTRY_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }} + strategy: + matrix: + image: [docker-stacks-foundation, base-notebook] + needs: [aarch64-images-tag-push-fast, x86_64-images-tag-push-fast] + if: github.repository_owner == 'jupyter' && contains(github.event.pull_request.title, '[FAST_BUILD]') + + wiki-update: + uses: ./.github/workflows/docker-wiki-update.yml + needs: [aarch64-images-tag-push, x86_64-images-tag-push] + if: github.repository_owner == 'jupyter' && !contains(github.event.pull_request.title, '[FAST_BUILD]') + permissions: + contents: write + + wiki-update-fast: + uses: ./.github/workflows/docker-wiki-update.yml + needs: [aarch64-images-tag-push-fast, x86_64-images-tag-push-fast] + if: github.repository_owner == 'jupyter' && contains(github.event.pull_request.title, '[FAST_BUILD]') + permissions: + contents: write + + contributed-recipes: + uses: ./.github/workflows/contributed-recipes.yml + needs: [merge-tags] + if: github.repository_owner == 'jupyter' && (github.ref == 'refs/heads/main' || github.event_name == 'schedule') diff --git a/docker-stacks/.github/workflows/pre-commit.yml b/docker-stacks/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..5c2e74d --- /dev/null +++ b/docker-stacks/.github/workflows/pre-commit.yml @@ -0,0 +1,32 @@ +name: Run pre-commit hooks + +on: + pull_request: + push: + branches: + - main + workflow_dispatch: + +permissions: + contents: read + +jobs: + run-hooks: + runs-on: ubuntu-latest + + steps: + - name: Checkout Repo ⚡️ + uses: actions/checkout@v4 + + - name: Set Up Python 🐍 + uses: actions/setup-python@v5 + with: + python-version: 3.x + + - name: Install pre-commit 📦 + run: | + pip install --upgrade pip + pip install --upgrade pre-commit + + - name: Run pre-commit hooks ✅ + run: pre-commit run --all-files --hook-stage manual diff --git a/docker-stacks/.github/workflows/registry-move.yml b/docker-stacks/.github/workflows/registry-move.yml new file mode 100644 index 0000000..09ef3d2 --- /dev/null +++ b/docker-stacks/.github/workflows/registry-move.yml @@ -0,0 +1,59 @@ +name: Move some images from Docker Hub to Quay.io + +env: + OWNER: ${{ github.repository_owner }} + PUSH_TO_REGISTRY: ${{ (github.repository_owner == 'jupyter' || github.repository_owner == 'mathbunnyru') && (github.ref == 'refs/heads/main') }} + +on: + pull_request: + paths: + - ".github/workflows/registry-move.yml" + push: + branches: + - main + paths: + - ".github/workflows/registry-move.yml" + workflow_dispatch: + +jobs: + update-overview: + # To be able to use the latest skopeo + runs-on: macos-latest + if: github.repository_owner == 'jupyter' || github.repository_owner == 'mathbunnyru' + + steps: + - name: Checkout Repo ⚡️ + uses: actions/checkout@v4 + + - name: Install skopeo and Docker 📦 + run: | + brew install skopeo + brew 
install --cask docker + + - name: Login to Quay.io 🔐 + if: env.PUSH_TO_REGISTRY == 'true' + run: skopeo login quay.io --username ${{ secrets.QUAY_USERNAME }} --password ${{ secrets.QUAY_ROBOT_TOKEN }} + + - name: Move image from Docker Hub to Quay.io 🐳 + if: env.PUSH_TO_REGISTRY == 'true' + run: | + skopeo copy --multi-arch all docker://${{ env.OWNER }}/${{ matrix.image }}:${{ matrix.tag }} docker://quay.io/${{ env.OWNER }}/${{ matrix.image }}:${{ matrix.tag }} + + strategy: + fail-fast: false + matrix: + image: + [ + docker-stacks-foundation, + base-notebook, + minimal-notebook, + scipy-notebook, + r-notebook, + julia-notebook, + tensorflow-notebook, + pytorch-notebook, + datascience-notebook, + pyspark-notebook, + all-spark-notebook, + ] + tag: [missing-tag-expected-to-fail] diff --git a/docker-stacks/.github/workflows/registry-overviews.yml b/docker-stacks/.github/workflows/registry-overviews.yml new file mode 100644 index 0000000..91a6b1f --- /dev/null +++ b/docker-stacks/.github/workflows/registry-overviews.yml @@ -0,0 +1,49 @@ +name: Update Registry overviews + +env: + OWNER: ${{ github.repository_owner }} + +on: + push: + branches: + - main + paths: + - ".github/workflows/registry-overviews.yml" + + - "images/*/README.md" + workflow_dispatch: + +jobs: + update-overview: + runs-on: ubuntu-latest + if: github.repository_owner == 'jupyter' || github.repository_owner == 'mathbunnyru' + + steps: + - name: Checkout Repo ⚡️ + uses: actions/checkout@v4 + + - name: Push README to Registry 🐳 + uses: christian-korneck/update-container-description-action@d36005551adeaba9698d8d67a296bd16fa91f8e8 # v1 + env: + DOCKER_APIKEY: ${{ secrets.APIKEY__QUAY_IO }} + with: + destination_container_repo: quay.io/${{ env.OWNER }}/${{ matrix.image }} + provider: quay + readme_file: images/${{ matrix.image }}/README.md + + strategy: + matrix: + image: + [ + docker-stacks-foundation, + base-notebook, + minimal-notebook, + scipy-notebook, + r-notebook, + julia-notebook, + tensorflow-notebook, + pytorch-notebook, + datascience-notebook, + pyspark-notebook, + all-spark-notebook, + ] diff --git a/docker-stacks/.github/workflows/sphinx.yml b/docker-stacks/.github/workflows/sphinx.yml new file mode 100644 index 0000000..9856b3e --- /dev/null +++ b/docker-stacks/.github/workflows/sphinx.yml @@ -0,0 +1,50 @@ +name: Build Sphinx Documentation and check links + +on: + schedule: + # Weekly, at 03:00 on Monday UTC + - cron: "0 3 * * 1" + pull_request: + paths: + - ".github/workflows/sphinx.yml" + + - "docs/**" + - "README.md" + push: + branches: + - main + paths: + - ".github/workflows/sphinx.yml" + + - "docs/**" + - "README.md" + workflow_dispatch: + +jobs: + build-docs: + permissions: + contents: write + if: github.repository_owner == 'jupyter' || github.repository_owner == 'mathbunnyru' || github.event_name != 'schedule' + runs-on: ubuntu-latest + + steps: + - name: Checkout Repo ⚡️ + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set Up Python 🐍 + uses: actions/setup-python@v5 + with: + python-version: 3.9 + + - name: Install Doc Dependencies 📦 + run: | + pip install --upgrade pip + pip install --upgrade -r docs/requirements.txt + + - name: Build Documentation 📖 + run: make docs + + - name: Check Documentation URLs 🔗 + run: make linkcheck-docs || make linkcheck-docs diff --git a/docker-stacks/.gitignore b/docker-stacks/.gitignore new file mode 100644 index 0000000..14241e2 --- /dev/null +++ b/docker-stacks/.gitignore @@ -0,0 +1,175 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] 
+*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + +################################################################## +# The content above is copied from # +# https://github.com/github/gitignore/blob/main/Python.gitignore # +# Please, add the content only below these lines # +################################################################## + +# Mac OS X +.DS_Store + +# VS Code project configuration +.vscode/ + +# PyCharm project configuration +.idea/ diff --git a/docker-stacks/.hadolint.yaml b/docker-stacks/.hadolint.yaml new file mode 100644 index 0000000..11ee226 --- /dev/null +++ b/docker-stacks/.hadolint.yaml @@ -0,0 +1,5 @@ +--- +ignored: + - DL3006 + - DL3008 + - DL3013 diff --git a/docker-stacks/.markdownlint.yaml b/docker-stacks/.markdownlint.yaml new file mode 100644 index 0000000..ce8b17f --- /dev/null +++ b/docker-stacks/.markdownlint.yaml @@ -0,0 +1,8 @@ +# Default state for all rules +default: true + +# MD013/line-length - Line length +MD013: + # Number of characters + line_length: 200 + tables: false diff --git a/docker-stacks/.pre-commit-config.yaml b/docker-stacks/.pre-commit-config.yaml new file mode 100644 index 0000000..8378636 --- /dev/null +++ b/docker-stacks/.pre-commit-config.yaml @@ -0,0 +1,165 @@ +--- +# pre-commit is a tool to perform a predefined set of tasks manually and/or +# automatically before git commits are made. +# +# Config reference: https://pre-commit.com/#pre-commit-configyaml---top-level +# +# Common tasks +# +# - Run on all files: pre-commit run --all-files +# - Register git hooks: pre-commit install --install-hooks +# +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + # Autoupdate: Python code + - repo: https://github.com/asottile/pyupgrade + rev: v3.15.0 + hooks: + - id: pyupgrade + args: [--py39-plus] + + # Automatically sort python imports + - repo: https://github.com/PyCQA/isort + rev: 5.13.2 + hooks: + - id: isort + args: [--profile, black] + + # Autoformat: Python code + - repo: https://github.com/psf/black + rev: 24.1.1 + hooks: + - id: black + args: [--target-version=py39] + + # Check python code static typing + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.8.0 + hooks: + - id: mypy + args: [--config, ./mypy.ini] + additional_dependencies: + [ + "beautifulsoup4", + "numpy", + "pytest", + "requests", + "urllib3", + "types-beautifulsoup4", + "types-requests", + "types-tabulate", + "types-urllib3", + ] + # Unfortunately, `pre-commit` only runs on changed files + # This doesn't work well with `mypy --follow-imports error` + # See: https://github.com/pre-commit/mirrors-mypy/issues/34#issuecomment-1062160321 + # + # To work around this we run `mypy` only in manual mode + # So it won't run as part of `git commit` command + # But it will still be run as part of `pre-commit` workflow and give expected results + stages: [manual] + + # Autoformat: YAML, JSON, Markdown, etc. 
+ - repo: https://github.com/pre-commit/mirrors-prettier + rev: v4.0.0-alpha.8 + hooks: + - id: prettier + + # `pre-commit sample-config` default hooks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-added-large-files + - id: end-of-file-fixer + - id: requirements-txt-fixer + - id: trailing-whitespace + + # Lint: Dockerfile + - repo: https://github.com/hadolint/hadolint + rev: v2.12.1-beta + hooks: + - id: hadolint-docker + entry: hadolint/hadolint:v2.12.1-beta hadolint + + # Lint: Dockerfile + # We're linting .dockerfile files as well + - repo: https://github.com/hadolint/hadolint + rev: v2.12.1-beta + hooks: + - id: hadolint-docker + name: Lint *.dockerfile Dockerfiles + entry: hadolint/hadolint:v2.12.1-beta hadolint + types: [file] + files: \.dockerfile$ + + # Lint: YAML + - repo: https://github.com/adrienverge/yamllint + rev: v1.33.0 + hooks: + - id: yamllint + args: ["-d {extends: relaxed, rules: {line-length: disable}}", "-s"] + files: \.(yaml|yml)$ + + # Lint: Bash scripts + - repo: https://github.com/openstack/bashate + rev: 2.1.1 + hooks: + - id: bashate + args: ["--ignore=E006"] + + # Lint: Shell scripts + - repo: https://github.com/shellcheck-py/shellcheck-py + rev: v0.9.0.6 + hooks: + - id: shellcheck + args: ["-x"] + + # Lint: Python + - repo: https://github.com/PyCQA/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + + # Lint: Markdown + - repo: https://github.com/igorshubovych/markdownlint-cli + rev: v0.39.0 + hooks: + - id: markdownlint + args: ["--fix"] + + # Strip output from Jupyter notebooks + - repo: https://github.com/kynan/nbstripout + rev: 0.6.1 + hooks: + - id: nbstripout + + # nbQA provides tools from the Python ecosystem like + # pyupgrade, isort, black, and flake8, adjusted for notebooks. + - repo: https://github.com/nbQA-dev/nbQA + rev: 1.7.1 + hooks: + - id: nbqa-pyupgrade + args: [--py39-plus] + - id: nbqa-isort + - id: nbqa-black + args: [--target-version=py39] + - id: nbqa-flake8 + + # Run black on python code blocks in documentation files. + - repo: https://github.com/adamchainz/blacken-docs + rev: 1.16.0 + hooks: + - id: blacken-docs + # --skip-errors is added to allow us to have python syntax highlighting even if + # the python code blocks include jupyter-specific additions such as % or ! + # See https://github.com/adamchainz/blacken-docs/issues/127 for an upstream + # feature request about this. 
+ args: [--target-version=py39, --skip-errors] + +# pre-commit.ci config reference: https://pre-commit.ci/#configuration +ci: + autoupdate_schedule: monthly + # Docker hooks do not work in pre-commit.ci + # See: + skip: [hadolint-docker] diff --git a/docker-stacks/.readthedocs.yaml b/docker-stacks/.readthedocs.yaml new file mode 100644 index 0000000..31dbf0d --- /dev/null +++ b/docker-stacks/.readthedocs.yaml @@ -0,0 +1,35 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + # You can also specify other tool versions: + # nodejs: "20" + # rust: "1.70" + # golang: "1.20" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs + # builder: "dirhtml" + # Fail on all warnings to avoid broken references + # fail_on_warning: true + +# Optionally build your docs in additional formats such as PDF and ePub +# formats: +# - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: docs/requirements.txt diff --git a/docker-stacks/CODE_OF_CONDUCT.md b/docker-stacks/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..90401f9 --- /dev/null +++ b/docker-stacks/CODE_OF_CONDUCT.md @@ -0,0 +1,3 @@ +# Project `jupyter/docker-stacks` Code of Conduct + +Please see the [Project Jupyter Code of Conduct](https://github.com/jupyter/governance/blob/HEAD/conduct/code_of_conduct.md). diff --git a/docker-stacks/CONTRIBUTING.md b/docker-stacks/CONTRIBUTING.md new file mode 100644 index 0000000..c14c698 --- /dev/null +++ b/docker-stacks/CONTRIBUTING.md @@ -0,0 +1,10 @@ +Thanks for contributing! +Please see the **Contributor Guide** section in [the documentation](https://jupyter-docker-stacks.readthedocs.io/en/latest/) +for information about how to contribute +[issues](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/issues.html), +[features](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/features.html), +[recipes](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/recipes.html), +[tests](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/tests.html), +and [community-maintained stacks](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/stacks.html). + + diff --git a/docker-stacks/LICENSE.md b/docker-stacks/LICENSE.md new file mode 100644 index 0000000..247e5e3 --- /dev/null +++ b/docker-stacks/LICENSE.md @@ -0,0 +1,60 @@ +# Licensing terms + +This project is licensed under the terms of the Modified BSD License +(also known as New or Revised or 3-Clause BSD), as follows: + +- Copyright (c) 2001-2015, IPython Development Team +- Copyright (c) 2015-, Jupyter Development Team + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. 
+ +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +Neither the name of the Jupyter Development Team nor the names of its +contributors may be used to endorse or promote products derived from this +software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +## About the Jupyter Development Team + +The Jupyter Development Team is the set of all contributors to the Jupyter project. +This includes all of the Jupyter subprojects. + +The core team that coordinates development on GitHub can be found here: +<https://github.com/jupyter/>. + +## Our Copyright Policy + +Jupyter uses a shared copyright model. Each contributor maintains copyright +over their contributions to Jupyter. But, it is important to note that these +contributions are typically only changes to the repositories. Thus, the Jupyter +source code, in its entirety is not the copyright of any single person or +institution. Instead, it is the collective copyright of the entire Jupyter +Development Team. If individual contributors want to maintain a record of what +changes/contributions they have specific copyright on, they should indicate +their copyright in the commit message of the change, when they commit the +change to one of the Jupyter repositories. + +With this in mind, the following banner should be used in any source code file +to indicate the copyright and license terms: + + # Copyright (c) Jupyter Development Team. + # Distributed under the terms of the Modified BSD License. diff --git a/docker-stacks/Makefile b/docker-stacks/Makefile new file mode 100644 index 0000000..7965b64 --- /dev/null +++ b/docker-stacks/Makefile @@ -0,0 +1,111 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License.
+.PHONY: docs help test + +SHELL:=bash +REGISTRY?=quay.io +OWNER?=jupyter + +# Enable BuildKit for Docker build +export DOCKER_BUILDKIT:=1 + +# All the images listed in the build dependency order +ALL_IMAGES:= \ + docker-stacks-foundation \ + base-notebook \ + minimal-notebook \ + r-notebook \ + julia-notebook \ + scipy-notebook \ + tensorflow-notebook \ + pytorch-notebook \ + datascience-notebook \ + pyspark-notebook \ + all-spark-notebook + + + +# https://marmelab.com/blog/2016/02/29/auto-documented-makefile.html +help: + @echo "jupyter/docker-stacks" + @echo "=====================" + @echo "Replace % with a stack directory name (e.g., make build/minimal-notebook)" + @echo + @grep -E '^[a-zA-Z0-9_%/-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + + + +build/%: DOCKER_BUILD_ARGS?= +build/%: ## build the latest image for a stack using the system's architecture + docker build $(DOCKER_BUILD_ARGS) --rm --force-rm --tag "$(REGISTRY)/$(OWNER)/$(notdir $@):latest" "./images/$(notdir $@)" --build-arg REGISTRY="$(REGISTRY)" --build-arg OWNER="$(OWNER)" + @echo -n "Built image size: " + @docker images "$(REGISTRY)/$(OWNER)/$(notdir $@):latest" --format "{{.Size}}" +build-all: $(foreach I, $(ALL_IMAGES), build/$(I)) ## build all stacks + + + +check-outdated/%: ## check the outdated mamba/conda packages in a stack and produce a report + @TEST_IMAGE="$(REGISTRY)/$(OWNER)/$(notdir $@)" pytest tests/docker-stacks-foundation/test_outdated.py +check-outdated-all: $(foreach I, $(ALL_IMAGES), check-outdated/$(I)) ## check all the stacks for outdated packages + + + +cont-stop-all: ## stop all containers + @echo "Stopping all containers ..." + -docker stop --time 0 $(shell docker ps --all --quiet) 2> /dev/null +cont-rm-all: ## remove all containers + @echo "Removing all containers ..." + -docker rm --force $(shell docker ps --all --quiet) 2> /dev/null +cont-clean-all: cont-stop-all cont-rm-all ## clean all containers (stop + rm) + + + +docs: ## build HTML documentation + sphinx-build -W --keep-going --color docs/ docs/_build/ +linkcheck-docs: ## check broken links + sphinx-build -W --keep-going --color -b linkcheck docs/ docs/_build/ + + + +hook/%: ## run post-build hooks for an image + python3 -m tagging.write_tags_file --short-image-name "$(notdir $@)" --tags-dir /tmp/jupyter/tags/ --registry "$(REGISTRY)" --owner "$(OWNER)" && \ + python3 -m tagging.write_manifest --short-image-name "$(notdir $@)" --hist-lines-dir /tmp/jupyter/hist_lines/ --manifests-dir /tmp/jupyter/manifests/ --registry "$(REGISTRY)" --owner "$(OWNER)" && \ + python3 -m tagging.apply_tags --short-image-name "$(notdir $@)" --tags-dir /tmp/jupyter/tags/ --platform "$(shell uname -m)" --registry "$(REGISTRY)" --owner "$(OWNER)" +hook-all: $(foreach I, $(ALL_IMAGES), hook/$(I)) ## run post-build hooks for all images + + + +img-list: ## list jupyter images + @echo "Listing $(OWNER) images ..." + docker images "$(OWNER)/*" + docker images "*/$(OWNER)/*" +img-rm-dang: ## remove dangling images (tagged None) + @echo "Removing dangling images ..." + -docker rmi --force $(shell docker images -f "dangling=true" --quiet) 2> /dev/null +img-rm-jupyter: ## remove jupyter images + @echo "Removing $(OWNER) images ..." 
+ -docker rmi --force $(shell docker images --quiet "$(OWNER)/*") 2> /dev/null + -docker rmi --force $(shell docker images --quiet "*/$(OWNER)/*") 2> /dev/null +img-rm: img-rm-dang img-rm-jupyter ## remove dangling and jupyter images + + + +pull/%: ## pull a jupyter image + docker pull "$(REGISTRY)/$(OWNER)/$(notdir $@)" +pull-all: $(foreach I, $(ALL_IMAGES), pull/$(I)) ## pull all images +push/%: ## push all tags for a jupyter image + docker push --all-tags "$(REGISTRY)/$(OWNER)/$(notdir $@)" +push-all: $(foreach I, $(ALL_IMAGES), push/$(I)) ## push all tagged images + + + +run-shell/%: ## run a bash in interactive mode in a stack + docker run -it --rm "$(REGISTRY)/$(OWNER)/$(notdir $@)" $(SHELL) +run-sudo-shell/%: ## run bash in interactive mode as root in a stack + docker run -it --rm --user root "$(REGISTRY)/$(OWNER)/$(notdir $@)" $(SHELL) + + + +test/%: ## run tests against a stack + python3 -m tests.run_tests --short-image-name "$(notdir $@)" --registry "$(REGISTRY)" --owner "$(OWNER)" +test-all: $(foreach I, $(ALL_IMAGES), test/$(I)) ## test all stacks diff --git a/docker-stacks/README.md b/docker-stacks/README.md new file mode 100644 index 0000000..018bdff --- /dev/null +++ b/docker-stacks/README.md @@ -0,0 +1,137 @@ +# Jupyter Docker Stacks + +[![GitHub actions badge](https://github.com/jupyter/docker-stacks/actions/workflows/docker.yml/badge.svg) +](https://github.com/jupyter/docker-stacks/actions/workflows/docker.yml?query=branch%3Amain "Docker images build status") +[![Read the Docs badge](https://img.shields.io/readthedocs/jupyter-docker-stacks.svg)](https://jupyter-docker-stacks.readthedocs.io/en/latest/ "Documentation build status") +[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/jupyter/docker-stacks/main.svg)](https://results.pre-commit.ci/latest/github/jupyter/docker-stacks/main "pre-commit.ci build status") +[![Discourse badge](https://img.shields.io/discourse/users.svg?color=%23f37626&server=https%3A%2F%2Fdiscourse.jupyter.org)](https://discourse.jupyter.org/ "Jupyter Discourse Forum") +[![Binder badge](https://static.mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/jupyter/docker-stacks/main?urlpath=lab/tree/README.ipynb "Launch a quay.io/jupyter/base-notebook container on mybinder.org") + +Jupyter Docker Stacks are a set of ready-to-run [Docker images](https://quay.io/organization/jupyter) containing Jupyter applications and interactive computing tools. +You can use a stack image to do any of the following (and more): + +- Start a personal Jupyter Server with the JupyterLab frontend (default) +- Run JupyterLab for a team using JupyterHub +- Start a personal Jupyter Server with the Jupyter Notebook frontend in a local Docker container +- Write your own project Dockerfile + +## Quick Start + +You can [try a relatively recent build of the quay.io/jupyter/base-notebook image on mybinder.org](https://mybinder.org/v2/gh/jupyter/docker-stacks/main?urlpath=lab/tree/README.ipynb). +Otherwise, the examples below may help you get started if you [have Docker installed](https://docs.docker.com/get-docker/), +know [which Docker image](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html) you want to use, and want to launch a single Jupyter Application in a container. + +The [User Guide on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/) describes additional uses and features in detail. + +```{note} +Since `2023-10-20` our images are only pushed to `Quay.io` registry. 
+Older images are available on Docker Hub, but they will no longer be updated. +``` + +### Example 1 + +This command pulls the `jupyter/scipy-notebook` image tagged `2024-01-15` from Quay.io if it is not already present on the local host. +It then starts a container running a Jupyter Server with the JupyterLab frontend and exposes the container's internal port `8888` to port `10000` of the host machine: + +```bash +docker run -p 10000:8888 quay.io/jupyter/scipy-notebook:2024-01-15 +``` + +You can modify the port on which the container's port is exposed by [changing the value of the `-p` option](https://docs.docker.com/engine/reference/run/#exposed-ports) to `-p 8888:8888`. + +Visiting `http://<hostname>:10000/?token=<token>` in a browser loads JupyterLab, +where: + +- The `hostname` is the name of the computer running Docker +- The `token` is the secret token printed in the console. + +The container remains intact for restart after the Server exits. + +### Example 2 + +This command pulls the `jupyter/datascience-notebook` image tagged `2024-01-15` from Quay.io if it is not already present on the local host. +It then starts an _ephemeral_ container running a Jupyter Server with the JupyterLab frontend and exposes the server on host port 10000. + +```bash +docker run -it --rm -p 10000:8888 -v "${PWD}":/home/jovyan/work quay.io/jupyter/datascience-notebook:2024-01-15 +``` + +The use of the `-v` flag in the command mounts the current working directory on the host (`${PWD}` in the example command) as `/home/jovyan/work` in the container. +The server logs appear in the terminal. + +Visiting `http://<hostname>:10000/?token=<token>` in a browser loads JupyterLab. + +Due to the usage of [the `--rm` flag](https://docs.docker.com/engine/reference/commandline/container_run/#rm) +Docker automatically cleans up the container and removes the file system when the container exits, +but any changes made to the `~/work` directory and its files in the container will remain intact on the host. +[The `-i` flag](https://docs.docker.com/engine/reference/commandline/container_run/#interactive) keeps the container's `STDIN` open, and lets you send input to the container through standard input. +[The `-t` flag](https://docs.docker.com/engine/reference/commandline/container_run/#tty) attaches a pseudo-TTY to the container. + +```{note} +By default, [jupyter's root_dir](https://jupyter-server.readthedocs.io/en/latest/other/full-config.html) is `/home/jovyan`. +So, new notebooks will be saved there, unless you change the directory in the file browser. + +To change the default directory, you must specify `ServerApp.root_dir` by adding this line to the previous command: `start-notebook.py --ServerApp.root_dir=/home/jovyan/work`. +``` + +## Choosing Jupyter frontend + +JupyterLab is the default for all the Jupyter Docker Stacks images. +It is still possible to switch back to Jupyter Notebook (or to launch a different startup command). +You can achieve this by passing the environment variable `DOCKER_STACKS_JUPYTER_CMD=notebook` (or any other valid `jupyter` subcommand) at container startup; +more information is available in the [documentation](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/common.html#alternative-commands).
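+
+For example, to launch the classic Notebook frontend instead of JupyterLab, pass the variable with Docker's `-e` flag. This is a minimal sketch that reuses the `base-notebook` image and the `2024-01-15` tag from the examples above; the port mapping is illustrative:
+
+```bash
+# DOCKER_STACKS_JUPYTER_CMD=notebook starts Jupyter Notebook instead of the default JupyterLab
+docker run -it --rm -p 10000:8888 -e DOCKER_STACKS_JUPYTER_CMD=notebook quay.io/jupyter/base-notebook:2024-01-15
+```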
+ +## Resources + +- [Documentation on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/) +- [Issue Tracker on GitHub](https://github.com/jupyter/docker-stacks/issues) +- [Jupyter Discourse Forum](https://discourse.jupyter.org/) +- [Jupyter Website](https://jupyter.org) +- [Images on Quay.io](https://quay.io/organization/jupyter) + +## Acknowledgments + +- Starting from `2022-07-05`, `aarch64` self-hosted runners were sponsored by [`@mathbunnyru`](https://github.com/mathbunnyru/). + Please, consider [sponsoring his work](https://github.com/sponsors/mathbunnyru) on GitHub +- Starting from `2023-10-31`, `aarch64` self-hosted runners are sponsored by an amazing [`2i2c non-profit organization`](https://2i2c.org) + +## CPU Architectures + +- We publish containers for both `x86_64` and `aarch64` platforms +- Single-platform images have either `aarch64-` or `x86_64-` tag prefixes, for example, `quay.io/jupyter/base-notebook:aarch64-python-3.11.6` +- Starting from `2022-09-21`, we create multi-platform images (except `tensorflow-notebook`) +- Starting from `2023-06-01`, we create a multi-platform `tensorflow-notebook` image as well + +## Using old images + +This project only builds one set of images at a time. +If you want to use the older `Ubuntu` and/or `Python` version, you can use the following images: + +| Build Date | Ubuntu | Python | Tag | +| ------------ | ------ | ------ | -------------- | +| 2022-10-09 | 20.04 | 3.7 | `1aac87eb7fa5` | +| 2022-10-09 | 20.04 | 3.8 | `a374cab4fcb6` | +| 2022-10-09 | 20.04 | 3.9 | `5ae537728c69` | +| 2022-10-09 | 20.04 | 3.10 | `f3079808ca8c` | +| 2022-10-09 | 22.04 | 3.7 | `b86753318aa1` | +| 2022-10-09 | 22.04 | 3.8 | `7285848c0a11` | +| 2022-10-09 | 22.04 | 3.9 | `ed2908bbb62e` | +| 2023-05-30 | 22.04 | 3.10 | `4d70cf8da953` | +| weekly build | 22.04 | 3.11 | `latest` | + +## Contributing + +Please see the [Contributor Guide on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/) +for information about how to contribute recipes, features, tests, and community-maintained stacks. 
+ +## Alternatives + +- [rocker/binder](https://rocker-project.org/images/versioned/binder.html) - + From the R focused [rocker-project](https://rocker-project.org), + lets you run both RStudio and Jupyter either standalone or in a JupyterHub +- [jupyter/repo2docker](https://github.com/jupyterhub/repo2docker) - + Turn git repositories into Jupyter-enabled Docker Images +- [openshift/source-to-image](https://github.com/openshift/source-to-image) - + A tool for building artifacts from source code and injecting them into docker images +- [jupyter-on-openshift/jupyter-notebooks](https://github.com/jupyter-on-openshift/jupyter-notebooks) - + OpenShift compatible S2I builder for basic notebook images diff --git a/docker-stacks/aarch64-runner/setup.sh b/docker-stacks/aarch64-runner/setup.sh new file mode 100755 index 0000000..fb6e05b --- /dev/null +++ b/docker-stacks/aarch64-runner/setup.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -ex + +GITHUB_RUNNER_USER="runner-user" + +if [ "${EUID}" -ne 0 ]; then + echo "Please run as root" + exit 1 +fi + +apt-get update --yes +apt-get upgrade --yes + +echo "Setting up runner-user, who will run GitHub Actions runner" +adduser --disabled-password --gecos "" ${GITHUB_RUNNER_USER} +mkdir /home/${GITHUB_RUNNER_USER}/.ssh/ +set +e +cp "/home/${SUDO_USER}/.ssh/authorized_keys" "/home/${GITHUB_RUNNER_USER}/.ssh/authorized_keys" +set -e +chown --recursive ${GITHUB_RUNNER_USER}:${GITHUB_RUNNER_USER} /home/${GITHUB_RUNNER_USER}/.ssh + +echo "Setting up python3" +apt-get install --yes --no-install-recommends python3 +curl -sS https://bootstrap.pypa.io/get-pip.py | python3 + +echo "Setting up docker" +apt-get remove --yes docker.io docker-doc docker-compose podman-docker containerd runc +apt-get update --yes +apt-get install --yes ca-certificates curl gnupg +install -m 0755 -d /etc/apt/keyrings +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg +chmod a+r /etc/apt/keyrings/docker.gpg +echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | \ + tee /etc/apt/sources.list.d/docker.list > /dev/null +apt-get update --yes +apt-get install --yes docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + +usermod -aG docker ${GITHUB_RUNNER_USER} +chmod 666 /var/run/docker.sock diff --git a/docker-stacks/binder/Dockerfile b/docker-stacks/binder/Dockerfile new file mode 100644 index 0000000..cad1fd4 --- /dev/null +++ b/docker-stacks/binder/Dockerfile @@ -0,0 +1,18 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
+ +# https://quay.io/repository/jupyter/base-notebook?tab=tags +ARG REGISTRY=quay.io +ARG OWNER=jupyter +ARG BASE_CONTAINER=$REGISTRY/$OWNER/base-notebook:2024-01-15 +FROM $BASE_CONTAINER + +LABEL maintainer="Jupyter Project " + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +ENV TAG="2024-01-15" + +COPY --chown=${NB_UID}:${NB_GID} binder/README.ipynb "${HOME}"/README.ipynb diff --git a/docker-stacks/binder/README.ipynb b/docker-stacks/binder/README.ipynb new file mode 100644 index 0000000..66630c9 --- /dev/null +++ b/docker-stacks/binder/README.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# jupyter/base-notebook on Binder\n", + "\n", + "Run the cells below to inspect what's in the [jupyter/base-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-base-notebook) image from the Jupyter Docker Stacks project.\n", + "\n", + "You can launch the classic notebook interface in Binder by replacing `lab/tree/*` with `tree/` in the JupyterLab URL." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "print(\n", + " f'This container is using tag {os.environ[\"TAG\"]} of the jupyter/base-notebook image'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Server is running as the following user." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!id" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's the contents of that user's home directory, the default notebook directory for the server." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!ls -al" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`mamba` is available in the user's path." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!which mamba" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The user has read/write access to the root mamba environment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!ls -l /opt/conda" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following packages are mamba-installed in the base image to support [Jupyter Notebook](https://github.com/jupyter/notebook), [JupyterLab](https://github.com/jupyterlab/jupyterlab), and their use in [JupyterHub](https://github.com/jupyterhub/jupyterhub) environments (e.g., [MyBinder](https://mybinder.org/))." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mamba list" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Other images in the [jupyter/docker-stacks project](https://github.com/jupyter/docker-stacks) include additional libraries. See the [Jupyter Docker Stacks documentation](https://jupyter-docker-stacks.readthedocs.io/en/latest/) for full details." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docker-stacks/docs/_static/contributing/stacks/docker-org-create-token.png b/docker-stacks/docs/_static/contributing/stacks/docker-org-create-token.png new file mode 100644 index 0000000..21105bc Binary files /dev/null and b/docker-stacks/docs/_static/contributing/stacks/docker-org-create-token.png differ diff --git a/docker-stacks/docs/_static/contributing/stacks/docker-org-security.png b/docker-stacks/docs/_static/contributing/stacks/docker-org-security.png new file mode 100644 index 0000000..831b3d5 Binary files /dev/null and b/docker-stacks/docs/_static/contributing/stacks/docker-org-security.png differ diff --git a/docker-stacks/docs/_static/contributing/stacks/docker-repo-name.png b/docker-stacks/docs/_static/contributing/stacks/docker-repo-name.png new file mode 100644 index 0000000..baf24d5 Binary files /dev/null and b/docker-stacks/docs/_static/contributing/stacks/docker-repo-name.png differ diff --git a/docker-stacks/docs/_static/contributing/stacks/docker-user-dropdown.png b/docker-stacks/docs/_static/contributing/stacks/docker-user-dropdown.png new file mode 100644 index 0000000..1cc107e Binary files /dev/null and b/docker-stacks/docs/_static/contributing/stacks/docker-user-dropdown.png differ diff --git a/docker-stacks/docs/_static/contributing/stacks/github-actions-tab.png b/docker-stacks/docs/_static/contributing/stacks/github-actions-tab.png new file mode 100644 index 0000000..1c23362 Binary files /dev/null and b/docker-stacks/docs/_static/contributing/stacks/github-actions-tab.png differ diff --git a/docker-stacks/docs/_static/contributing/stacks/github-actions-workflow.png b/docker-stacks/docs/_static/contributing/stacks/github-actions-workflow.png new file mode 100644 index 0000000..2dc4a9c Binary files /dev/null and b/docker-stacks/docs/_static/contributing/stacks/github-actions-workflow.png differ diff --git a/docker-stacks/docs/_static/contributing/stacks/github-create-secrets.png b/docker-stacks/docs/_static/contributing/stacks/github-create-secrets.png new file mode 100644 index 0000000..1336d0d Binary files /dev/null and b/docker-stacks/docs/_static/contributing/stacks/github-create-secrets.png differ diff --git a/docker-stacks/docs/_static/contributing/stacks/github-secret-token.png b/docker-stacks/docs/_static/contributing/stacks/github-secret-token.png new file mode 100644 index 0000000..5901c26 Binary files /dev/null and b/docker-stacks/docs/_static/contributing/stacks/github-secret-token.png differ diff --git a/docker-stacks/docs/_static/jupyter-logo.svg b/docker-stacks/docs/_static/jupyter-logo.svg new file mode 100644 index 0000000..ab25508 --- /dev/null +++ b/docker-stacks/docs/_static/jupyter-logo.svg @@ -0,0 +1,90 @@ +[SVG image: Jupyter logo, "Group.svg", created using Figma 0.90; 90 lines of vector markup] diff --git a/docker-stacks/docs/_static/using/troubleshooting/vscode-jupyter-settings.png b/docker-stacks/docs/_static/using/troubleshooting/vscode-jupyter-settings.png new file mode
100644 index 0000000..6c60389 Binary files /dev/null and b/docker-stacks/docs/_static/using/troubleshooting/vscode-jupyter-settings.png differ diff --git a/docker-stacks/docs/conf.py b/docker-stacks/docs/conf.py new file mode 100644 index 0000000..efe904b --- /dev/null +++ b/docker-stacks/docs/conf.py @@ -0,0 +1,75 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "docker-stacks" +copyright = "2024, Project Jupyter" +author = "Project Jupyter" + +version = "latest" +release = "latest" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [] + +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +language = "en" + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "alabaster" +html_static_path = ["_static"] + +# The configuration above was generated using sphinx 7.2.6 with this command: +# sphinx-quickstart --project "docker-stacks" --author "Project Jupyter" -v "latest" -r "latest" -l en --no-sep --no-makefile --no-batchfile +# These are custom options for this project + +html_theme = "sphinx_book_theme" +html_title = "Docker Stacks documentation" +html_logo = "_static/jupyter-logo.svg" +html_theme_options = { + "logo": { + "text": html_title, + }, + "navigation_with_keys": False, + "path_to_docs": "docs", + "repository_branch": "main", + "repository_url": "https://github.com/jupyter/docker-stacks", + "use_download_button": True, + "use_edit_page_button": True, + "use_issues_button": True, + "use_repository_button": True, +} +html_last_updated_fmt = "%Y-%m-%d" + +extensions = ["myst_parser", "sphinx_copybutton", "sphinx_last_updated_by_git"] +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} +pygments_style = "sphinx" + +# MyST configuration reference: https://myst-parser.readthedocs.io/en/latest/configuration.html +myst_heading_anchors = 3 + +linkcheck_ignore = [ + r".*github\.com.*#", # javascript based anchors + r"https://github\.com/jupyter/docker-stacks/settings/actions/runners/new\?arch=arm64\&os=linux", # only works for users with permissions to change runners + r"http://127\.0\.0\.1:.*", # various examples + r"https://mybinder\.org/v2/gh/.*", # lots of 500 errors +] + +linkcheck_allowed_redirects = { + r"https://results\.pre-commit\.ci/latest/github/jupyter/docker-stacks/main": r"https://results\.pre-commit\.ci/run/github/.*", # Latest main CI build + r"https://github\.com/jupyter/docker-stacks/issues/new.*": r"https://github\.com/login.*", # GitHub wants the user to be logged in to use these features + r"https://github\.com/orgs/jupyter/teams/docker-image-maintainers/members": r"https://github\.com/login.*", +} diff --git a/docker-stacks/docs/contributing/features.md b/docker-stacks/docs/contributing/features.md new file mode 100644 index 0000000..b4d6819 --- /dev/null +++ b/docker-stacks/docs/contributing/features.md @@ -0,0 +1,57 @@ +# New Features + +Thank you for contributing to the Jupyter Docker Stacks!
+We review pull requests for new features (e.g., new packages, new scripts, new flags)
+to balance the value of the images to the Jupyter community with the cost of maintaining the images over time.
+
+## Suggesting a New Feature
+
+Please follow the process below to suggest a new feature for inclusion in one of the core stacks:
+
+1. Open a [GitHub feature request issue](https://github.com/jupyter/docker-stacks/issues/new?assignees=&labels=type%3AEnhancement&projects=&template=feature_request.yml)
+   describing the feature you'd like to contribute.
+2. Discuss with the maintainers whether the addition makes sense
+   in [one of the core stacks](../using/selecting.md#core-stacks),
+   as a [recipe in the documentation](recipes.md),
+   as a [community stack](stacks.md),
+   or as something else entirely.
+
+## Selection Criteria
+
+Roughly speaking, we evaluate new features based on the following criteria:
+
+- **Usefulness to Jupyter users**:
+  Is the feature generally applicable across domains?
+  Does it work with JupyterLab, Jupyter Notebook, JupyterHub, etc.?
+- **Fit with the image purpose**:
+  Does the feature match the theme of the stack to which it will be added?
+  Would it fit better in a new community stack?
+- **The complexity of build/runtime configuration**:
+  How many lines of code does the feature require in one of the Dockerfiles or startup scripts?
+  Does it require entirely new scripts?
+  Do users need to adjust how they use the images?
+- **Impact on image metrics**:
+  How many bytes do the feature and its dependencies add to the image(s)?
+  How many minutes do they add to the build time?
+- **Ability to support the addition**:
+  Can existing maintainers answer user questions and address future build issues?
+  Are the contributors interested in helping with long-term maintenance?
+  Can we write tests to ensure the feature continues to work over the years?
+
+## Submitting a Pull Request
+
+If there's agreement that the feature belongs in one or more of the core stacks:
+
+1. Implement the feature in a local clone of the `jupyter/docker-stacks` project.
+2. Please build the image locally before submitting a pull request.
+   It shortens the debugging cycle by taking some load off GitHub Actions,
+   which graciously provides free build services for open-source projects like this one.
+   If you use `make`, call:
+
+   ```bash
+   make build/<somestack>
+   ```
+
+3. [Submit a pull request](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) (PR) with your changes.
+4. Watch for GitHub to report a build success or failure for your PR.
+5. Discuss changes with the maintainers and address any build issues.
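To make the build step above concrete: assuming you changed the `minimal-notebook` stack (the stack name here is only an illustration), the local build invocation would look like this:

```bash
# Build a single stack locally before opening the PR;
# "minimal-notebook" is an example stack name, substitute your own.
make build/minimal-notebook
```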
diff --git a/docker-stacks/docs/contributing/issues.md b/docker-stacks/docs/contributing/issues.md
new file mode 100644
index 0000000..680d584
--- /dev/null
+++ b/docker-stacks/docs/contributing/issues.md
@@ -0,0 +1,19 @@
+# Project Issues
+
+We appreciate your taking the time to report an issue you encountered using the Jupyter Docker Stacks.
+Please review the following guidelines when reporting your problem.
+
+- If you believe you've found a security vulnerability in any of the Jupyter projects included in Jupyter Docker Stacks images,
+  please report it to [security@ipython.org](mailto:security@ipython.org), **not in the issue trackers on GitHub**.
+  If you prefer to encrypt your security reports, you can use [this PGP public key](https://github.com/jupyter/jupyter.github.io/blob/HEAD/assets/ipython_security.asc).
+- If you think your problem is unique to the Jupyter Docker Stacks images,
+  please search the [jupyter/docker-stacks issue tracker](https://github.com/jupyter/docker-stacks/issues)
+  to see if someone else has already reported the same problem.
+  If not, please open a [GitHub bug report issue](https://github.com/jupyter/docker-stacks/issues/new?assignees=&labels=type%3ABug&projects=&template=bug_report.yml)
+  and provide all the information requested in the issue template.
+  Additionally, check the [Troubleshooting Common Problems](../using/troubleshooting.md) page in the documentation before submitting an issue.
+- If the issue you're seeing is with one of the open-source libraries included in the Docker images and is reproducible outside the images,
+  please file a bug with the appropriate open-source project.
+- If you have a general question about how to use the Jupyter Docker Stacks in your environment,
+  in conjunction with other tools, customizations, and so on,
+  please post your question on the [Jupyter Discourse site](https://discourse.jupyter.org).
diff --git a/docker-stacks/docs/contributing/lint.md b/docker-stacks/docs/contributing/lint.md
new file mode 100644
index 0000000..fe83808
--- /dev/null
+++ b/docker-stacks/docs/contributing/lint.md
@@ -0,0 +1,76 @@
+# Lint
+
+To enforce some rules, **linters** are used in this project.
+Linters can be run either during the **development phase** (by the developer) or the **integration phase** (by GitHub Actions).
+To integrate and enforce this process in the project lifecycle, we are using **git hooks** through [pre-commit](https://pre-commit.com/).
+
+## Using pre-commit hooks
+
+### Pre-commit hook installation
+
+_pre-commit_ is a Python package that needs to be installed.
+To achieve this, use the generic task to install all Python development dependencies.
+
+```sh
+# Install all development dependencies for the project
+pip install --upgrade -r requirements-dev.txt
+# It can also be installed directly
+pip install pre-commit
+```
+
+Then the git hook scripts configured for the project in `.pre-commit-config.yaml` need to be installed in the local git repository.
+
+```sh
+pre-commit install
+```
+
+### Run
+
+Now _pre-commit_ (and so the configured hooks) will run automatically on `git commit`, checking each changed file.
+However, it is also possible to trigger it against all files.
+
+```{note}
+The Hadolint pre-commit hook runs via Docker, so Docker should be running when you execute this command.
+```
+
+```sh
+pre-commit run --all-files --hook-stage manual
+```
+
+```{note}
+We're running `pre-commit` with `--hook-stage manual` because, by default, `pre-commit` only works with changed files, which doesn't work well for mypy.
+More information can be found in the [`.pre-commit-config.yaml` file](https://github.com/jupyter/docker-stacks/blob/main/.pre-commit-config.yaml).
+```
+
+## Image Lint
+
+To comply with [Docker best practices](https://docs.docker.com/develop/develop-images/dockerfile_best-practices),
+we are using the [Hadolint](https://github.com/hadolint/hadolint) tool to analyze each `Dockerfile`.
+
+### Ignoring Rules
+
+Sometimes it is necessary to ignore [some rules](https://github.com/hadolint/hadolint#rules).
+The following rules are ignored by default for all images in the `.hadolint.yaml` file.
+
+- [`DL3006`][dl3006]: We use a specific policy to manage image tags.
+  - The `docker-stacks-foundation` `FROM` clause is fixed but based on an argument (`ARG`).
+  - Building downstream images from (`FROM`) the latest is done on purpose.
+- [`DL3008`][dl3008]: System packages are always updated (`apt-get`) to the latest version.
+- [`DL3013`][dl3013]: We always install the latest packages using `pip`.
+
+For other rules, the preferred way to ignore them is to flag them directly in the `Dockerfile`.
+
+> It is also possible to ignore rules by using a special comment directly above the Dockerfile instruction you want to make an exception for.
+> Ignore rule comments look like `# hadolint ignore=DL3001,SC1081`.
+> For example:
+
+```dockerfile
+FROM ubuntu
+
+# hadolint ignore=DL3003,SC1035
+RUN cd /tmp && echo "hello!"
+```
+
+[dl3006]: https://github.com/hadolint/hadolint/wiki/DL3006
+[dl3008]: https://github.com/hadolint/hadolint/wiki/DL3008
+[dl3013]: https://github.com/hadolint/hadolint/wiki/DL3013
diff --git a/docker-stacks/docs/contributing/packages.md b/docker-stacks/docs/contributing/packages.md
new file mode 100644
index 0000000..d6b7706
--- /dev/null
+++ b/docker-stacks/docs/contributing/packages.md
@@ -0,0 +1,28 @@
+# Package Updates
+
+Generally, we do not pin package versions in our `Dockerfile`s; dependency resolution is a difficult thing to do.
+This means that some installed packages might have older versions.
+Images are rebuilt weekly, so packages usually receive updates quite frequently.
+
+```{note}
+We pin the major.minor version of Python, so it will stay the same even after invoking the `mamba update` command.
+```
+
+## Outdated packages
+
+To help identify packages that can be updated, you can use the following helper tool.
+It will list all the outdated packages installed by the `Dockerfile` --
+dependencies are filtered out to focus only on explicitly requested packages.
+
+```bash
+make check-outdated/base-notebook
+
+# INFO test_outdated:test_outdated.py:80 3/8 (38%) packages could be updated
+# INFO test_outdated:test_outdated.py:82
+# Package    Current   Newest
+# ---------- --------- --------
+# conda      4.7.12    4.8.2
+# jupyterlab 1.2.5     2.0.0
+# python     3.7.4     3.8.2
+```
diff --git a/docker-stacks/docs/contributing/recipes.md b/docker-stacks/docs/contributing/recipes.md
new file mode 100644
index 0000000..45afb60
--- /dev/null
+++ b/docker-stacks/docs/contributing/recipes.md
@@ -0,0 +1,12 @@
+# New Recipes
+
+We welcome contributions of [recipes](../using/recipes.md): short examples of using, configuring, or extending the Docker Stacks, for inclusion in the documentation site.
+Follow the process below to add a new recipe:
+
+1. Open the `docs/using/recipes.md` source file.
+2. Add a second-level Markdown heading naming your recipe at the bottom of the file (e.g., `## Slideshows with JupyterLab and RISE`).
+3. Write the body of your recipe under the heading, including whatever command lines, links, etc. you need.
+4. If you have a Dockerfile, please put it in a `recipe_code` subdirectory.
+   This file will be built automatically by the [contributed-recipes workflow](https://github.com/jupyter/docker-stacks/blob/main/.github/workflows/contributed-recipes.yml).
+5. [Submit a pull request](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) (PR) with your changes.
+   Maintainers will respond and work with you to address any formatting or content issues.
diff --git a/docker-stacks/docs/contributing/stacks.md b/docker-stacks/docs/contributing/stacks.md
new file mode 100644
index 0000000..af55199
--- /dev/null
+++ b/docker-stacks/docs/contributing/stacks.md
@@ -0,0 +1,154 @@
+# Community Stacks
+
+We love to see the community create and share new Jupyter Docker images.
+We've put together a [cookiecutter project](https://github.com/jupyter/cookiecutter-docker-stacks)
+and the documentation below to help you get started defining, building, and sharing your Jupyter environments in Docker.
+
+Following these steps will:
+
+1. Set up a project on GitHub containing a Dockerfile based on any image we provide.
+2. Configure GitHub Actions to build and test your image when users submit pull requests to your repository.
+3. Configure Quay.io to host your images for others to use.
+4. Update the [list of community stacks](../using/selecting.md#community-stacks) in this documentation to include your image.
+
+This approach mirrors how we build and share the core stack images.
+Feel free to follow it or pave your own path using alternative services and build tools.
+
+## Creating a Project
+
+First, install [cookiecutter](https://github.com/cookiecutter/cookiecutter) using _pip_ or _mamba_:
+
+```bash
+pip install cookiecutter  # or mamba install cookiecutter
+```
+
+Run the cookiecutter command, pointing to the [jupyter/cookiecutter-docker-stacks](https://github.com/jupyter/cookiecutter-docker-stacks) project on GitHub.
+
+```bash
+cookiecutter https://github.com/jupyter/cookiecutter-docker-stacks.git
+```
+
+Enter a name for your new stack image.
+This will serve as both the git repository name and the part of the Docker image name after the slash.
+
+```text
+stack_name [my-jupyter-stack]:
+```
+
+Enter the user or organization name under which this stack will reside on Quay.io.
+You must have access to manage this Quay.io organization to push images there.
+
+```text
+stack_org [my-project]:
+```
+
+Select an image from the `jupyter/docker-stacks` project that will serve as the base for your new image.
+
+```text
+stack_base_image [quay.io/jupyter/base-notebook]:
+```
+
+Enter a longer description of the stack for your README.
+
+```text
+stack_description [my-jupyter-stack is a community-maintained Jupyter Docker Stack image]:
+```
+
+Create a GitHub repository to store your project.
+Then initialize your project as a Git repository and push it to GitHub.
+
+```bash
+cd <stack_name>
+
+git init
+git add .
+git commit -m 'Seed repo'
+git remote add origin <your GitHub repository URL>
+git push -u origin main
+```
+
+## Exploring GitHub Actions
+
+1. By default, the newly created `.github/workflows/docker.yaml` workflow will trigger the CI pipeline whenever you push to your `main` branch
+   and whenever any pull requests are made to your repository.
+   For more details on this configuration, visit the [GitHub Actions documentation on triggers](https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows).
+
+2. Go to your repository and click on the **Actions** tab.
+   From there, you can click on the workflows on the left-hand side of the screen.
+
+   ![GitHub page for jupyter/docker-stacks with the Actions tab active and a rectangle around the "Build Docker Images" workflow in the UI](../_static/contributing/stacks/github-actions-tab.png)
+
+   ```{note}
+   The first run is expected to fail because we haven't yet added the Docker credentials needed to push the image.
+   ```
+
+3. On the next screen, you will see information about the workflow run and its duration.
+   If you click the button with the workflow name again, you will see the logs for the workflow steps.
+
+   ![GitHub Actions page showing the "Build Docker Images" workflow](../_static/contributing/stacks/github-actions-workflow.png)
+
+## Configuring Docker Hub
+
+```{note}
+Jupyter Docker Stacks are hosted on Quay.io, but in this example, we show you how to host your image on Docker Hub.
+```
+
+Now, configure Docker Hub so that your stack image is pushed to your Docker Hub repository whenever
+you merge a GitHub pull request to the `main` branch of your project.
+
+1. Visit [https://hub.docker.com/](https://hub.docker.com/) and log in.
+2. Create a new repository - make sure to use the correct namespace (account or organization).
+   Enter the name of the image, matching the one you entered when prompted with `stack_name` by the cookiecutter.
+
+   ![Docker Hub - 'Create repository' page with the name field set to "My specialized jupyter stack"](../_static/contributing/stacks/docker-repo-name.png)
+
+3. Enter a description for your image.
+4. Click on your avatar in the top-right corner and select Account Settings.
+
+   ![The Docker Hub page zoomed into the user's settings and accounts menu](../_static/contributing/stacks/docker-user-dropdown.png)
+
+5. Click on **Security** and then click on the **New Access Token** button.
+
+   ![Docker Hub - Account page with the "Security" tab active and a rectangle highlighting the "New Access Token" button in the UI](../_static/contributing/stacks/docker-org-security.png)
+
+6. Enter a meaningful name for your token and click on **Generate**.
+
+   ![Docker Hub - New Access Token page with the name field set to "test-stack token"](../_static/contributing/stacks/docker-org-create-token.png)
+
+7. Copy the personal access token displayed on the next screen.
+
+   ```{note}
+   **You will not be able to see it again after you close the pop-up window**.
+   ```
+
+8. Head back to your GitHub repository and click on the **Settings** tab.
+9. Click on the **Secrets and variables -> Actions** section and then on the **New repository secret** button in the top right corner.
+
+   ![GitHub page with the "Settings" tab active and a rectangle highlighting the "New repository secret" button in the UI](../_static/contributing/stacks/github-create-secrets.png)
+
+10. Create a **DOCKERHUB_TOKEN** secret and paste the Personal Access Token from Docker Hub in the **value** field.
+
+    ![GitHub - Actions/New secret page with the Name field set to "DOCKERHUB_TOKEN"](../_static/contributing/stacks/github-secret-token.png)
+
+11. Now you're ready to go: you can re-run the failed workflow.
+
+## Defining Your Image
+
+Make edits to the Dockerfile in your project to add third-party libraries and configure Jupyter applications.
+Refer to the Dockerfiles for the core stacks (e.g., [jupyter/datascience-notebook](https://github.com/jupyter/docker-stacks/blob/main/images/datascience-notebook/Dockerfile))
+to get a feel for what's possible and the best practices.
+
+[Submit pull requests](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request)
+to your project repository on GitHub.
+Ensure your image builds correctly on GitHub Actions before merging to the `main` branch;
+after the merge, your image will be built and pushed to Docker Hub automatically.
+
+## Sharing Your Image
+
+Finally, if you'd like to add a link to your project to this documentation site, please do the following:
+
+1. Fork the [jupyter/docker-stacks](https://github.com/jupyter/docker-stacks) GitHub repository.
+2. Open the `docs/using/selecting.md` source file in your fork and locate the **Community Stacks** section.
+3. Add a table entry with a link to your project, a binder link, and a short description of what your Docker image contains.
+4. [Submit a pull request](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request) (PR) with your changes.
+   Maintainers will respond and work with you to address any formatting or content issues.
diff --git a/docker-stacks/docs/contributing/tests.md b/docker-stacks/docs/contributing/tests.md
new file mode 100644
index 0000000..3d7888b
--- /dev/null
+++ b/docker-stacks/docs/contributing/tests.md
@@ -0,0 +1,46 @@
+# Image Tests
+
+We greatly appreciate Pull Requests that extend the automated tests that vet the basic functionality of the Docker images.
+
+## How the Tests Work
+
+A [GitHub Actions workflow](https://github.com/jupyter/docker-stacks/blob/main/.github/workflows/docker.yml)
+runs tests against pull requests submitted to the `jupyter/docker-stacks` repository.
+
+We use the `pytest` module to run tests on the image.
+`conftest.py` and `pytest.ini` in the `tests` folder define the environment in which tests are run.
+More info on `pytest` can be found [here](https://docs.pytest.org/en/latest/contents.html).
+
+The actual image-specific test files are located in folders like `tests/<somestack>/` (e.g., `tests/docker-stacks-foundation/`, `tests/minimal-notebook/`, etc.).
+
+```{note}
+If your test is located in `tests/<somestack>/`, it will be run against the `jupyter/<somestack>` image and against all the [images inherited from this image](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#image-relationships).
+```
+
+Many tests make use of global [pytest fixtures](https://docs.pytest.org/en/latest/reference/fixtures.html)
+defined in the [conftest.py](https://github.com/jupyter/docker-stacks/blob/main/tests/conftest.py) file.
+
+## Unit tests
+
+You can add a unit test if you want to run a Python script in one of our images.
+You should create a `tests/<somestack>/units/` directory, if it doesn't already exist, and put your file there.
+Files in this folder will be executed in the container when tests are run.
+You can see a [TensorFlow package example here](https://github.com/jupyter/docker-stacks/blob/HEAD/tests/tensorflow-notebook/units/unit_tensorflow.py).
+
+## Contributing New Tests
+
+Please follow the process below to add new tests:
+
+1. Add your test code to one of the modules in the `tests/<somestack>/` directory or create a new module.
+2. Build one or more images you intend to test and run the tests locally.
+   If you use `make`, call:
+
+   ```bash
+   make build/<somestack>
+   make test/<somestack>
+   ```
+
+3. [Submit a pull request](https://github.com/PointCloudLibrary/pcl/wiki/A-step-by-step-guide-on-preparing-and-submitting-a-pull-request)
+   (PR) with your changes.
+4. Watch for GitHub to report a build success or failure for your PR.
+5. Discuss changes with the maintainers and address any issues running the tests on GitHub.
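To illustrate the unit-test layout described above, here is a minimal sketch of such a file. The file name, the `tests/scipy-notebook/units/` location, and the `pandas` import are assumptions for illustration; any Python script dropped into a `units/` folder is simply executed inside the corresponding container, and a non-zero exit code fails the test.

```python
# tests/scipy-notebook/units/unit_pandas.py (hypothetical example)
import pandas as pd

# Build a tiny DataFrame and check that a basic operation works inside the image.
df = pd.DataFrame({"a": [1, 2, 3]})
assert df["a"].sum() == 6
print("pandas works inside the image")
```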
diff --git a/docker-stacks/docs/images/inherit.svg b/docker-stacks/docs/images/inherit.svg
new file mode 100644
index 0000000..5324df8
--- /dev/null
+++ b/docker-stacks/docs/images/inherit.svg
@@ -0,0 +1,80 @@
+<!-- SVG markup lost to text extraction; the file is a blockdiag-generated diagram of the image inheritance tree,
+     with nodes for ubuntu (LTS with point release), docker-stacks-foundation, base-notebook, minimal-notebook,
+     scipy-notebook, r-notebook, julia-notebook, tensorflow-notebook, pytorch-notebook, datascience-notebook,
+     pyspark-notebook, and all-spark-notebook. -->
diff --git a/docker-stacks/docs/index.rst b/docker-stacks/docs/index.rst
new file mode 100644
index 0000000..896ca0f
--- /dev/null
+++ b/docker-stacks/docs/index.rst
@@ -0,0 +1,45 @@
+.. include:: ../README.md
+   :parser: myst_parser.sphinx_
+
+Table of Contents
+-----------------
+
+.. toctree::
+   :maxdepth: 2
+   :caption: User Guide
+
+   using/selecting
+   using/running
+   using/common
+   using/specifics
+   using/recipes
+   using/troubleshooting
+   using/faq
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contributor Guide
+
+   contributing/issues
+   contributing/packages
+   contributing/recipes
+   contributing/lint
+   contributing/tests
+   contributing/features
+   contributing/stacks
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Maintainer Guide
+
+   maintaining/new-images-and-packages-policy
+   maintaining/tasks
+   maintaining/aarch64-runner
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Getting Help
+
+   Issue Tracker on GitHub <https://github.com/jupyter/docker-stacks/issues>
+   Jupyter Discourse Forum <https://discourse.jupyter.org>
+   Jupyter Website <https://jupyter.org>
diff --git a/docker-stacks/docs/maintaining/aarch64-runner.md b/docker-stacks/docs/maintaining/aarch64-runner.md
new file mode 100644
index 0000000..a0beed1
--- /dev/null
+++ b/docker-stacks/docs/maintaining/aarch64-runner.md
@@ -0,0 +1,30 @@
+# Self-hosted runners
+
+For building `aarch64` images, we use self-hosted GitHub runners.
+It is recommended to have at least two runners to allow better parallelism.
+Each runner should have at least _2 cores_ and _30 GB_ of disk space.
+
+Add a new runner:
+
+- To use [Oracle OCI](https://www.oracle.com/cloud/), create a compute instance `VM.Standard.A1.Flex`.
+- To use [Google Cloud](https://cloud.google.com), follow [these instructions](https://cloud.google.com/compute/docs/instances/create-arm-vm-instance#armpublicimage).
+
+Configure your runner:
+
+1. Run under `root`:
+
+   ```bash
+   /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/jupyter/docker-stacks/HEAD/aarch64-runner/setup.sh)"
+   ```
+
+   This will perform the initial runner setup and create a user `runner-user` without `sudo` capabilities.
+
+2. Set up a new GitHub Runner under `runner-user` using the [GitHub instructions](https://github.com/jupyter/docker-stacks/settings/actions/runners/new?arch=arm64&os=linux).
+   **Do not run `./run.sh` yet**.
+3. Run under `root`:
+
+   ```bash
+   cd /home/runner-user/actions-runner/ && ./svc.sh install runner-user
+   ```
+
+4. Reboot the VM to apply all updates and start the GitHub runner.
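After the reboot in step 4, it can be worth confirming that the runner service came back up; a quick check, assuming the `actions-runner` path from step 3:

```bash
# Run as root: report the status of the installed runner service
cd /home/runner-user/actions-runner/ && ./svc.sh status
```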
diff --git a/docker-stacks/docs/maintaining/new-images-and-packages-policy.md b/docker-stacks/docs/maintaining/new-images-and-packages-policy.md
new file mode 100644
index 0000000..ec29745
--- /dev/null
+++ b/docker-stacks/docs/maintaining/new-images-and-packages-policy.md
@@ -0,0 +1,35 @@
+# Policy on adding new images and packages
+
+There are many things we consider while adding new images and packages.
+
+Here is a non-exhaustive list of things we do care about:
+
+1. **Software health** and maintenance status
+   - reasonable versioning is adopted, and the version is considered to be stable
+   - has been around for several years
+   - the package maintains documentation
+   - a changelog is actively maintained
+   - a release procedure with helpful automation is established
+   - multiple people are involved in the maintenance of the project
+   - provides a `conda-forge` package in addition to a `pypi` package, where both are kept up to date
+   - supports both `x86_64` and `aarch64` architectures
+2. **Installation consequences**
+   - GitHub Actions build time
+   - image sizes
+   - all of the package's requirements have to be installed as well
+3. Jupyter Docker Stacks _**image fit**_
+   - the new package or stack changes (or inherits from) the most suitable stack
+4. **Software impact** for users of docker-stacks images
+   - how the image can help existing users, or perhaps reduce the need to build new images
+5. Why it shouldn't just be a documented **recipe**
+6. Impact on **security**
+   - Does the package open additional ports, or add new web endpoints, that could be exploited?
+
+With all this in mind, we have a voting group, which consists of
+[@mathbunnyru](https://github.com/mathbunnyru),
+[@consideRatio](https://github.com/consideRatio),
+[@yuvipanda](https://github.com/yuvipanda), and
+[@manics](https://github.com/manics).
+
+This voting group is responsible for accepting or declining new packages and stacks.
+A change is accepted if it receives **at least 2 positive votes**.
diff --git a/docker-stacks/docs/maintaining/tasks.md b/docker-stacks/docs/maintaining/tasks.md
new file mode 100644
index 0000000..274bded
--- /dev/null
+++ b/docker-stacks/docs/maintaining/tasks.md
@@ -0,0 +1,71 @@
+# Maintainer Playbook
+
+## Merging Pull Requests
+
+To build new images and publish them to the Registry, do the following:
+
+1. Make sure GitHub Actions status checks pass for the PR.
+2. Merge the PR.
+3. Monitor the merge commit GitHub Actions status.
+
+   ```{note}
+   GitHub Actions are pretty reliable, so please investigate if an error occurs.
+   Building Docker images in PRs is the same as building them in the default branch,
+   except that single-platform images are pushed to the Registry and then the tags for `x86_64` and `aarch64` are merged.
+   ```
+
+4. Avoid merging another PR to the main branch until all pending builds in the main branch are complete.
+   This way, you will know which commit might have broken the build
+   and also ensure moving tags (like the `Python` version) point to the correct images.
+
+## Updating Python version
+
+When a new `Python` version is released, we wait for:
+
+- all the dependencies to be available (as wheels or in `conda-forge`).
+- the first `Python` patch release for this version.
+  This allows us to avoid many of the bugs that can happen in an initial release.
+
+## Updating the Ubuntu Base Image
+
+`jupyter/docker-stacks-foundation` is based on the LTS Ubuntu docker image.
+We wait for the first point release of a new LTS Ubuntu before updating the version.
+Other images are directly or indirectly inherited from `docker-stacks-foundation`.
+We rebuild our images automatically each week, which means they frequently receive updates.
+
+When there's a security fix in the Ubuntu base image, it's a good idea to manually trigger the rebuild of the images
+[from the GitHub Actions workflow UI](https://github.com/jupyter/docker-stacks/actions/workflows/docker.yml).
+Pushing the `Run Workflow` button will trigger this process.
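The same rebuild can also be dispatched from the command line; a sketch using the GitHub CLI, assuming `gh` is installed and authenticated with permission to trigger workflows on the repository:

```bash
# Dispatch the docker.yml workflow on the default branch
gh workflow run docker.yml --repo jupyter/docker-stacks

# Check that the run was queued
gh run list --repo jupyter/docker-stacks --workflow docker.yml --limit 1
```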
+
+## Adding a New Core Image to the Registry
+
+```{note}
+In general, we do not add new core images, and we ask contributors to either
+create a [recipe](../using/recipes.md) or a [community stack](../contributing/stacks.md).
+We have a [policy](./new-images-and-packages-policy.md), which we consider when adding new images or new packages to existing images.
+```
+
+You can see an example of adding a new image [here](https://github.com/jupyter/docker-stacks/pull/1936/files).
+
+When there's a new stack definition, check before merging the PR that:
+
+1. The PR includes an update to the stack overview diagram
+   [in the documentation](../using/selecting.md#image-relationships).
+   The image links to the [blockdiag source](http://interactive.blockdiag.com/) used to create it.
+2. The PR updates the [Makefile](https://github.com/jupyter/docker-stacks/blob/main/Makefile),
+   which is used to build the stacks in order on GitHub Actions.
+3. The necessary Tagger(s)/Manifest(s) are added for the new image
+   in the [tagging](https://github.com/jupyter/docker-stacks/tree/main/tagging) folder.
+4. A new repository is created in the `jupyter` organization in the Registry,
+   and it's named after the stack folder in the git repo.
+5. Robot `Write` permission is added in the `Repository Settings`.
+
+## Adding a New Registry Owner Account
+
+1. Visit <https://quay.io/organization/jupyter/teams/owners>
+2. Add the maintainer's username.
+
+## Restarting a failed build
+
+If an automated build in GitHub Actions has got you down, you can restart failed steps on GitHub.
+You can also download the artifacts and investigate them for any issues.
diff --git a/docker-stacks/docs/requirements.txt b/docker-stacks/docs/requirements.txt
new file mode 100644
index 0000000..be05ec4
--- /dev/null
+++ b/docker-stacks/docs/requirements.txt
@@ -0,0 +1,8 @@
+# The ReadTheDocs environment contains old package versions preinstalled,
+# so, to ensure we have modern packages, we pin minimum versions of the packages we need
+docutils>=0.17.1
+myst-parser>=0.18.0
+sphinx>=4.5.0
+sphinx-book-theme>=1.0.0
+sphinx-copybutton>=0.5.0
+sphinx-last-updated-by-git>=0.3.4
diff --git a/docker-stacks/docs/using/common.md b/docker-stacks/docs/using/common.md
new file mode 100644
index 0000000..b3b314c
--- /dev/null
+++ b/docker-stacks/docs/using/common.md
@@ -0,0 +1,286 @@
+# Common Features
+
+Except for `jupyter/docker-stacks-foundation`, a container launched from any Jupyter Docker Stacks image runs a Jupyter Server with the JupyterLab frontend.
+The container does so by executing a `start-notebook.py` script.
+This script configures the internal container environment and then runs `jupyter lab`, passing any command-line arguments it receives.
+
+This page describes the options supported by the startup script and how to bypass it to run alternative commands.
+
+## Jupyter Server Options
+
+You can pass [Jupyter Server options](https://jupyter-server.readthedocs.io/en/latest/operators/public-server.html) to the `start-notebook.py` script when launching the container.
+
+1. For example, to secure the Jupyter Server with a [custom password](https://jupyter-server.readthedocs.io/en/latest/operators/public-server.html#preparing-a-hashed-password)
+   hashed using `jupyter_server.auth.passwd()` instead of the default token,
+   you can run the following (this hash was generated for the `my-password` password):
+
+   ```bash
+   docker run -it --rm -p 8888:8888 quay.io/jupyter/base-notebook \
+       start-notebook.py --PasswordIdentityProvider.hashed_password='argon2:$argon2id$v=19$m=10240,t=10,p=8$JdAN3fe9J45NvK/EPuGCvA$O/tbxglbwRpOFuBNTYrymAEH6370Q2z+eS1eF4GM6Do'
+   ```
+
+2. To set the [base URL](https://jupyter-server.readthedocs.io/en/latest/operators/public-server.html#running-the-notebook-with-a-customized-url-prefix) of the Jupyter Server, you can run the following:
+
+   ```bash
+   docker run -it --rm -p 8888:8888 quay.io/jupyter/base-notebook \
+       start-notebook.py --ServerApp.base_url=/customized/url/prefix/
+   ```
+
+## Docker Options
+
+You may instruct the `start-notebook.py` script to customize the container environment before launching the Server.
+You do so by passing arguments to the `docker run` command.
+
+### User-related configurations
+
+- `-e NB_USER=<username>` - The desired username and associated home folder.
+  The default value is `jovyan`.
+  Setting `NB_USER` refits the `jovyan` default user and ensures that the desired user has the correct file permissions
+  for the new home directory created at `/home/<username>`.
+  For this option to take effect, you **must** run the container with `--user root`, set the working directory `-w "/home/<username>"`,
+  and set the environment variable `-e CHOWN_HOME=yes`.
+
+  _Example usage:_
+
+  ```bash
+  docker run -it --rm \
+      -p 8888:8888 \
+      --user root \
+      -e NB_USER="my-username" \
+      -e CHOWN_HOME=yes \
+      -w "/home/my-username" \
+      quay.io/jupyter/base-notebook
+  ```
+
+  ```{note}
+  If you set `NB_USER` to `root`, the `root` home dir will be set to `/home/root`.
+  See the discussion [here](https://github.com/jupyter/docker-stacks/issues/2042).
+  ```
+
+- `-e NB_UID=<numeric uid>` - Instructs the startup script to switch the numeric user ID of `${NB_USER}` to the given value.
+  The default value is `1000`.
+  This feature is useful when mounting host volumes with specific owner permissions.
+  You **must** run the container with `--user root` for this option to take effect.
+  (The startup script will `su ${NB_USER}` after adjusting the user ID.)
+  Instead, you might consider using the modern Docker-native options [`--user`](https://docs.docker.com/engine/reference/run/#user) and
+  [`--group-add`](https://docs.docker.com/engine/reference/run/#additional-groups) - see the last bullet in this section for more details.
+
+- `-e NB_GID=<numeric gid>` - Instructs the startup script to change the primary group of `${NB_USER}` to `${NB_GID}`
+  (the new group is added with the name `${NB_GROUP}` if it is defined; otherwise, the group is named `${NB_USER}`).
+  This feature is useful when mounting host volumes with specific group permissions.
+  You **must** run the container with `--user root` for this option to take effect.
+  (The startup script will `su ${NB_USER}` after adjusting the group ID.)
+  Instead, you might consider using the modern Docker options `--user` and `--group-add` - see the last bullet in this section for more details.
+  The user is added to the supplemental group `users` (gid 100) to grant write access to the home directory and `/opt/conda`.
+  If you override the user/group logic, ensure the user stays in the group `users` if you want them to be able to modify files in the image.
+
+- `-e NB_GROUP=<name>` - The name used for `${NB_GID}`, which defaults to `${NB_USER}`.
+  This group name is used only if `${NB_GID}` is specified, and it is completely optional: the effect is purely cosmetic.
+
+- `--user 5000 --group-add users` - Launches the container with a specific user ID and adds that user to the `users` group so that it can modify files in the default home directory and `/opt/conda`.
+  You can use these arguments as alternatives to setting `${NB_UID}` and `${NB_GID}`.
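A minimal sketch of the `--user`/`--group-add` alternative described in the last bullet above (the UID `5000` is arbitrary):

```bash
# Launch with an arbitrary host UID instead of the root-based NB_UID logic;
# --group-add users keeps the home directory and /opt/conda writable.
docker run -it --rm -p 8888:8888 \
    --user 5000 --group-add users \
    quay.io/jupyter/base-notebook
```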
+
+### Permission-specific configurations
+
+- `-e NB_UMASK=<umask>` - Configures Jupyter to use a `umask` value different from the default, i.e., `022`.
+  For example, if you set `umask` to `002`, new files will be readable and writable by group members instead of the owner only.
+  [Check this Wikipedia article](https://en.wikipedia.org/wiki/Umask) for an in-depth description of `umask` and suitable values for multiple needs.
+  While the default `umask` value should be sufficient for most use cases, you can set the `NB_UMASK` value to fit your requirements.
+
+  ```{note}
+  When set, `NB_UMASK` only applies to the Jupyter process itself -
+  you cannot use it to set a `umask` for additional files created during `run-hooks.sh` (for example, via `pip` or `conda`).
+  If you need to set a `umask` for these, you **must** set the `umask` value for each command.
+  ```
+
+- `-e CHOWN_HOME=yes` - Instructs the startup script to change the `${NB_USER}` home directory owner and group to the current value of `${NB_UID}` and `${NB_GID}`.
+  This change will take effect even if the user home directory is mounted from the host using `-v` as described below.
+  The change is **not** applied recursively by default.
+  You can modify the `chown` behavior by setting `CHOWN_HOME_OPTS` (e.g., `-e CHOWN_HOME_OPTS='-R'`).
+
+- `-e CHOWN_EXTRA="<some dir>,<some other dir>"` - Instructs the startup script to change the owner and group of each comma-separated container directory to the current value of `${NB_UID}` and `${NB_GID}`.
+  The change is **not** applied recursively by default.
+  You can modify the `chown` behavior by setting `CHOWN_EXTRA_OPTS` (e.g., `-e CHOWN_EXTRA_OPTS='-R'`).
+
+- `-e GRANT_SUDO=yes` - Instructs the startup script to grant the `NB_USER` user passwordless `sudo` capability.
+  You do **not** need this option to allow the user to `conda` or `pip` install additional packages.
+  This option is helpful when you wish to give `${NB_USER}` the ability to install OS packages with `apt` or modify other root-owned files in the container.
+  You **must** run the container with `--user root` for this option to take effect.
+  (The `start-notebook.py` script will `su ${NB_USER}` after adding `${NB_USER}` to sudoers.)
+  **You should only enable `sudo` if you trust the user or if the container runs on an isolated host.**
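Several of the options above can be combined; a sketch that recursively chowns a mounted home directory and enables passwordless `sudo` (the host path is a placeholder), both of which require starting the container as root:

```bash
docker run -it --rm -p 8888:8888 \
    --user root \
    -e CHOWN_HOME=yes \
    -e CHOWN_HOME_OPTS='-R' \
    -e GRANT_SUDO=yes \
    -v /some/host/folder/for/work:/home/jovyan/work \
    quay.io/jupyter/base-notebook
```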
+
+### Additional runtime configurations
+
+- `-e GEN_CERT=yes` - Instructs the startup script to generate a self-signed SSL certificate
+  and configure Jupyter Server to use it to accept encrypted HTTPS connections.
+- `-e DOCKER_STACKS_JUPYTER_CMD=<jupyter command>` - Instructs the startup script to run `jupyter ${DOCKER_STACKS_JUPYTER_CMD}` instead of the default `jupyter lab` command.
+  See [Switching back to the classic notebook or using a different startup command][switch_back] for available options.
+  This setting is helpful in container orchestration environments where setting environment variables is more straightforward than changing command line parameters.
+- `-e RESTARTABLE=yes` - Runs Jupyter in a loop so that quitting Jupyter does not cause the container to exit.
+  This may be useful when installing extensions that require restarting Jupyter.
+- `-v /some/host/folder/for/work:/home/jovyan/work` - Mounts a host machine directory as a folder in the container.
+  This configuration is useful for preserving notebooks and other work even after the container is destroyed.
+  **You must grant the within-container notebook user or group (`NB_UID` or `NB_GID`) write access to the host directory (e.g., `sudo chown 1000 /some/host/folder/for/work`).**
+- `-e JUPYTER_ENV_VARS_TO_UNSET=ADMIN_SECRET_1,ADMIN_SECRET_2` - Unsets the specified environment variables in the default startup script.
+  The variables are unset after the hooks have been executed but before the command provided to the startup script runs.
+- `-e NOTEBOOK_ARGS="--log-level='DEBUG' --dev-mode"` - Adds custom options to the `jupyter` command.
+  This way, you can use any option supported by the `jupyter` subcommand.
+- `-e JUPYTER_PORT=8117` - Changes the port inside the container that Jupyter uses to the value of the `${JUPYTER_PORT}` environment variable.
+  This may be useful if you run multiple instances of Jupyter in swarm mode and want to use a different port for each instance.
+
+## Startup Hooks
+
+You can further customize the container environment by adding shell scripts (`*.sh`) to be sourced
+or executables (`chmod +x`) to be run, placed in the paths below:
+
+- `/usr/local/bin/start-notebook.d/` - handled **before** any of the standard options noted above are applied
+- `/usr/local/bin/before-notebook.d/` - handled **after** all the standard options noted above are applied
+  and run right before the Server launches
+
+See the `run-hooks.sh` script [here](https://github.com/jupyter/docker-stacks/blob/main/images/docker-stacks-foundation/run-hooks.sh) and how it's used in the [`start.sh`](https://github.com/jupyter/docker-stacks/blob/main/images/docker-stacks-foundation/start.sh)
+script for execution details.
+
+## SSL Certificates
+
+You may mount an SSL key and certificate file into a container and configure the Jupyter Server to use them to accept HTTPS connections.
+For example, to mount a host folder containing a `notebook.key` and `notebook.crt` and use them, you might run the following:
+
+```bash
+docker run -it --rm -p 8888:8888 \
+    -v /some/host/folder:/etc/ssl/notebook \
+    quay.io/jupyter/base-notebook \
+    start-notebook.py \
+    --ServerApp.keyfile=/etc/ssl/notebook/notebook.key \
+    --ServerApp.certfile=/etc/ssl/notebook/notebook.crt
+```
+
+Alternatively, you may mount a single PEM file containing both the key and certificate.
+For example:
+
+```bash
+docker run -it --rm -p 8888:8888 \
+    -v /some/host/folder/notebook.pem:/etc/ssl/notebook.pem \
+    quay.io/jupyter/base-notebook \
+    start-notebook.py \
+    --ServerApp.certfile=/etc/ssl/notebook.pem
+```
+
+In either case, Jupyter Server expects the key and certificate to be **base64-encoded text files**.
+The certificate file or PEM may contain one or more certificates (e.g., server, intermediate, and root).
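If you don't already have a key and certificate to mount, you can create a throwaway self-signed pair on the host first; a sketch using `openssl` (the file names and the `localhost` subject are placeholders):

```bash
# Generate a self-signed key/certificate pair, valid for 365 days
openssl req -x509 -newkey rsa:4096 -nodes -days 365 \
    -keyout /some/host/folder/notebook.key \
    -out /some/host/folder/notebook.crt \
    -subj "/CN=localhost"

# Or combine them into a single PEM file, as in the second example above
cat /some/host/folder/notebook.key /some/host/folder/notebook.crt \
    > /some/host/folder/notebook.pem
```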
+
+For additional information about using SSL, see the following:
+
+- The [docker-stacks/examples](https://github.com/jupyter/docker-stacks/tree/main/examples)
+  for information about how to use
+  [Let's Encrypt](https://letsencrypt.org/) certificates when you run these stacks on a publicly visible domain.
+- The [`jupyter_server_config.py`](https://github.com/jupyter/docker-stacks/blob/main/images/base-notebook/jupyter_server_config.py)
+  file for how this Docker image generates a self-signed certificate.
+- The [Jupyter Server documentation](https://jupyter-server.readthedocs.io/en/latest/operators/public-server.html#securing-a-jupyter-server)
+  for best practices about securing a public Server in general.
+
+## Alternative Commands
+
+### Switching back to the classic notebook or using a different startup command
+
+JupyterLab, built on top of Jupyter Server, is now the default for all the images of the stack.
+However, switching back to the classic notebook or using a different startup command is still possible.
+You can achieve this by setting the environment variable `DOCKER_STACKS_JUPYTER_CMD` at container startup.
+The table below shows some options.
+Since `Jupyter Notebook v7`, `jupyter-server` is used as the backend.
+
+| `DOCKER_STACKS_JUPYTER_CMD` | Frontend         |
+| --------------------------- | ---------------- |
+| `lab` (default)             | JupyterLab       |
+| `notebook`                  | Jupyter Notebook |
+| `nbclassic`                 | NbClassic        |
+| `server`                    | None             |
+| `retro`\*                   | RetroLab         |
+
+```{note}
+- Changing the frontend for the **JupyterHub singleuser image** is described in the [JupyterHub docs](https://jupyterhub.readthedocs.io/en/latest/howto/configuration/config-user-env.html#switching-back-to-the-classic-notebook).
+- \* `retro` is not installed at this time, but it could be the case in the future or in a community stack.
+- Any other valid `jupyter` subcommand that starts the Jupyter Application can be used.
+```
+
+Example:
+
+```bash
+# Run Jupyter Server with the Jupyter Notebook frontend
+docker run -it --rm \
+    -p 8888:8888 \
+    -e DOCKER_STACKS_JUPYTER_CMD=notebook \
+    quay.io/jupyter/base-notebook
+# Executing the command: jupyter notebook ...
+
+# Use the Jupyter NBClassic frontend
+docker run -it --rm \
+    -p 8888:8888 \
+    -e DOCKER_STACKS_JUPYTER_CMD=nbclassic \
+    quay.io/jupyter/base-notebook
+# Executing the command: jupyter nbclassic ...
+```
+
+### `start.sh`
+
+Most of the configuration options in the `start-notebook.py` script are handled by an internal `start.sh` script that automatically runs before the command provided to the container
+(it's set as the container entrypoint).
+This allows you to specify an arbitrary command that takes advantage of all these features.
+For example, to run the text-based `ipython` console in a container, do the following:
+
+```bash
+docker run -it --rm quay.io/jupyter/base-notebook ipython
+```
+
+This script is handy when you derive a new Dockerfile from this image and install additional Jupyter applications with subcommands like `jupyter console`, `jupyter kernelgateway`, etc.
+
+## Conda Environments
+
+The default Python 3.x [Conda environment](https://conda.io/projects/conda/en/latest/user-guide/concepts/environments.html) resides in `/opt/conda`.
+The `/opt/conda/bin` directory is part of the default `jovyan` user's `${PATH}`.
+That directory is also searched for binaries when commands are run using `sudo` (`sudo my_binary` will search for `my_binary` in `/opt/conda/bin/`).
+
+The `jovyan` user has full read/write access to the `/opt/conda` directory.
+You can use either `mamba`, `pip`, or `conda` (`mamba` is recommended) to install new packages without any additional permissions.
+
+```bash
+# install a package into the default (python 3.x) environment and clean up
+# after the installation
+mamba install --yes some-package && \
+    mamba clean --all -f -y && \
+    fix-permissions "${CONDA_DIR}" && \
+    fix-permissions "/home/${NB_USER}"
+
+pip install --no-cache-dir some-package && \
+    fix-permissions "${CONDA_DIR}" && \
+    fix-permissions "/home/${NB_USER}"
+
+conda install --yes some-package && \
+    conda clean --all -f -y && \
+    fix-permissions "${CONDA_DIR}" && \
+    fix-permissions "/home/${NB_USER}"
+```
+
+### Using Alternative Channels
+
+Conda is configured by default to use only the [`conda-forge`](https://anaconda.org/conda-forge) channel.
+However, you can use alternative channels, either one-off by overriding the default channel in the installation command or by configuring `mamba` to use different channels.
+The examples below show how to use the [anaconda default channels](https://repo.anaconda.com/pkgs/main) instead of `conda-forge` to install packages.
+
+```bash
+# use the defaults channel to install a package
+mamba install --channel defaults humanize
+
+# configure conda to add default channels at the top of the list
+conda config --system --prepend channels defaults
+
+# install a package
+mamba install --yes humanize && \
+    mamba clean --all -f -y && \
+    fix-permissions "${CONDA_DIR}" && \
+    fix-permissions "/home/${NB_USER}"
+```
+
+[switch_back]: #switching-back-to-the-classic-notebook-or-using-a-different-startup-command
diff --git a/docker-stacks/docs/using/faq.md b/docker-stacks/docs/using/faq.md
new file mode 100644
index 0000000..a8b7168
--- /dev/null
+++ b/docker-stacks/docs/using/faq.md
@@ -0,0 +1,47 @@
+# Frequently Asked Questions (FAQ)
+
+## How to persist user data
+
+There are two types of data you might want to persist.
+
+1. If you want to persist your environment (i.e., packages installed by `mamba`, `conda`, `pip`, `apt-get`, and so on),
+   then you should create an inherited image and install packages only once while building your Dockerfile.
+   An example of using `mamba` and `pip` in a child image is available
+   [here](./recipes.md#using-mamba-install-recommended-or-pip-install-in-a-child-docker-image).
+
+   ```{note}
+   If you install a package inside a running container (for example, if you run `pip install <package>` in a terminal),
+   it won't be preserved when you next run your image.
+   To make it work, install this package in your inherited image and rerun the `docker build` command.
+   ```
+
+2. If you want to persist user data (files created by you, like `Python` scripts, notebooks, text files, and so on),
+   then you should use a
+   [Docker bind mount](https://docs.docker.com/storage/bind-mounts/) or
+   [Docker Volume](https://docs.docker.com/storage/volumes/).
+   You can find [an example of using a bind mount here](./running.md#example-2).
+   There is also [a mount troubleshooting section](./troubleshooting.md#permission-denied-when-mounting-volumes) if you experience any issues.
+
+## Why we do not add your favorite package
+
+We have lots of users with different packages they want to use.
+Adding them all is impossible, so we have several images to choose from.
+[Choose the image](selecting.md) that is closest to your needs, and feel free to [add your package on top of our images](recipes.md#using-mamba-install-recommended-or-pip-install-in-a-child-docker-image), as sketched below.
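A minimal sketch of what "adding your package on top of our images" looks like (the base image and `flake8` are stand-ins; the linked recipe shows the full pattern):

```dockerfile
# Hypothetical child image: swap in the stack and the package you actually need
FROM quay.io/jupyter/base-notebook

RUN mamba install --yes 'flake8' && \
    mamba clean --all -f -y && \
    fix-permissions "${CONDA_DIR}" && \
    fix-permissions "/home/${NB_USER}"
```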
+
+## Who is `jovyan`
+
+As described [here](https://github.com/jupyter/docker-stacks/issues/358#issuecomment-288844834):
+
+```text
+Jo·vy·an
+/ˈjōvēən/
+noun – an inhabitant of Jupyter
+```
+
+`Jovyan` is a special term often used to describe members of the Jupyter community.
+It is also used as the username in the Jupyter Docker Stacks and is often referenced in conversations.
+You can find more information [here](https://docs.jupyter.org/en/latest/community/content-community.html#what-is-a-jovyan).
+
+## How to give root permissions to the user
+
+We have a [recipe for enabling root permissions](recipes.md#using-sudo-within-a-container).
diff --git a/docker-stacks/docs/using/recipe_code/custom_environment.dockerfile b/docker-stacks/docs/using/recipe_code/custom_environment.dockerfile
new file mode 100644
index 0000000..547245c
--- /dev/null
+++ b/docker-stacks/docs/using/recipe_code/custom_environment.dockerfile
@@ -0,0 +1,48 @@
+FROM quay.io/jupyter/minimal-notebook
+
+# Name your environment and choose the Python version
+ARG env_name=python310
+ARG py_ver=3.10
+
+# You can add additional libraries here
+RUN mamba create --yes -p "${CONDA_DIR}/envs/${env_name}" \
+    python=${py_ver} \
+    'ipykernel' \
+    'jupyterlab' && \
+    mamba clean --all -f -y
+
+# Alternatively, you can comment out the lines above and uncomment those below
+# if you'd prefer to use a YAML file present in the docker build context
+
+# COPY --chown=${NB_UID}:${NB_GID} environment.yml /tmp/
+# RUN mamba env create -p "${CONDA_DIR}/envs/${env_name}" -f /tmp/environment.yml && \
+#     mamba clean --all -f -y
+
+# Create Python kernel and link it to jupyter
+RUN "${CONDA_DIR}/envs/${env_name}/bin/python" -m ipykernel install --user --name="${env_name}" && \
+    fix-permissions "${CONDA_DIR}" && \
+    fix-permissions "/home/${NB_USER}"
+
+# Any additional `pip` installs can be added by using the following line
+# Using `mamba` is highly recommended though
+RUN "${CONDA_DIR}/envs/${env_name}/bin/pip" install --no-cache-dir \
+    'flake8'
+
+# This changes the custom Python kernel so that the custom environment will
+# be activated for the respective Jupyter Notebook and Jupyter Console
+# hadolint ignore=DL3059
+RUN /opt/setup-scripts/activate_notebook_custom_env.py "${env_name}"
+
+# Comment the line above and uncomment the section below instead to activate the custom environment by default
+# Note: uncommenting this section makes "${env_name}" default both for Jupyter Notebook and Terminals
+# More information here: https://github.com/jupyter/docker-stacks/pull/2047
+# USER root
+# RUN \
+#     # This changes a startup hook, which will activate the custom environment for the process
+#     echo conda activate "${env_name}" >> /usr/local/bin/before-notebook.d/10activate-conda-env.sh && \
+#     # This makes the custom environment default in Jupyter Terminals for all users which might be created later
+#     echo conda activate "${env_name}" >> /etc/skel/.bashrc && \
+#     # This makes the custom environment default in Jupyter Terminals for already existing NB_USER
+#     echo conda activate "${env_name}" >> "/home/${NB_USER}/.bashrc"
+
+USER ${NB_UID}
diff --git a/docker-stacks/docs/using/recipe_code/dask_jupyterlab.dockerfile b/docker-stacks/docs/using/recipe_code/dask_jupyterlab.dockerfile
new file mode 100644
index 0000000..f1cd0e6
--- /dev/null
+++ b/docker-stacks/docs/using/recipe_code/dask_jupyterlab.dockerfile
@@ -0,0 +1,10 @@
+FROM quay.io/jupyter/base-notebook
+
+# Install the Dask dashboard
+RUN mamba install --yes 'dask-labextension' && \
+    mamba clean
--all -f -y && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" + +# Dask Scheduler port +EXPOSE 8787 diff --git a/docker-stacks/docs/using/recipe_code/generate_matrix.py b/docker-stacks/docs/using/recipe_code/generate_matrix.py new file mode 100755 index 0000000..62159f3 --- /dev/null +++ b/docker-stacks/docs/using/recipe_code/generate_matrix.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import json +from pathlib import Path +from typing import Any + +THIS_DIR = Path(__file__).parent.resolve() + + +def generate_matrix() -> dict[str, Any]: + dockerfiles = sorted(file.name for file in THIS_DIR.glob("*.dockerfile")) + return { + "dockerfile": dockerfiles, + "runs-on": ["ubuntu-latest", "ARM64"], + "exclude": [{"dockerfile": "oracledb.dockerfile", "runs-on": "ARM64"}], + } + + +if __name__ == "__main__": + print("matrix=" + json.dumps(generate_matrix())) diff --git a/docker-stacks/docs/using/recipe_code/jupyterhub_version.dockerfile b/docker-stacks/docs/using/recipe_code/jupyterhub_version.dockerfile new file mode 100644 index 0000000..7fd5301 --- /dev/null +++ b/docker-stacks/docs/using/recipe_code/jupyterhub_version.dockerfile @@ -0,0 +1,6 @@ +FROM quay.io/jupyter/base-notebook + +RUN mamba install --yes 'jupyterhub==4.0.1' && \ + mamba clean --all -f -y && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/docs/using/recipe_code/mamba_install.dockerfile b/docker-stacks/docs/using/recipe_code/mamba_install.dockerfile new file mode 100644 index 0000000..2c4d2c4 --- /dev/null +++ b/docker-stacks/docs/using/recipe_code/mamba_install.dockerfile @@ -0,0 +1,13 @@ +FROM quay.io/jupyter/base-notebook + +RUN mamba install --yes 'flake8' && \ + mamba clean --all -f -y && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" + +# Install from the requirements.txt file +COPY --chown=${NB_UID}:${NB_GID} requirements.txt /tmp/ +RUN mamba install --yes --file /tmp/requirements.txt && \ + mamba clean --all -f -y && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/docs/using/recipe_code/manpage_install.dockerfile b/docker-stacks/docs/using/recipe_code/manpage_install.dockerfile new file mode 100644 index 0000000..ed8d91d --- /dev/null +++ b/docker-stacks/docs/using/recipe_code/manpage_install.dockerfile @@ -0,0 +1,16 @@ +FROM quay.io/jupyter/base-notebook + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +USER root + +# `/etc/dpkg/dpkg.cfg.d/excludes` contains several `path-exclude`s, including man pages +# Remove it, then install man, install docs +RUN rm /etc/dpkg/dpkg.cfg.d/excludes && \ + apt-get update --yes && \ + dpkg -l | grep ^ii | cut -d' ' -f3 | xargs apt-get install --yes --no-install-recommends --reinstall man && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +USER ${NB_UID} diff --git a/docker-stacks/docs/using/recipe_code/microsoft_odbc.dockerfile b/docker-stacks/docs/using/recipe_code/microsoft_odbc.dockerfile new file mode 100644 index 0000000..1138d69 --- /dev/null +++ b/docker-stacks/docs/using/recipe_code/microsoft_odbc.dockerfile @@ -0,0 +1,30 @@ +FROM quay.io/jupyter/base-notebook + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL 
["/bin/bash", "-o", "pipefail", "-c"] + +USER root + +ENV MSSQL_DRIVER "ODBC Driver 18 for SQL Server" +ENV PATH="/opt/mssql-tools18/bin:${PATH}" + +RUN apt-get update --yes && \ + apt-get install --yes --no-install-recommends curl gnupg2 lsb-release && \ + curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \ + curl "https://packages.microsoft.com/config/ubuntu/$(lsb_release -rs)/prod.list" > /etc/apt/sources.list.d/mssql-release.list && \ + apt-get update --yes && \ + ACCEPT_EULA=Y apt-get install --yes --no-install-recommends msodbcsql18 && \ + # optional: for bcp and sqlcmd + ACCEPT_EULA=Y apt-get install --yes --no-install-recommends mssql-tools18 && \ + # optional: for unixODBC development headers + apt-get install --yes --no-install-recommends unixodbc-dev && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# Switch back to jovyan to avoid accidental container runs as root +USER ${NB_UID} + +RUN mamba install --yes 'pyodbc' && \ + mamba clean --all -f -y && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/docs/using/recipe_code/oracledb.dockerfile b/docker-stacks/docs/using/recipe_code/oracledb.dockerfile new file mode 100644 index 0000000..85564d0 --- /dev/null +++ b/docker-stacks/docs/using/recipe_code/oracledb.dockerfile @@ -0,0 +1,59 @@ +FROM quay.io/jupyter/base-notebook + +USER root + +# Install Java & Oracle SQL Instant Client +RUN apt-get update --yes && \ + apt-get install --yes --no-install-recommends software-properties-common && \ + add-apt-repository universe && \ + apt-get update --yes && \ + apt-get install --yes --no-install-recommends alien default-jre default-jdk openjdk-11-jdk libaio1 && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# Oracle +ARG INSTANTCLIENT_MAJOR_VERSION=21 +ARG INSTANTCLIENT_VERSION=${INSTANTCLIENT_MAJOR_VERSION}.11.0.0.0-1 +ARG INSTANTCLIENT_URL=https://download.oracle.com/otn_software/linux/instantclient/2111000 + +# Then install Oracle SQL Instant client, SQL+Plus, tools, and JDBC. +# Note: You may need to change the URL to a newer version. 
+# See: https://www.oracle.com/es/database/technologies/instant-client/linux-x86-64-downloads.html +RUN mkdir "/opt/oracle" +WORKDIR "/opt/oracle" +RUN wget --progress=dot:giga "${INSTANTCLIENT_URL}/oracle-instantclient-basiclite-${INSTANTCLIENT_VERSION}.el8.x86_64.rpm" && \ + alien --install --scripts "oracle-instantclient-basiclite-${INSTANTCLIENT_VERSION}.el8.x86_64.rpm" && \ + wget --progress=dot:giga "${INSTANTCLIENT_URL}/oracle-instantclient-sqlplus-${INSTANTCLIENT_VERSION}.el8.x86_64.rpm" && \ + alien --install --scripts "oracle-instantclient-sqlplus-${INSTANTCLIENT_VERSION}.el8.x86_64.rpm" && \ + wget --progress=dot:giga "${INSTANTCLIENT_URL}/oracle-instantclient-tools-${INSTANTCLIENT_VERSION}.el8.x86_64.rpm" && \ + alien --install --scripts "oracle-instantclient-tools-${INSTANTCLIENT_VERSION}.el8.x86_64.rpm" && \ + wget --progress=dot:giga "${INSTANTCLIENT_URL}/oracle-instantclient-jdbc-${INSTANTCLIENT_VERSION}.el8.x86_64.rpm" && \ + alien --install --scripts "oracle-instantclient-jdbc-${INSTANTCLIENT_VERSION}.el8.x86_64.rpm" && \ + chown -R "${NB_UID}":"${NB_GID}" "${HOME}/.rpmdb" && \ + rm -f ./*.rpm + +# And configure variables +RUN echo "ORACLE_HOME=/usr/lib/oracle/${INSTANTCLIENT_MAJOR_VERSION}/client64" >> "${HOME}/.bashrc" && \ + echo "PATH=${ORACLE_HOME}/bin:${PATH}" >> "${HOME}/.bashrc" && \ + echo "LD_LIBRARY_PATH=${ORACLE_HOME}/lib:${LD_LIBRARY_PATH}" >> "${HOME}/.bashrc" && \ + echo "export ORACLE_HOME" >> "${HOME}/.bashrc" && \ + echo "export PATH" >> "${HOME}/.bashrc" && \ + echo "export LD_LIBRARY_PATH" >> "${HOME}/.bashrc" + +# Add credentials for /redacted/ using Oracle DB. +WORKDIR /usr/lib/oracle/${INSTANTCLIENT_MAJOR_VERSION}/client64/lib/network/admin/ +# Add a wildcard `[]` on the last letter of the filename to avoid throwing an error if the file does not exist. 
+# See: https://stackoverflow.com/questions/31528384/conditional-copy-add-in-dockerfile +COPY cwallet.ss[o] ./ +COPY sqlnet.or[a] ./ +COPY tnsnames.or[a] ./ + +# Switch back to jovyan to avoid accidental container runs as root +USER "${NB_UID}" + +WORKDIR "${HOME}" + +# Install `oracledb` Python library to use Oracle SQL Instant Client +RUN mamba install --yes 'oracledb' && \ + mamba clean --all -f -y && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/docs/using/recipe_code/pip_install.dockerfile b/docker-stacks/docs/using/recipe_code/pip_install.dockerfile new file mode 100644 index 0000000..fc6508b --- /dev/null +++ b/docker-stacks/docs/using/recipe_code/pip_install.dockerfile @@ -0,0 +1,12 @@ +FROM quay.io/jupyter/base-notebook + +# Install in the default python3 environment +RUN pip install --no-cache-dir 'flake8' && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" + +# Install from the requirements.txt file +COPY --chown=${NB_UID}:${NB_GID} requirements.txt /tmp/ +RUN pip install --no-cache-dir --requirement /tmp/requirements.txt && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/docs/using/recipe_code/requirements.txt b/docker-stacks/docs/using/recipe_code/requirements.txt new file mode 100644 index 0000000..3379e2a --- /dev/null +++ b/docker-stacks/docs/using/recipe_code/requirements.txt @@ -0,0 +1 @@ +autoflake diff --git a/docker-stacks/docs/using/recipe_code/rise_jupyterlab.dockerfile b/docker-stacks/docs/using/recipe_code/rise_jupyterlab.dockerfile new file mode 100644 index 0000000..7d796ca --- /dev/null +++ b/docker-stacks/docs/using/recipe_code/rise_jupyterlab.dockerfile @@ -0,0 +1,6 @@ +FROM quay.io/jupyter/base-notebook + +RUN mamba install --yes 'jupyterlab_rise' && \ + mamba clean --all -f -y && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/docs/using/recipe_code/spellcheck_notebook_v6.dockerfile b/docker-stacks/docs/using/recipe_code/spellcheck_notebook_v6.dockerfile new file mode 100644 index 0000000..1e12b56 --- /dev/null +++ b/docker-stacks/docs/using/recipe_code/spellcheck_notebook_v6.dockerfile @@ -0,0 +1,9 @@ +# Using Docker Hub here, because this image is old and not pushed to Quay.io +FROM docker.io/jupyter/base-notebook:notebook-6.5.4 + +RUN pip install --no-cache-dir 'jupyter_contrib_nbextensions' && \ + jupyter contrib nbextension install --user && \ + # can modify or enable additional extensions here + jupyter nbclassic-extension enable spellchecker/main --user && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/docs/using/recipe_code/xgboost.dockerfile b/docker-stacks/docs/using/recipe_code/xgboost.dockerfile new file mode 100644 index 0000000..14afc79 --- /dev/null +++ b/docker-stacks/docs/using/recipe_code/xgboost.dockerfile @@ -0,0 +1,6 @@ +FROM quay.io/jupyter/base-notebook + +RUN mamba install --yes 'py-xgboost' && \ + mamba clean --all -f -y && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/docs/using/recipes.md b/docker-stacks/docs/using/recipes.md new file mode 100644 index 0000000..f42696d --- /dev/null +++ b/docker-stacks/docs/using/recipes.md @@ -0,0 +1,527 @@ +# Contributed Recipes + +Users sometimes share interesting ways of using the Jupyter Docker Stacks. 
+We encourage users to [contribute these recipes](../contributing/recipes.md) to the documentation by submitting a pull request against `docs/using/recipes.md`, in case they prove helpful to other community members.
+The sections below capture this knowledge.
+
+## Using `sudo` within a container
+
+Password authentication is disabled for the `NB_USER` (e.g., `jovyan`).
+We made this choice to avoid distributing images with a weak default password that users ~might~ will forget to change before running a container on a publicly accessible host.
+
+You can grant the within-container `NB_USER` passwordless `sudo` access by adding `--user root` and `-e GRANT_SUDO=yes` to your Docker command line or appropriate container orchestrator config.
+
+For example:
+
+```bash
+docker run -it --rm \
+    --user root \
+    -e GRANT_SUDO=yes \
+    quay.io/jupyter/base-notebook
+```
+
+**You should only enable `sudo` if you trust the user and/or if the container is running on an isolated host.**
+See [Docker security documentation](https://docs.docker.com/engine/security/userns-remap/) for more information about running containers as `root`.
+
+## Using `mamba install` (recommended) or `pip install` in a Child Docker image
+
+Create a new Dockerfile like the one shown below.
+To install packages from a `requirements.txt` file, first create the `requirements.txt` file listing the desired packages.
+
+```{literalinclude} recipe_code/mamba_install.dockerfile
+:language: docker
+```
+
+`pip` usage is similar:
+
+```{literalinclude} recipe_code/pip_install.dockerfile
+:language: docker
+```
+
+Then build a new image.
+
+```bash
+docker build --rm --tag my-custom-image .
+```
+
+You can then run the image as follows:
+
+```bash
+docker run -it --rm \
+    -p 8888:8888 \
+    my-custom-image
+```
+
+## Add a custom conda environment and Jupyter kernel
+
+The default version of `Python` that ships with the image may not be the version you want.
+The instructions below permit adding a conda environment with a different `Python` version and making it accessible to Jupyter.
+You may also use older images like `jupyter/base-notebook:python-3.10`.
+A list of all tags can be found [here](https://github.com/jupyter/docker-stacks/wiki).
+
+```{literalinclude} recipe_code/custom_environment.dockerfile
+:language: docker
+```
+
+## Dask JupyterLab Extension
+
+[Dask JupyterLab Extension](https://github.com/dask/dask-labextension) provides a JupyterLab extension to manage Dask clusters, as well as embed Dask's dashboard plots directly into JupyterLab panes.
+Create the Dockerfile as:
+
+```{literalinclude} recipe_code/dask_jupyterlab.dockerfile
+:language: docker
+```
+
+And build the image as:
+
+```bash
+docker build --rm --tag my-custom-image .
+```
+
+Once built, run using the command:
+
+```bash
+docker run -it --rm \
+    -p 8888:8888 \
+    -p 8787:8787 \
+    my-custom-image
+```
+
+## Let's Encrypt a Server
+
+```{warning}
+This recipe is not tested and might be broken.
+```
+
+See the README for basic automation, which includes steps for requesting and renewing a Let's Encrypt certificate.
+
+## Slideshows with JupyterLab and RISE
+
+[RISE](https://github.com/jupyterlab-contrib/rise): "Live" Reveal.js JupyterLab Slideshow Extension.
+
+```{note}
+We're providing the recipe to install the JupyterLab extension.
+You can find the original Jupyter Notebook extension [here](https://github.com/damianavila/RISE).
+```
+
+```{literalinclude} recipe_code/rise_jupyterlab.dockerfile
+:language: docker
+```
+
+## xgboost
+
+```{literalinclude} recipe_code/xgboost.dockerfile
+:language: docker
+```
+
+## Running behind an nginx proxy
+
+```{warning}
+This recipe is not tested and might be broken.
+```
+
+Sometimes it is helpful to run the Jupyter instance behind an nginx proxy, for example:
+
+- you would prefer to access the notebook at a server URL with a path
+  (`https://example.com/jupyter`) rather than a port (`https://example.com:8888`)
+- you may have many services in addition to Jupyter running on the same server
+  and want nginx to help improve server performance in managing the connections
+
+Here is a [quick example of NGINX configuration](https://gist.github.com/cboettig/8643341bd3c93b62b5c2) to get started.
+You'll need a server, a `.crt`, and a `.key` file for your server, and `docker` & `docker-compose` installed.
+Then download the files at that gist and run `docker-compose up` to test it out.
+Customize the `nginx.conf` file to set the desired paths and add other services.
+
+## Host volume mounts and notebook errors
+
+If you are mounting a host directory as `/home/jovyan/work` in your container,
+and you receive permission errors or connection errors when you create a notebook,
+be sure that the `jovyan` user (`UID=1000` by default) has read/write access to the directory on the host.
+Alternatively, specify the UID of the `jovyan` user on container startup using the `-e NB_UID` option
+described in the [Common Features, Docker Options section](common.md#docker-options).
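+
+For example, here is a minimal sketch of such a launch that matches the container user's UID to your host user (the `-e NB_UID` option only takes effect when the container starts as `root`):
+
+```bash
+# Start as root so the startup scripts can switch jovyan to your host UID;
+# the server itself still runs as the unprivileged user afterwards
+docker run -it --rm \
+    --user root \
+    -e NB_UID="$(id -u)" \
+    -v "${PWD}":/home/jovyan/work \
+    quay.io/jupyter/base-notebook
+```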
+
+## Manpage installation
+
+Most containers, including our Ubuntu base image, ship without manpages installed to save space.
+You can use the following Dockerfile to inherit from one of our images to enable manpages:
+
+```{literalinclude} recipe_code/manpage_install.dockerfile
+:language: docker
+```
+
+Adding the documentation on top of the existing image wastes a lot of space
+and requires reinstalling every system package,
+which can take additional time and bandwidth.
+Enabling manpages in the base Ubuntu layer prevents this container bloat.
+To achieve this, use the previous `Dockerfile`'s commands with the original `ubuntu` image as your base container:
+
+```dockerfile
+FROM ubuntu:22.04
+```
+
+Be sure to check the current base image in `jupyter/docker-stacks-foundation` before building.
+
+## JupyterHub
+
+We also have contributed recipes for using JupyterHub.
+
+### Use JupyterHub's DockerSpawner
+
+You can find an example of using DockerSpawner [here](https://github.com/jupyterhub/jupyterhub-deploy-docker/tree/main/basic-example).
+
+### Containers with a specific version of JupyterHub
+
+The version of `jupyterhub` in your image should match the version running in the Hub itself.
+To use a specific version of JupyterHub, do the following:
+
+```{literalinclude} recipe_code/jupyterhub_version.dockerfile
+:language: docker
+```
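+
+Before deploying against an existing Hub, you can quickly check which `jupyterhub` version ships in an image, for example:
+
+```bash
+# Print the jupyterhub package version installed in the image
+docker run -it --rm quay.io/jupyter/base-notebook mamba list jupyterhub
+```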
+
+## Spark
+
+A few suggestions have been made regarding using Docker Stacks with Spark.
+
+### Using PySpark with AWS S3
+
+```{warning}
+This recipe is not tested and might be broken.
+```
+
+Using a Spark session for Hadoop 2.7.3:
+
+```python
+import os
+
+# To figure out which version of Hadoop you have, run:
+# ls /usr/local/spark/jars/hadoop*
+os.environ["PYSPARK_SUBMIT_ARGS"] = (
+    '--packages "org.apache.hadoop:hadoop-aws:2.7.3" pyspark-shell'
+)
+
+import pyspark
+
+myAccessKey = input()
+mySecretKey = input()
+
+spark = (
+    pyspark.sql.SparkSession.builder.master("local[*]")
+    .config("spark.hadoop.fs.s3a.access.key", myAccessKey)
+    .config("spark.hadoop.fs.s3a.secret.key", mySecretKey)
+    .getOrCreate()
+)
+
+df = spark.read.parquet("s3://myBucket/myKey")
+```
+
+Using a Spark context for Hadoop 2.6.0:
+
+```python
+import os
+
+os.environ["PYSPARK_SUBMIT_ARGS"] = (
+    "--packages com.amazonaws:aws-java-sdk:1.10.34,org.apache.hadoop:hadoop-aws:2.6.0 pyspark-shell"
+)
+
+import pyspark
+
+sc = pyspark.SparkContext("local[*]")
+
+from pyspark.sql import SQLContext
+
+sqlContext = SQLContext(sc)
+
+hadoopConf = sc._jsc.hadoopConfiguration()
+myAccessKey = input()
+mySecretKey = input()
+hadoopConf.set("fs.s3.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem")
+hadoopConf.set("fs.s3.awsAccessKeyId", myAccessKey)
+hadoopConf.set("fs.s3.awsSecretAccessKey", mySecretKey)
+
+df = sqlContext.read.parquet("s3://myBucket/myKey")
+```
+
+### Using Local Spark JARs
+
+```{warning}
+This recipe is not tested and might be broken.
+```
+
+```python
+import os
+
+os.environ["PYSPARK_SUBMIT_ARGS"] = (
+    "--jars /home/jovyan/spark-streaming-kafka-assembly_2.10-1.6.1.jar pyspark-shell"
+)
+import pyspark
+from pyspark.streaming.kafka import KafkaUtils
+from pyspark.streaming import StreamingContext
+
+sc = pyspark.SparkContext()
+ssc = StreamingContext(sc, 1)
+broker = ""  # set this to your Kafka broker address
+directKafkaStream = KafkaUtils.createDirectStream(
+    ssc, ["test1"], {"metadata.broker.list": broker}
+)
+directKafkaStream.pprint()
+ssc.start()
+```
+
+### Using spark-packages.org
+
+```{warning}
+This recipe is not tested and might be broken.
+```
+
+If you'd like to use packages from [spark-packages.org](https://spark-packages.org/), see
+[https://gist.github.com/parente/c95fdaba5a9a066efaab](https://gist.github.com/parente/c95fdaba5a9a066efaab)
+for an example of how to specify the package identifier in the environment before creating a
+SparkContext.
+
+### Use `jupyter/all-spark-notebook` with an existing Spark/YARN cluster
+
+```{warning}
+This recipe is not tested and might be broken.
+``` + +```dockerfile +FROM quay.io/jupyter/all-spark-notebook + +# Set env vars for pydoop +ENV HADOOP_HOME /usr/local/hadoop-2.7.3 +ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64 +ENV HADOOP_CONF_HOME /usr/local/hadoop-2.7.3/etc/hadoop +ENV HADOOP_CONF_DIR /usr/local/hadoop-2.7.3/etc/hadoop + +USER root +# Add proper open-jdk-8 not the jre only, needed for pydoop +RUN echo 'deb https://cdn-fastly.deb.debian.org/debian jessie-backports main' > /etc/apt/sources.list.d/jessie-backports.list && \ + apt-get update --yes && \ + apt-get install --yes --no-install-recommends -t jessie-backports openjdk-8-jdk && \ + rm /etc/apt/sources.list.d/jessie-backports.list && \ + apt-get clean && rm -rf /var/lib/apt/lists/* && \ +# Add Hadoop binaries + wget --progress=dot:giga https://mirrors.ukfast.co.uk/sites/ftp.apache.org/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz && \ + tar -xvf hadoop-2.7.3.tar.gz -C /usr/local && \ + chown -R "${NB_USER}:users" /usr/local/hadoop-2.7.3 && \ + rm -f hadoop-2.7.3.tar.gz && \ +# Install os dependencies required for pydoop, pyhive + apt-get update --yes && \ + apt-get install --yes --no-install-recommends build-essential python-dev libsasl2-dev && \ + apt-get clean && rm -rf /var/lib/apt/lists/* && \ +# Remove the example hadoop configs and replace +# with those for our cluster. +# Alternatively, this could be mounted as a volume + rm -f /usr/local/hadoop-2.7.3/etc/hadoop/* + +# Download this from ambari/cloudera manager and copy it here +COPY example-hadoop-conf/ /usr/local/hadoop-2.7.3/etc/hadoop/ + +# Spark-Submit doesn't work unless I set the following +RUN echo "spark.driver.extraJavaOptions -Dhdp.version=2.5.3.0-37" >> /usr/local/spark/conf/spark-defaults.conf && \ + echo "spark.yarn.am.extraJavaOptions -Dhdp.version=2.5.3.0-37" >> /usr/local/spark/conf/spark-defaults.conf && \ + echo "spark.master=yarn" >> /usr/local/spark/conf/spark-defaults.conf && \ + echo "spark.hadoop.yarn.timeline-service.enabled=false" >> /usr/local/spark/conf/spark-defaults.conf && \ + chown -R "${NB_USER}:users" /usr/local/spark/conf/spark-defaults.conf && \ + # Create an alternative HADOOP_CONF_HOME so we can mount as a volume and repoint + # using ENV var if needed + mkdir -p /etc/hadoop/conf/ && \ + chown "${NB_USER}":users /etc/hadoop/conf/ + +USER ${NB_UID} + +# Install useful jupyter extensions and python libraries like : +# - Dashboards +# - PyDoop +# - PyHive +RUN pip install --no-cache-dir 'jupyter_dashboards' 'faker' && \ + jupyter dashboards quick-setup --sys-prefix && \ + pip2 install --no-cache-dir 'pyhive' 'pydoop' 'thrift' 'sasl' 'thrift_sasl' 'faker' && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" + +USER root +# Ensure we overwrite the kernel config so that toree connects to cluster +RUN jupyter toree install --sys-prefix --spark_opts="\ + --master yarn \ + --deploy-mode client \ + --driver-memory 512m \ + --executor-memory 512m \ + --executor-cores 1 \ + --driver-java-options \ + -Dhdp.version=2.5.3.0-37 \ + --conf spark.hadoop.yarn.timeline-service.enabled=false \ +" +USER ${NB_UID} +``` + +Credit: [britishbadger](https://github.com/britishbadger) from [docker-stacks/issues/369](https://github.com/jupyter/docker-stacks/issues/369) + +## Run Server inside an already secured environment (i.e., with no token) + +The default security is very good. +There are use cases, encouraged by containers, where the jupyter container and the system it runs within lie inside the security boundary. 
+In these use cases, it is convenient to launch the server without a password or token.
+To do so, pass an empty token to the `start-notebook.py` script:
+
+For JupyterLab:
+
+```bash
+docker run -it --rm \
+    quay.io/jupyter/base-notebook \
+    start-notebook.py --IdentityProvider.token=''
+```
+
+For Jupyter Notebook:
+
+```bash
+docker run -it --rm \
+    -e DOCKER_STACKS_JUPYTER_CMD=notebook \
+    quay.io/jupyter/base-notebook \
+    start-notebook.py --IdentityProvider.token=''
+```
+
+## Enable nbclassic-extension spellchecker for markdown (or any other nbclassic-extension)
+
+```{note}
+This recipe only works for NBClassic with Jupyter Notebook < 7.
+It is recommended to use [jupyterlab-spellchecker](https://github.com/jupyterlab-contrib/spellchecker) in modern environments.
+```
+
+```{literalinclude} recipe_code/spellcheck_notebook_v6.dockerfile
+:language: docker
+```
+
+## Enable Delta Lake in Spark notebooks
+
+```{warning}
+This recipe is not tested and might be broken.
+```
+
+Please note that the [Delta Lake](https://delta.io/) packages are only available for Spark version > `3.0`.
+By adding the properties to `spark-defaults.conf`, the user no longer needs to enable Delta support in each notebook.
+
+```dockerfile
+FROM quay.io/jupyter/pyspark-notebook
+
+RUN mamba install --yes 'delta-spark' && \
+    mamba clean --all -f -y && \
+    fix-permissions "${CONDA_DIR}" && \
+    fix-permissions "/home/${NB_USER}"
+
+USER root
+
+RUN echo 'spark.sql.extensions io.delta.sql.DeltaSparkSessionExtension' >> "${SPARK_HOME}/conf/spark-defaults.conf" && \
+    echo 'spark.sql.catalog.spark_catalog org.apache.spark.sql.delta.catalog.DeltaCatalog' >> "${SPARK_HOME}/conf/spark-defaults.conf"
+
+USER ${NB_UID}
+
+# Trigger download of delta lake files
+RUN echo "from pyspark.sql import SparkSession" > /tmp/init-delta.py && \
+    echo "from delta import *" >> /tmp/init-delta.py && \
+    echo "spark = configure_spark_with_delta_pip(SparkSession.builder).getOrCreate()" >> /tmp/init-delta.py && \
+    python /tmp/init-delta.py && \
+    rm /tmp/init-delta.py
+```
+
+## Add Custom Fonts in Scipy notebook
+
+```{warning}
+This recipe is not tested and might be broken.
+```
+
+The example below is a Dockerfile to load Source Han Sans with normal weight, usually used for the web.
+
+```dockerfile
+FROM quay.io/jupyter/scipy-notebook
+
+RUN PYV=$(ls "${CONDA_DIR}/lib" | grep ^python) && \
+    MPL_DATA="${CONDA_DIR}/lib/${PYV}/site-packages/matplotlib/mpl-data" && \
+    wget --progress=dot:giga -P "${MPL_DATA}/fonts/ttf/" https://mirrors.cloud.tencent.com/adobe-fonts/source-han-sans/SubsetOTF/CN/SourceHanSansCN-Normal.otf && \
+    sed -i 's/#font.family/font.family/g' "${MPL_DATA}/matplotlibrc" && \
+    sed -i 's/#font.sans-serif:/font.sans-serif: Source Han Sans CN,/g' "${MPL_DATA}/matplotlibrc" && \
+    sed -i 's/#axes.unicode_minus: True/axes.unicode_minus: False/g' "${MPL_DATA}/matplotlibrc" && \
+    rm -rf "/home/${NB_USER}/.cache/matplotlib" && \
+    python -c 'import matplotlib.font_manager;print("font loaded: ",("Source Han Sans CN" in [f.name for f in matplotlib.font_manager.fontManager.ttflist]))'
+```
+
+## Enable clipboard in pandas on Linux systems
+
+```{warning}
+This recipe is not tested and might be broken.
+```
+
+```{admonition} Additional notes
+ This solution works on Linux host systems.
+ It is not required on Windows and won't work on macOS.
+```
+
+To enable the `pandas.read_clipboard()` functionality, you need to have `xclip` installed
+(it is already installed in `minimal-notebook` and all images inheriting from it)
+and add these options when running `docker`: `-e DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix`, i.e.:
+
+```bash
+docker run -it --rm \
+    -e DISPLAY \
+    -v /tmp/.X11-unix:/tmp/.X11-unix \
+    quay.io/jupyter/minimal-notebook
+```
+
+## Add ijavascript kernel to container
+
+```{warning}
+This recipe is not tested and might be broken.
+```
+
+The example below is a Dockerfile to install the [ijavascript kernel](https://github.com/n-riesco/ijavascript).
+
+```dockerfile
+FROM quay.io/jupyter/scipy-notebook
+
+# install ijavascript
+RUN npm install -g ijavascript
+RUN ijsinstall
+```
+
+## Add Microsoft SQL Server ODBC driver
+
+The following recipe demonstrates how to add functionality to read from and write to an instance of Microsoft SQL Server in your notebook.
+
+```{literalinclude} recipe_code/microsoft_odbc.dockerfile
+:language: docker
+```
+
+You can now use `pyodbc` and `sqlalchemy` to interact with the database.
+
+Pre-built images are hosted in the [Realiserad/jupyter-docker-mssql](https://github.com/Realiserad/jupyter-docker-mssql) repository.
+
+## Add Oracle SQL Instant client, SQL\*Plus, and other tools (Version 21.x)
+
+```{note}
+This recipe only works for x86_64 architecture.
+```
+
+The following recipe demonstrates how to add functionality to connect to an Oracle Database using [Oracle Instant Client](https://www.oracle.com/database/technologies/instant-client.html)
+in your notebook.
+This recipe installs version `21.11.0.0.0`.
+
+For the complete list of available versions, see the [Oracle Instant Client Download page](https://www.oracle.com/es/database/technologies/instant-client/linux-x86-64-downloads.html).
+You may need to perform different steps for older versions;
+they may be explained in the "Installation instructions" section of the Downloads page.
+
+```{literalinclude} recipe_code/oracledb.dockerfile
+:language: docker
+```
diff --git a/docker-stacks/docs/using/running.md b/docker-stacks/docs/using/running.md
new file mode 100644
index 0000000..b652b92
--- /dev/null
+++ b/docker-stacks/docs/using/running.md
@@ -0,0 +1,192 @@
+# Running a Container
+
+Using one of the Jupyter Docker Stacks requires two choices:
+
+1. Which Docker image you wish to use
+2. How you wish to start Docker containers from that image
+
+This section provides details about the second.
+
+## Using the Docker CLI
+
+You can launch a local Docker container from the Jupyter Docker Stacks using the [Docker command-line interface](https://docs.docker.com/engine/reference/commandline/cli/).
+There are numerous ways to configure containers using the CLI.
+The following are some common patterns.
+
+### Example 1
+
+This command pulls the `jupyter/scipy-notebook` image tagged `2024-01-15` from Quay.io if it is not already present on the local host.
+It then starts a container running Jupyter Server with the JupyterLab frontend and exposes the server on host port 8888.
+The server logs appear in the terminal and include a URL to the server.
+
+```bash
+docker run -it -p 8888:8888 quay.io/jupyter/scipy-notebook:2024-01-15
+
+# Entered start.sh with args: jupyter lab
+
+# ...
+
+# To access the server, open this file in a browser:
+#     file:///home/jovyan/.local/share/jupyter/runtime/jpserver-7-open.html
+# Or copy and paste one of these URLs:
+#     http://eca4aa01751c:8888/lab?token=d4ac9278f5f5388e88097a3a8ebbe9401be206cfa0b83099
+#     http://127.0.0.1:8888/lab?token=d4ac9278f5f5388e88097a3a8ebbe9401be206cfa0b83099
+```
+
+Pressing `Ctrl-C` twice shuts down the Server but leaves the container intact on disk for later restart or permanent deletion using commands like the following:
+
+```bash
+# list containers
+docker ps --all
+# CONTAINER ID   IMAGE                                       COMMAND                  CREATED              STATUS                     PORTS     NAMES
+# eca4aa01751c   quay.io/jupyter/scipy-notebook:2024-01-15   "tini -g -- start-no…"   About a minute ago   Exited (0) 5 seconds ago             silly_panini
+
+# start the stopped container
+docker start --attach -i eca4aa01751c
+# Entered start.sh with args: jupyter lab
+# ...
+
+# remove the stopped container
+docker rm eca4aa01751c
+# eca4aa01751c
+```
+
+### Example 2
+
+This command pulls the `jupyter/r-notebook` image tagged `2024-01-15` from Quay.io if it is not already present on the local host.
+It then starts a container running a Jupyter Server and exposes the server on host port 10000.
+The server logs appear in the terminal and include a URL to the server, but with the internal container port (8888) instead of the correct host port (10000).
+
+```bash
+docker run -it --rm -p 10000:8888 -v "${PWD}":/home/jovyan/work quay.io/jupyter/r-notebook:2024-01-15
+```
+
+Pressing `Ctrl-C` twice shuts down the Server and immediately destroys the Docker container.
+New files and changes in `~/work` in the container will be preserved.
+Any other changes made in the container will be lost.
+
+```{note}
+By default, [jupyter's root_dir](https://jupyter-server.readthedocs.io/en/latest/other/full-config.html) is `/home/jovyan`.
+So, new notebooks will be saved there, unless you change the directory in the file browser.
+
+To change the default directory, you will need to specify `ServerApp.root_dir` by adding this line to the previous command, as shown below: `start-notebook.py --ServerApp.root_dir=/home/jovyan/work`.
+```
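+
+A sketch of the full command with that option appended:
+
+```bash
+# Save new notebooks under ~/work (the host-mounted directory) by default
+docker run -it --rm -p 10000:8888 -v "${PWD}":/home/jovyan/work \
+    quay.io/jupyter/r-notebook:2024-01-15 \
+    start-notebook.py --ServerApp.root_dir=/home/jovyan/work
+```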
+
+### Example 3
+
+This command pulls the `jupyter/all-spark-notebook` image currently tagged `latest` from Quay.io if an image tagged `latest` is not already present on the local host.
+It then starts a container named `notebook` running a JupyterLab server and exposes the server on a randomly selected port.
+
+```bash
+docker run --detach -P --name notebook quay.io/jupyter/all-spark-notebook
+```
+
+where:
+
+- `--detach`: will run the container in detached mode
+
+You can also use the following docker commands to see the port and Jupyter Server token:
+
+```bash
+# get the random host port assigned to the container port 8888
+docker port notebook 8888
+# 0.0.0.0:49153
+# :::49153
+
+# get the notebook token from the logs
+docker logs --tail 3 notebook
+#     Or copy and paste one of these URLs:
+#         http://878f1a9b4dfa:8888/lab?token=d336fa63c03f064ff15ce7b269cab95b2095786cf9ab2ba3
+#      or http://127.0.0.1:8888/lab?token=d336fa63c03f064ff15ce7b269cab95b2095786cf9ab2ba3
+```
+
+Together, these give the URL to visit on the host machine to access the server, in this case: `http://127.0.0.1:49153/lab?token=d336fa63c03f064ff15ce7b269cab95b2095786cf9ab2ba3`.
+
+The container runs in the background until stopped and/or removed by additional Docker commands:
+
+```bash
+# stop the container
+docker stop notebook
+# notebook
+
+# remove the container permanently
+docker rm notebook
+# notebook
+```
+
+## Using the Podman CLI
+
+An alternative to using the Docker CLI is to use the Podman CLI.
+Podman is mostly compatible with Docker.
+
+### Podman example
+
+If we use Podman instead of Docker in the situation given in _Example 2_, it looks like the following.
+The example makes use of rootless Podman; in other words, the Podman command is run from a regular user account.
+In a Bash shell, set the shell variables _uid_ and _gid_ to the UID and GID of the user _jovyan_ in the container.
+
+```bash
+uid=1000
+gid=100
+```
+
+Set the shell variables _subuidSize_ and _subgidSize_ to the number of subordinate UIDs and GIDs, respectively.
+
+```bash
+subuidSize=$(( $(podman info --format "{{ range .Host.IDMappings.UIDMap }}+{{.Size }}{{end }}" ) - 1 ))
+subgidSize=$(( $(podman info --format "{{ range .Host.IDMappings.GIDMap }}+{{.Size }}{{end }}" ) - 1 ))
+```
+
+This command pulls the `quay.io/jupyter/r-notebook` image tagged `2024-01-15` from Quay.io if it is not already present on the local host.
+It then starts a container running a Jupyter Server with the JupyterLab frontend and exposes the server on host port 10000.
+The server logs appear in the terminal and include a URL to the server, but with the internal container port (8888) instead of the correct host port (10000).
+
+```bash
+podman run -it --rm -p 10000:8888 \
+    -v "${PWD}":/home/jovyan/work --user $uid:$gid \
+    --uidmap $uid:0:1 --uidmap 0:1:$uid --uidmap $(($uid+1)):$(($uid+1)):$(($subuidSize-$uid)) \
+    --gidmap $gid:0:1 --gidmap 0:1:$gid --gidmap $(($gid+1)):$(($gid+1)):$(($subgidSize-$gid)) \
+    quay.io/jupyter/r-notebook:2024-01-15
+```
+
+```{warning}
+The `podman run` options `--uidmap` and `--gidmap` can be used to map the container user _jovyan_ to the regular user on the host when running rootless Podman.
+The same Podman command should not be run with sudo (i.e. running rootful Podman)
+because then the mapping would map the container user _jovyan_ to the root user on the host.
+It's a good security practice to run programs with as few privileges as possible.
+```
+
+```{note}
+The `podman run` command in the example above maps all subuids and subgids of the user into the container.
+That works fine but is actually more than needed.
+For instance, the `podman run` option `--userns=auto` cannot be used while there are no unused subuids and subgids available.
+The example could be improved by investigating in more detail which UIDs and GIDs need to be available in the container and mapping only those.
+```
+
+Pressing `Ctrl-C` twice shuts down the Server and immediately destroys the Podman container.
+New files and changes in `~/work` in the container will be preserved.
+Any other changes made in the container will be lost.
+
+## Using Binder
+
+A [Binder](https://mybinder.org/) is a service that allows you to create and share custom computing environments for projects in version control.
+You can use any of the Jupyter Docker Stacks images as a basis for a Binder-compatible Dockerfile.
+See the
+[docker-stacks example](https://mybinder.readthedocs.io/en/latest/examples/sample_repos.html#using-a-docker-image-from-the-jupyter-docker-stacks-repository) and
+[Using a Dockerfile](https://mybinder.readthedocs.io/en/latest/tutorials/dockerfile.html) section in the
+[Binder documentation](https://mybinder.readthedocs.io/en/latest/index.html) for instructions.
+
+## Using JupyterHub
+
+You can configure JupyterHub to launch Docker containers from the Jupyter Docker Stacks images.
+If you've been following the [Zero to JupyterHub with Kubernetes](https://z2jh.jupyter.org/en/latest/) guide, +see the [Use an existing Docker image](https://z2jh.jupyter.org/en/latest/jupyterhub/customizing/user-environment.html#choose-and-use-an-existing-docker-image) section for details. +If you have a custom JupyterHub deployment, see the [Picking or building a Docker image](https://jupyterhub-dockerspawner.readthedocs.io/en/latest/docker-image.html) +instructions for the [dockerspawner](https://github.com/jupyterhub/dockerspawner) instead. + +## Using Other Tools and Services + +You can use the Jupyter Docker Stacks with any Docker-compatible technology +(e.g., [Docker Compose](https://docs.docker.com/compose/), [docker-py](https://github.com/docker/docker-py), or your favorite cloud container service). +See the documentation of the tool, library, or service for details about how to reference, configure, and launch containers from these images. diff --git a/docker-stacks/docs/using/selecting.md b/docker-stacks/docs/using/selecting.md new file mode 100644 index 0000000..d44f456 --- /dev/null +++ b/docker-stacks/docs/using/selecting.md @@ -0,0 +1,337 @@ +# Selecting an Image + +- [Core Stacks](#core-stacks) +- [Image Relationships](#image-relationships) +- [Community Stacks](#community-stacks) + +Using one of the Jupyter Docker Stacks requires two choices: + +1. Which Docker image you wish to use +2. How you wish to start Docker containers from that image + +This section provides details about the first. + +## Core Stacks + +The Jupyter team maintains a set of Docker image definitions in the GitHub repository. +The following sections describe these images, including their contents, relationships, and versioning strategy. + +### jupyter/docker-stacks-foundation + +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/docker-stacks-foundation) | +[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/docker-stacks-foundation/Dockerfile) | +[Quay.io image tags](https://quay.io/repository/jupyter/docker-stacks-foundation?tab=tags) + +`jupyter/docker-stacks-foundation` is a small image supporting a majority of [options common across all core stacks](common.md). +It is the basis for all other stacks on which Jupyter-related applications can be built +(e.g., kernel-based containers, [nbclient](https://github.com/jupyter/nbclient) applications, etc.). +As such, it does not contain application-level software like JupyterLab, Jupyter Notebook, or JupyterHub. + +It contains: + +- Package managers + - [conda](https://github.com/conda/conda): "cross-platform, language-agnostic binary package manager". + - [mamba](https://github.com/mamba-org/mamba): "reimplementation of the conda package manager in C++". We use this package manager by default when installing packages. +- Unprivileged user `jovyan` (`uid=1000`, configurable, [see options in the common features section](./common.md) of this documentation) in group `users` (`gid=100`) + with ownership over the `/home/jovyan` and `/opt/conda` paths +- `tini` and a `start.sh` script as the container entry point - useful for running alternative commands in the container as applications are added (e.g. 
`ipython`, `jupyter kernelgateway`, `jupyter lab`) +- A `run-hooks.sh` script, which can source/run files in a given directory +- Options for a passwordless sudo +- Common system libraries like `bzip2`, `ca-certificates`, `locales` +- `wget` to download external files +- No preinstalled scientific computing packages + +### jupyter/base-notebook + +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/base-notebook) | +[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/base-notebook/Dockerfile) | +[Quay.io image tags](https://quay.io/repository/jupyter/base-notebook?tab=tags) + +`jupyter/base-notebook` adds base Jupyter Applications like JupyterLab, Jupyter Notebook, JupyterHub, and NBClassic +and serves as the basis for all other stacks besides `jupyter/docker-stacks-foundation`. + +It contains: + +- Everything in `jupyter/docker-stacks-foundation` +- Minimally functional Server (e.g., no LaTeX support for saving notebooks as PDFs) +- `notebook`, `jupyterhub`, and `jupyterlab` packages +- A `start-notebook.py` script as the default command +- A `start-singleuser.py` script useful for launching containers in JupyterHub +- Options for a self-signed HTTPS certificate + +```{warning} +`jupyter/base-notebook` also contains `start-notebook.sh` and `start-singleuser.sh` files to maintain backward compatibility. +External config that explicitly refers to those files should instead +update to refer to `start-notebook.py` and `start-singleuser.py`. +The shim `.sh` files will be removed at some future date. +``` + +### jupyter/minimal-notebook + +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/minimal-notebook) | +[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/minimal-notebook/Dockerfile) | +[Quay.io image tags](https://quay.io/repository/jupyter/minimal-notebook?tab=tags) + +`jupyter/minimal-notebook` adds command-line tools useful when working in Jupyter applications. 
+ +It contains: + +- Everything in `jupyter/base-notebook` +- Common useful utilities like + [curl](https://curl.se), + [git](https://git-scm.com/), + [nano](https://www.nano-editor.org/) (actually `nano-tiny`), + [tzdata](https://www.iana.org/time-zones), + [unzip](https://code.launchpad.net/ubuntu/+source/unzip), + and [vi](https://www.vim.org) (actually `vim-tiny`), +- [TeX Live](https://www.tug.org/texlive/) for notebook document conversion + +### jupyter/r-notebook + +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/r-notebook) | +[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/r-notebook/Dockerfile) | +[Quay.io image tags](https://quay.io/repository/jupyter/r-notebook?tab=tags) + +`jupyter/r-notebook` includes popular packages from the R ecosystem listed below: + +- Everything in `jupyter/minimal-notebook` and its ancestor images +- The [R](https://www.r-project.org/) interpreter and base environment +- [IRKernel](https://irkernel.github.io/) to support R code in Jupyter notebooks +- [tidyverse](https://www.tidyverse.org/) + packages from [conda-forge](https://conda-forge.org/feedstock-outputs/index.html) +- [caret](https://topepo.github.io/caret/index.html), + [crayon](https://cran.r-project.org/web/packages/crayon/index.html), + [devtools](https://cran.r-project.org/web/packages/devtools/index.html), + [forecast](https://cran.r-project.org/web/packages/forecast/index.html), + [hexbin](https://cran.r-project.org/web/packages/hexbin/index.html), + [htmltools](https://cran.r-project.org/web/packages/htmltools/index.html), + [htmlwidgets](https://www.htmlwidgets.org), + [nycflights13](https://cran.r-project.org/web/packages/nycflights13/index.html), + [randomforest](https://cran.r-project.org/web/packages/randomForest/index.html), + [rcurl](https://cran.r-project.org/web/packages/RCurl/index.html), + [rmarkdown](https://rmarkdown.rstudio.com), + [rodbc](https://cran.r-project.org/web/packages/RODBC/index.html), + [rsqlite](https://cran.r-project.org/web/packages/RSQLite/index.html), + [shiny](https://shiny.posit.co), + [tidymodels](https://www.tidymodels.org/), + [unixodbc](https://www.unixodbc.org) + packages from [conda-forge](https://conda-forge.org/feedstock-outputs/index.html) + +### jupyter/julia-notebook + +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/julia-notebook) | +[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/julia-notebook/Dockerfile) | +[Quay.io image tags](https://quay.io/repository/jupyter/julia-notebook?tab=tags) + +`jupyter/julia-notebook` includes popular packages from the Julia ecosystem listed below: + +- Everything in `jupyter/minimal-notebook` and its ancestor images +- The [Julia](https://julialang.org/) compiler and base environment +- [IJulia](https://github.com/JuliaLang/IJulia.jl) to support Julia code in Jupyter notebook +- [Pluto.jl](https://plutojl.org/) reactive Julia notebook interface, made accessible with [jupyter-pluto-proxy](https://github.com/yuvipanda/jupyter-pluto-proxy) +- [HDF5](https://github.com/JuliaIO/HDF5.jl) package + +### jupyter/scipy-notebook + +[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/scipy-notebook) | +[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/scipy-notebook/Dockerfile) | +[Quay.io image tags](https://quay.io/repository/jupyter/scipy-notebook?tab=tags) + +`jupyter/scipy-notebook` includes popular packages from 
the scientific Python ecosystem.
+
+- Everything in `jupyter/minimal-notebook` and its ancestor images
+- [altair](https://altair-viz.github.io),
+  [beautifulsoup4](https://www.crummy.com/software/BeautifulSoup/),
+  [bokeh](https://docs.bokeh.org/en/latest/),
+  [bottleneck](https://bottleneck.readthedocs.io/en/latest/),
+  [cloudpickle](https://github.com/cloudpipe/cloudpickle),
+  [conda-forge::blas=\*=openblas](https://www.openblas.net),
+  [cython](https://cython.org),
+  [dask](https://www.dask.org/),
+  [dill](https://pypi.org/project/dill/),
+  [h5py](https://www.h5py.org),
+  [jupyterlab-git](https://github.com/jupyterlab/jupyterlab-git),
+  [matplotlib-base](https://matplotlib.org/),
+  [numba](https://numba.pydata.org/),
+  [numexpr](https://github.com/pydata/numexpr),
+  [openpyxl](https://openpyxl.readthedocs.io/en/stable/),
+  [pandas](https://pandas.pydata.org/),
+  [patsy](https://patsy.readthedocs.io/en/latest/),
+  [protobuf](https://protobuf.dev/getting-started/pythontutorial/),
+  [pytables](https://www.pytables.org/),
+  [scikit-image](https://scikit-image.org),
+  [scikit-learn](https://scikit-learn.org/stable/),
+  [scipy](https://scipy.org/),
+  [seaborn](https://seaborn.pydata.org/),
+  [sqlalchemy](https://www.sqlalchemy.org/),
+  [statsmodels](https://www.statsmodels.org/stable/index.html),
+  [sympy](https://www.sympy.org/en/index.html),
+  [widgetsnbextension](https://ipywidgets.readthedocs.io/en/latest/user_install.html#installing-in-classic-jupyter-notebook),
+  [xlrd](https://www.python-excel.org)
+  packages
+- [ipympl](https://github.com/matplotlib/ipympl) and
+  [ipywidgets](https://ipywidgets.readthedocs.io/en/stable/)
+  for interactive visualizations and plots in Python notebooks
+- [Facets](https://github.com/PAIR-code/facets)
+  for visualizing machine learning datasets
+
+### jupyter/tensorflow-notebook
+
+[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/tensorflow-notebook) |
+[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/tensorflow-notebook/Dockerfile) |
+[Quay.io image tags](https://quay.io/repository/jupyter/tensorflow-notebook?tab=tags)
+
+`jupyter/tensorflow-notebook` includes popular Python deep learning libraries.
+
+- Everything in `jupyter/scipy-notebook` and its ancestor images
+- [tensorflow](https://www.tensorflow.org/) machine learning library
+
+### jupyter/pytorch-notebook
+
+[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/pytorch-notebook) |
+[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/pytorch-notebook/Dockerfile) |
+[Quay.io image tags](https://quay.io/repository/jupyter/pytorch-notebook?tab=tags)
+
+`jupyter/pytorch-notebook` includes popular Python deep learning libraries.
+
+- Everything in `jupyter/scipy-notebook` and its ancestor images
+- [pytorch](https://pytorch.org/) machine learning library
+
+### jupyter/datascience-notebook
+
+[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/datascience-notebook) |
+[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/datascience-notebook/Dockerfile) |
+[Quay.io image tags](https://quay.io/repository/jupyter/datascience-notebook?tab=tags)
+
+`jupyter/datascience-notebook` includes libraries for data analysis from the Python, R, and Julia communities.
+
+- Everything in the `jupyter/scipy-notebook`, `jupyter/r-notebook`, and `jupyter/julia-notebook` images and their ancestor
+  images
+- [rpy2](https://rpy2.github.io/doc/latest/html/index.html) package
+
+### jupyter/pyspark-notebook
+
+[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/pyspark-notebook) |
+[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/pyspark-notebook/Dockerfile) |
+[Quay.io image tags](https://quay.io/repository/jupyter/pyspark-notebook?tab=tags)
+
+`jupyter/pyspark-notebook` includes Python support for Apache Spark.
+
+- Everything in `jupyter/scipy-notebook` and its ancestor images
+- [Apache Spark](https://spark.apache.org/) with Hadoop binaries
+- [grpcio-status](https://github.com/grpc/grpc/tree/master/src/python/grpcio_status)
+- [grpcio](https://grpc.io/docs/languages/python/quickstart/)
+- [pyarrow](https://arrow.apache.org/docs/python/)
+
+### jupyter/all-spark-notebook
+
+[Source on GitHub](https://github.com/jupyter/docker-stacks/tree/main/images/all-spark-notebook) |
+[Dockerfile commit history](https://github.com/jupyter/docker-stacks/commits/main/images/all-spark-notebook/Dockerfile) |
+[Quay.io image tags](https://quay.io/repository/jupyter/all-spark-notebook?tab=tags)
+
+`jupyter/all-spark-notebook` includes Python and R support for Apache Spark.
+
+- Everything in `jupyter/pyspark-notebook` and its ancestor images
+- [IRKernel](https://irkernel.github.io/) to support R code in Jupyter notebooks
+- [rcurl](https://cran.r-project.org/web/packages/RCurl/index.html),
+  [sparklyr](https://spark.rstudio.com),
+  [ggplot2](https://ggplot2.tidyverse.org)
+  packages
+
+### Image Relationships
+
+The following diagram depicts the build dependency tree of the core images (i.e., the `FROM` statements in their Dockerfiles).
+Any given image inherits the complete content of all ancestor images pointing to it.
+
+[![Image inheritance
+diagram](../images/inherit.svg)](http://interactive.blockdiag.com/?compression=deflate&src=eJyFz8FOwzAMgOH7nsLqCQ55ADTBE3CDIxJyU5eZZnaUOJoK2rsv4YCUSlOvv784yRjULxPjF_weACaasQT7nFUs8w_BMzwda9fEJIbGKjVFTZaQ7Xioo6GMRax8yMPr-xtc2E51zmKQKBBmehzAvcBUb6HksqFfspu1yPS3rS2_N2vnxrrBiRqNqkvDXWjizMJnDB3atuay57h2qi_NDEaSNc1BL_99uEPjapr8ac_Vr2CtJJ52n5h2xXcJjDufiGuOmJZanrVtzGILbyusNkda3zw)
+
+### Builds
+
+Every Monday and whenever a pull request is merged, images are rebuilt and pushed to [the public container registry](https://quay.io/organization/jupyter).
+
+### Versioning via image tags
+
+Whenever a Docker image is pushed to the container registry, it is tagged with:
+
+- the `latest` tag
+- a 12-character git commit SHA like `1ffe43816ba9`
+- a date formatted like `2023-01-30`
+- OS version like `ubuntu-22.04`
+- a set of software version tags like `python-3.10.8` and `lab-3.5.3`
+
+```{warning}
+- Tags before `2022-07-05` were sometimes incorrect.
+  Please do not rely on them.
+- Single-platform images have either `aarch64-` or `x86_64-` tag prefixes, for example, `quay.io/jupyter/base-notebook:aarch64-python-3.11.6`
+```
+
+For stability and reproducibility, you should either reference a date-formatted
+tag from a date before the current date (in UTC) or a git commit SHA older
+than the latest git commit SHA in the default branch of the
+[jupyter/docker-stacks GitHub repository](https://github.com/jupyter/docker-stacks/).
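+
+For example, here is a sketch of pinning an image by date tag or by commit SHA instead of relying on `latest` (the tags shown are illustrative; pick real ones from the registry):
+
+```bash
+# Pin to the images built on a specific date
+docker pull quay.io/jupyter/base-notebook:2023-01-30
+
+# Or pin to a specific git commit SHA
+docker pull quay.io/jupyter/base-notebook:1ffe43816ba9
+```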
+
+## Community Stacks
+
+The core stacks are but a tiny sample of what's possible when combining Jupyter with other technologies.
+We encourage members of the Jupyter community to create their own stacks based on the core images and link them below.
+See the [contributing guide](../contributing/stacks.md) for information about how to create your own Jupyter Docker Stack.
+
+| Flavor         | Binder                  | Description                                                                                               |
+| -------------- | ----------------------- | --------------------------------------------------------------------------------------------------------- |
+| [csharp]       | [![bb]][csharp_b]       | More than 200 Jupyter Notebooks with example **C#** code                                                  |
+| [education]    | [![bb]][education_b]    | **`nbgrader`** and `RISE` on top of the `datascience-notebook` image                                      |
+| [ihaskell]     | [![bb]][ihaskell_b]     | Based on [**IHaskell**][ihaskell_project]. Includes popular packages and example notebooks                |
+| [java]         | [![bb]][java_b]         | [**IJava**][ijava] kernel on top of the `minimal-notebook` image                                          |
+| [sage]         | [![bb]][sage_b]         | [**sagemath**][sagemath] kernel on top of the `minimal-notebook` image                                    |
+| [cgspatial]    | [![bb]][cgspatial_b]    | Major **geospatial** Python & R libraries on top of the `datascience-notebook` image                      |
+| [kotlin]       | [![bb]][kotlin_b]       | [**Kotlin** kernel for Jupyter/IPython][kotlin_kernel] on top of the `base-notebook` image                |
+| [transformers] | [![bb]][transformers_b] | [**Transformers**][transformers_lib] and NLP libraries such as `Tensorflow`, `Keras`, `Jax` and `PyTorch` |
+| [scraper]      | [![bb]][scraper_b]      | **Scraper** tools (`selenium`, `chromedriver`, `beautifulsoup4`, `requests`) on `minimal-notebook` image  |
+| [almond]       | [![bb]][almond_b]       | Scala kernel for Jupyter using **Almond** on top of the `base-notebook` image                             |
+
+[bb]: https://static.mybinder.org/badge_logo.svg
+[csharp]: https://github.com/tlinnet/csharp-notebook
+[csharp_b]: https://mybinder.org/v2/gh/tlinnet/csharp-notebook/master
+[education]: https://github.com/umsi-mads/education-notebook
+[education_b]: https://mybinder.org/v2/gh/umsi-mads/education-notebook/master
+[ihaskell]: https://github.com/IHaskell/ihaskell-notebook
+[ihaskell_b]: https://mybinder.org/v2/gh/jamesdbrock/learn-you-a-haskell-notebook/master?urlpath=lab/tree/ihaskell_examples/ihaskell/IHaskell.ipynb
+[ihaskell_project]: https://github.com/IHaskell/IHaskell
+[java]: https://github.com/jbindinga/java-notebook
+[java_b]: https://mybinder.org/v2/gh/jbindinga/java-notebook/master
+[ijava]: https://github.com/SpencerPark/IJava
+[sage]: https://github.com/sharpTrick/sage-notebook
+[sage_b]: https://mybinder.org/v2/gh/sharpTrick/sage-notebook/master
+[sagemath]: https://www.sagemath.org
+[cgspatial]: https://github.com/SCiO-systems/cgspatial-notebook
+[cgspatial_b]: https://mybinder.org/v2/gh/SCiO-systems/cgspatial-notebook/master
+[kotlin]: https://github.com/knonm/kotlin-notebook
+[kotlin_b]: https://mybinder.org/v2/gh/knonm/kotlin-notebook/main
+[kotlin_kernel]: https://github.com/Kotlin/kotlin-jupyter
+[transformers]: https://github.com/ToluClassics/transformers_notebook
+[transformers_b]: https://mybinder.org/v2/gh/ToluClassics/transformers_notebook/main
+[transformers_lib]: https://huggingface.co/docs/transformers/index
+[scraper]: https://github.com/rgriffogoes/scraper-notebook
+[scraper_b]: https://mybinder.org/v2/gh/rgriffogoes/scraper-notebook/main
+[almond]: https://almond.sh
+[almond_b]: https://mybinder.org/v2/gh/almond-sh/examples/master?urlpath=lab%2Ftree%2Fnotebooks%2Findex.ipynb
+
+### GPU-accelerated notebooks
+
+| Flavor             | Description |
+| ------------------ | ----------- |
+| [GPU-Jupyter][gpu] | Leverage the power of your NVIDIA GPU for GPU-accelerated calculations with Tensorflow and Pytorch in collaborative notebooks. This is done by generating a Dockerfile that consists of the **nvidia/cuda** base image, the well-maintained **docker-stacks** integrated as a submodule, and GPU-capable libraries like **Tensorflow**, **Keras** and **PyTorch** on top of it. |
+| [PRP-GPU][prp_gpu] | PRP (Pacific Research Platform) maintained [registry][prp_reg] for a Jupyter stack based on an NVIDIA CUDA-enabled image. Adds the PRP image with PyTorch, some other Python packages, and a GUI Desktop notebook. |
+| [b-data][b-data]   | GPU-accelerated, multi-arch (`linux/amd64`, `linux/arm64/v8`) docker images for [R][r_cuda], [Python][python_cuda] and [Julia][julia_cuda]. Derived from nvidia/cuda `devel`-flavored images, including TensorRT and TensorRT plugin libraries. With [code-server][code-server] next to JupyterLab. Just Python – no [Conda][conda]/[Mamba][mamba]. |
+
+[gpu]: https://github.com/iot-salzburg/gpu-jupyter
+[prp_gpu]: https://gitlab.nrp-nautilus.io/prp/jupyter-stack/-/tree/prp
+[prp_reg]: https://gitlab.nrp-nautilus.io/prp/jupyter-stack/container_registry
+[b-data]: https://github.com/b-data
+[r_cuda]: https://github.com/b-data/jupyterlab-r-docker-stack/blob/main/CUDA.md
+[python_cuda]: https://github.com/b-data/jupyterlab-python-docker-stack/blob/main/CUDA.md
+[julia_cuda]: https://github.com/b-data/jupyterlab-julia-docker-stack/blob/main/CUDA.md
+[code-server]: https://github.com/coder/code-server
+[conda]: https://github.com/conda/conda
+[mamba]: https://github.com/mamba-org/mamba
diff --git a/docker-stacks/docs/using/specifics.md b/docker-stacks/docs/using/specifics.md
new file mode 100644
index 0000000..c578e37
--- /dev/null
+++ b/docker-stacks/docs/using/specifics.md
@@ -0,0 +1,307 @@
+# Image Specifics
+
+This page provides details about features specific to one or more images.
+
+## Apache Spark™
+
+### Specific Docker Image Options
+
+- `-p 4040:4040` - The `jupyter/pyspark-notebook` and `jupyter/all-spark-notebook` images open
+  [SparkUI (Spark Monitoring and Instrumentation UI)](https://spark.apache.org/docs/latest/monitoring.html) at default port `4040`;
+  this option maps port `4040` inside the docker container to port `4040` on the host machine.
+
+  ```{note}
+  Every new spark context that is created is put onto an incrementing port (i.e. 4040, 4041, 4042, etc.), and it might be necessary to open multiple ports.
+  ```
+
+  For example, `docker run --detach -p 8888:8888 -p 4040:4040 -p 4041:4041 quay.io/jupyter/pyspark-notebook`.
+
+#### IPython low-level output capture and forward
+
+Spark images (`pyspark-notebook` and `all-spark-notebook`) have been configured to disable IPython low-level output capture and forward system-wide.
+The rationale behind this choice is that Spark logs can be verbose, especially at startup when Ivy is used to load additional jars.
+Those logs are still available but only in the container's logs.
+
+If you want to make them appear in the notebook, you can overwrite the configuration in a user-level IPython kernel profile.
+To do that, you have to uncomment the following line in your `~/.ipython/profile_default/ipython_kernel_config.py` and restart the kernel.
+
+```python
+c.IPKernelApp.capture_fd_output = True
+```
+
+If you have no IPython profile, you can initiate a fresh one by running the following command.
+
+```bash
+ipython profile create
+# [ProfileCreate] Generating default config file: '/home/jovyan/.ipython/profile_default/ipython_config.py'
+# [ProfileCreate] Generating default config file: '/home/jovyan/.ipython/profile_default/ipython_kernel_config.py'
+```
+
+### Build an Image with a Different Version of Spark
+
+You can build a `pyspark-notebook` image with a different `Spark` version by overriding the default value of the following arguments at build time.
+`all-spark-notebook` is inherited from `pyspark-notebook`, so you have to first build `pyspark-notebook` and then `all-spark-notebook` to get the same version in `all-spark-notebook`.
+
+- Spark distribution is defined by the combination of Spark, Hadoop, and Scala versions,
+  see [Download Apache Spark](https://spark.apache.org/downloads.html) and the [archive repo](https://archive.apache.org/dist/spark/) for more information.
+
+  - `openjdk_version`: The version of the OpenJDK (JRE headless) distribution (`17` by default).
+    - This version needs to match the version supported by the Spark distribution used above.
+    - See [Spark Overview](https://spark.apache.org/docs/latest/#downloading) and [Ubuntu packages](https://packages.ubuntu.com/search?keywords=openjdk).
+  - `spark_version` (optional): The Spark version to install, for example `3.5.0`.
+    If not specified (this is the default), the latest stable Spark will be installed.
+  - `hadoop_version`: The Hadoop version (`3` by default).
+    Note that _Spark < 3.3_ requires specifying the `major.minor` Hadoop version (i.e. `3.2`).
+  - `scala_version` (optional): The Scala version, for example `2.13` (not specified by default).
+    Starting with _Spark >= 3.2_, the distribution file might contain the Scala version.
+  - `spark_download_url`: URL to use for Spark downloads.
+    You may need to use the archive URL (`https://archive.apache.org/dist/spark/`) if you want to download old Spark versions.
+
+For example, here is how to build a `pyspark-notebook` image with Spark `3.2.0`, Hadoop `3.2`, and OpenJDK `11`.
+
+```{warning}
+This recipe is not tested and might be broken.
+```
+
+```bash
+# From the root of the project
+# Build the image with different arguments
+docker build --rm --force-rm \
+    -t my-pyspark-notebook ./images/pyspark-notebook \
+    --build-arg openjdk_version=11 \
+    --build-arg spark_version=3.2.0 \
+    --build-arg hadoop_version=3.2 \
+    --build-arg spark_download_url="https://archive.apache.org/dist/spark/"
+
+# Check the newly built image
+docker run -it --rm my-pyspark-notebook pyspark --version
+
+# Welcome to
+#       ____              __
+#      / __/__  ___ _____/ /__
+#     _\ \/ _ \/ _ `/ __/  '_/
+#    /___/ .__/\_,_/_/ /_/\_\   version 3.2.0
+#       /_/
+
+# Using Scala version 2.12.15, OpenJDK 64-Bit Server VM, 11.0.21
+# Branch HEAD
+# Compiled by user ubuntu on 2021-10-06T12:46:30Z
+# Revision 5d45a415f3a29898d92380380cfd82bfc7f579ea
+# Url https://github.com/apache/spark
+# Type --help for more information.
+```
+
+### Usage Examples
+
+The `jupyter/pyspark-notebook` and `jupyter/all-spark-notebook` images support the use of [Apache Spark](https://spark.apache.org/) in Python and R notebooks.
+The following sections provide some examples of how to get started using them.
+
+#### Using Spark Local Mode
+
+Spark **local mode** is useful for experimentation on small data when you do not have a Spark cluster available.
+
+```{warning}
+In these examples, Spark spawns all the main execution components in the same single JVM.
+You can read additional info about local mode [here](https://books.japila.pl/apache-spark-internals/local/).
+If you want to use all available CPU cores, one of the simplest ways is to set up a [Spark Standalone Cluster](https://spark.apache.org/docs/latest/spark-standalone.html).
+```
+
+##### Local Mode in Python
+
+In a Python notebook.
+
+```python
+from pyspark.sql import SparkSession
+
+# Spark session & context
+spark = SparkSession.builder.master("local").getOrCreate()
+sc = spark.sparkContext
+
+# Sum of the first 100 whole numbers
+rdd = sc.parallelize(range(100 + 1))
+rdd.sum()
+# 5050
+```
+
+##### Local Mode in R
+
+In an R notebook with [SparkR][sparkr].
+
+```R
+library(SparkR)
+
+# Spark session & context
+sc <- sparkR.session("local")
+
+# Sum of the first 100 whole numbers
+sdf <- createDataFrame(list(1:100))
+dapplyCollect(sdf,
+              function(x)
+              { x <- sum(x)}
+             )
+# 5050
+```
+
+In an R notebook with [sparklyr][sparklyr].
+
+```R
+library(sparklyr)
+
+# Spark configuration
+conf <- spark_config()
+# Set the catalog implementation in-memory
+conf$spark.sql.catalogImplementation <- "in-memory"
+
+# Spark session & context
+sc <- spark_connect(master = "local", config = conf)
+
+# Sum of the first 100 whole numbers
+sdf_len(sc, 100, repartition = 1) %>%
+    spark_apply(function(e) sum(e))
+# 5050
+```
+
+#### Connecting to a Spark Cluster in Standalone Mode
+
+Connecting to a Spark cluster in **[Standalone Mode](https://spark.apache.org/docs/latest/spark-standalone.html)** requires the following steps:
+
+0. Verify that the Docker image (check the Dockerfile) and the Spark cluster being
+   deployed run the same version of Spark.
+1. [Deploy Spark in Standalone Mode](https://spark.apache.org/docs/latest/spark-standalone.html).
+2. Run the Docker container with `--net=host` in a location that is network-addressable by all of
+   your Spark workers.
+   (This is a [Spark networking requirement](https://spark.apache.org/docs/latest/cluster-overview.html#components).)
+
+   ```{note}
+   When using `--net=host`, you must also use the flags `--pid=host -e TINI_SUBREAPER=true`.
+   ```
+
+**Note**: In the following examples, we use the Spark master URL `spark://master:7077`, which should be replaced with the actual URL of your Spark master.
+
+##### Standalone Mode in Python
+
+The **same Python version** needs to be used on the notebook (where the driver is located) and on the Spark workers.
+The Python version used on the driver and worker side can be adjusted by setting the environment variables `PYSPARK_PYTHON` and/or `PYSPARK_DRIVER_PYTHON`,
+see [Spark Configuration][spark-conf] for more information.
+
+```python
+from pyspark.sql import SparkSession
+
+# Spark session & context
+spark = SparkSession.builder.master("spark://master:7077").getOrCreate()
+sc = spark.sparkContext
+
+# Sum of the first 100 whole numbers
+rdd = sc.parallelize(range(100 + 1))
+rdd.sum()
+# 5050
+```
+
+##### Standalone Mode in R
+
+In an R notebook with [SparkR][sparkr].
+
+```R
+library(SparkR)
+
+# Spark session & context
+sc <- sparkR.session("spark://master:7077")
+
+# Sum of the first 100 whole numbers
+sdf <- createDataFrame(list(1:100))
+dapplyCollect(sdf,
+              function(x)
+              { x <- sum(x)}
+             )
+# 5050
+```
+
+In an R notebook with [sparklyr][sparklyr].
+
+```R
+library(sparklyr)
+
+# Spark configuration
+conf <- spark_config()
+# Set the catalog implementation in-memory
+conf$spark.sql.catalogImplementation <- "in-memory"
+
+# Spark session & context
+sc <- spark_connect(master = "spark://master:7077", config = conf)
+
+# Sum of the first 100 whole numbers
+sdf_len(sc, 100, repartition = 1) %>%
+    spark_apply(function(e) sum(e))
+# 5050
+```
+
+### Define Spark Dependencies
+
+```{note}
+This example is given for [Elasticsearch](https://www.elastic.co/guide/en/elasticsearch/hadoop/current/install.html).
+```
+
+Spark dependencies can be declared via the `spark.jars.packages` property
+(see [Spark Configuration](https://spark.apache.org/docs/latest/configuration.html#runtime-environment) for more information).
+
+They can be defined as a comma-separated list of Maven coordinates at the creation of the Spark session.
+
+```python
+from pyspark.sql import SparkSession
+
+spark = (
+    SparkSession.builder.appName("elasticsearch")
+    .config(
+        "spark.jars.packages", "org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0"
+    )
+    .getOrCreate()
+)
+```
+
+Dependencies can also be defined in the `spark-defaults.conf`.
+However, this has to be done by `root`, so it should only be considered when building custom images.
+
+```dockerfile
+USER root
+RUN echo "spark.jars.packages org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0" >> "${SPARK_HOME}/conf/spark-defaults.conf"
+USER ${NB_UID}
+```
+
+Jars will be downloaded dynamically at the creation of the Spark session and stored by default in `${HOME}/.ivy2/jars` (this location can be changed by setting `spark.jars.ivy`).
+
+## Tensorflow
+
+The `jupyter/tensorflow-notebook` image supports the use of
+[Tensorflow](https://www.tensorflow.org/) in single-machine or distributed mode.
+
+### Single Machine Mode
+
+```python
+import tensorflow as tf
+
+# The image ships TensorFlow 2.x, which removed tf.Session;
+# use the v1 compatibility API to run this classic graph-mode example.
+tf.compat.v1.disable_eager_execution()
+
+hello = tf.Variable("Hello World!")
+
+sess = tf.compat.v1.Session()
+init = tf.compat.v1.global_variables_initializer()
+
+sess.run(init)
+sess.run(hello)
+```
+
+### Distributed Mode
+
+```python
+import tensorflow as tf
+
+tf.compat.v1.disable_eager_execution()
+
+hello = tf.Variable("Hello Distributed World!")
+
+# tf.train.Server became tf.distribute.Server in TensorFlow 2.x
+server = tf.distribute.Server.create_local_server()
+sess = tf.compat.v1.Session(server.target)
+init = tf.compat.v1.global_variables_initializer()
+
+sess.run(init)
+sess.run(hello)
+```
+
+[sparkr]: https://spark.apache.org/docs/latest/sparkr.html
+[sparklyr]: https://spark.rstudio.com/
+[spark-conf]: https://spark.apache.org/docs/latest/configuration.html
diff --git a/docker-stacks/docs/using/troubleshooting.md b/docker-stacks/docs/using/troubleshooting.md
new file mode 100644
index 0000000..d9497ed
--- /dev/null
+++ b/docker-stacks/docs/using/troubleshooting.md
@@ -0,0 +1,342 @@
+# Troubleshooting Common Problems
+
+When troubleshooting, you may see unexpected behaviors or receive an error message.
+This section provides advice on how to identify and fix some of the most commonly encountered issues.
+
+Most of the `docker run` flags used in this document are explained in detail in the
+[Common Features, Docker Options section](common.md#docker-options) of the documentation.
+
+## Permission denied when mounting volumes
+
+If you are running a Docker container while mounting a local volume or host directory using the `-v` flag like so:
+
+```bash
+docker run -it --rm \
+    -p 8888:8888 \
+    -v <local-path>:<container-path> \
+    quay.io/jupyter/minimal-notebook:latest
+```
+
+you might face permissions issues when trying to access the mounted volume:
+
+```bash
+# assuming we mounted the volume in /home/jovyan/stagingarea
+# root is the owner of the mounted volume
+ls -ld ~/stagingarea/
+# drwxr-xr-x 2 root root 4096 Feb  1 12:55 stagingarea/
+
+touch stagingarea/kale.txt
+# touch: cannot touch 'stagingarea/kale.txt': Permission denied
+```
+
+In this case, the user of the container (`jovyan`) and the owner of the mounted volume (`root`)
+have different permission levels and ownership over the container's directories and mounts.
+The following sections cover a few of these scenarios and how to fix them.
+
+**Some things to try:**
+
+1. **Change ownership of the volume mount**
+
+   You can change the ownership of the volume mount using the `chown` command.
+   In the case of the docker-stacks images, you can set the `CHOWN_EXTRA` and `CHOWN_EXTRA_OPTS` environment variables.
+
+   For example, to change the ownership of the volume mount to the `jovyan` user (the non-privileged default user in the Docker images):
+
+   ```bash
+   # running in detached mode - can also be run in interactive mode
+   docker run --detach \
+       -v <local-path>:<container-path> \
+       -p 8888:8888 \
+       --user root \
+       -e CHOWN_EXTRA="<container-path>" \
+       -e CHOWN_EXTRA_OPTS="-R" \
+       quay.io/jupyter/minimal-notebook
+   ```
+
+   where:
+
+   - `CHOWN_EXTRA="<some-dir>,<some-other-dir>"`: will change the ownership and group of the specified container directories (non-recursive by default).
+     You need to provide full paths starting with `/`.
+   - `CHOWN_EXTRA_OPTS="-R"`: will recursively change the ownership and group of the directory specified in `CHOWN_EXTRA`.
+   - `--user root`: you **must** run the container with the root user to change ownership at runtime.
+
+   Now accessing the mount should work as expected:
+
+   ```bash
+   # assuming we mounted the volume in /home/jovyan/stagingarea
+   ls -ld ~/stagingarea
+   # drwxr-xr-x 2 jovyan users 4096 Feb  1 12:55 stagingarea/
+
+   touch stagingarea/kale.txt
+   # jovyan is now the owner of /home/jovyan/stagingarea
+   # ls -la ~/stagingarea/
+   # -rw-r--r-- 1 jovyan users 0 Feb  1 14:41 kale.txt
+   ```
+
+   ```{admonition} Additional notes
+   - If you are mounting your volume inside the `/home/` directory, you can use the `-e CHOWN_HOME=yes` and `CHOWN_HOME_OPTS="-R"` flags
+     instead of the `-e CHOWN_EXTRA` and `-e CHOWN_EXTRA_OPTS` in the example above.
+   - This solution should work in most cases where you have created a docker volume
+     (i.e. using the [`docker volume create --name <volume-name>` command](https://docs.docker.com/storage/volumes/#create-and-manage-volumes)) and mounted it using the `-v` flag in `docker run`.
+   ```
+
+2. **Matching the container's UID/GID with the host's**
+
+   Docker handles mounting host directories differently from mounting volumes, even though the syntax is essentially the same (i.e. `-v`).
+
+   When you initialize a Docker container using the `-v` flag, the host directories are bind-mounted directly into the container.
+   Therefore, the permissions and ownership are copied over and will be **the same** as the ones in your local host
+   (including user IDs), which may result in permissions errors when trying to access directories or create/modify files inside.
+
+   Suppose your local user has a `UID` and `GID` of `1234` and `5678`, respectively.
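+
+   You can check your own values with the `id` command; the `1234`/`5678` numbers used in this walkthrough are purely illustrative:
+
+   ```bash
+   # print the current host user's numeric UID and GID
+   id -u   # 1234
+   id -g   # 5678
+   ```
+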
+   To fix the UID discrepancies between your local directories and the container's directories,
+   you can run the container with an explicit `NB_UID` and `NB_GID` to match that of the local user:
+
+   ```bash
+   docker run -it --rm \
+       --user root \
+       -p 8888:8888 \
+       -e NB_UID=1234 \
+       -e NB_GID=5678 \
+       -v "${PWD}"/test:/home/jovyan/work \
+       quay.io/jupyter/minimal-notebook:latest
+
+   # you should see an output similar to this
+   # Update jovyan's UID:GID to 1234:5678
+   # Running as jovyan: bash
+   ```
+
+   where:
+
+   - `NB_UID` and `NB_GID` should match the local user's UID and GID.
+   - You **must** use `--user root` to ensure that the `UID` and `GID` are updated at runtime.
+
+````{admonition} Additional notes
+- The caveat with this approach is that since these changes are applied at runtime,
+  you will need to re-run the same command with the appropriate flags and environment variables
+  if you need to recreate the container (i.e. after removing/destroying it).
+- If you pass a numeric UID, it **must** be in the range of 0-2147483647.
+- This approach only updates the UID and GID of the **existing `jovyan` user** instead of creating a new user.
+  From the above example:
+  ```bash
+  id
+  # uid=1234(jovyan) gid=5678(jovyan) groups=5678(jovyan),100(users)
+  ```
+````
+
+## Permission issues after changing the UID/GID and USER in the container
+
+If you have also **created a new user**, you might be experiencing any of the following issues:
+
+- the `root` user is the owner of `/home` or a mounted volume
+- when starting the container, you get an error such as `Failed to change ownership of the home directory.`
+- getting permission denied when trying to `conda install` packages
+
+**Some things to try:**
+
+1. **Ensure the new user has ownership of `/home` and volume mounts**
+
+   For example, say you want to create a user `callisto` with a `GID` and `UID` of `1234`.
+   You will have to add the following flags to the docker run command:
+
+   ```bash
+   docker run -it --rm \
+       -p 8888:8888 \
+       --user root \
+       -e NB_USER=callisto \
+       -e NB_UID=1234 \
+       -e NB_GID=1234 \
+       -e CHOWN_HOME=yes \
+       -e CHOWN_HOME_OPTS="-R" \
+       -w "/home/callisto" \
+       -v "${PWD}"/test:/home/callisto/work \
+       quay.io/jupyter/minimal-notebook
+
+   # Updated the jovyan user:
+   # - username: jovyan       -> callisto
+   # - home dir: /home/jovyan -> /home/callisto
+   # Update callisto UID:GID to 1234:1234
+   # Attempting to copy /home/jovyan to /home/callisto...
+   # Success!
+   # Ensuring /home/callisto is owned by 1234:1234
+   # Running as callisto: bash
+   ```
+
+   where:
+
+   - `-e NB_USER=callisto`: will create a new user `callisto` and automatically add it to the `users` group (it does not delete the `jovyan` user)
+   - `-e NB_UID=1234` and `-e NB_GID=1234`: will set the `UID` and `GID` of the new user (`callisto`) to `1234`
+   - `-e CHOWN_HOME_OPTS="-R"` and `-e CHOWN_HOME=yes`: ensure that the new user is the owner of the `/home` directory and subdirectories
+     (setting `CHOWN_HOME_OPTS="-R"` will ensure this change is applied recursively)
+   - `-w "/home/callisto"` sets the working directory to be the new user's home
+
+   ```{admonition} Additional notes
+   In the example above, the `-v` flag is used to mount the local volume onto the new user's `/home` directory.
+
+   However, if you are mounting a volume elsewhere,
+   you also need to use the `-e CHOWN_EXTRA=<mount-dir>` flag to avoid any permission issues
+   (see the section [Permission denied when mounting volumes](#permission-denied-when-mounting-volumes) on this page).
+   ```
+
+2. **Dynamically assign the user ID and GID**
+
+   The above case ensures that the `/home` directory is owned by a newly created user with a specific `UID` and `GID`,
+   but if you want to assign the `UID` and `GID` of the new user dynamically,
+   you can make the following adjustments:
+
+   ```bash
+   docker run -it --rm \
+       -p 8888:8888 \
+       --user root \
+       -e NB_USER=callisto \
+       -e NB_UID="$(id -u)" \
+       -e NB_GID="$(id -g)" \
+       -e CHOWN_HOME=yes \
+       -e CHOWN_HOME_OPTS="-R" \
+       -w "/home/callisto" \
+       -v "${PWD}"/test:/home/callisto/work \
+       quay.io/jupyter/minimal-notebook
+   ```
+
+   where:
+
+   - `"$(id -u)"` and `"$(id -g)"` will dynamically assign the `UID` and `GID` of the user executing the `docker run` command to the new user (`callisto`)
+
+## Additional tips and troubleshooting commands for permission-related errors
+
+- Pass absolute paths to the `-v` flag:
+
+  ```bash
+  -v "${PWD}"/<host-dir>:/home/jovyan/work
+  ```
+
+  This example uses the syntax `${PWD}`, which is replaced with the full path to the current directory at runtime.
+  The destination path should also be an absolute path starting with a `/` such as `/home/jovyan/work`.
+
+- You might want to consider using the Docker native `--user <UID>` and `--group-add users` flags instead of `-e NB_UID` and `-e NB_GID`:
+
+  ```bash
+  # note this will use the same UID from
+  # the user calling the command, thus matching the local host
+
+  docker run -it --rm \
+      -p 8888:8888 \
+      --user "$(id -u)" --group-add users \
+      -v <local-path>:/home/jovyan/work quay.io/jupyter/datascience-notebook
+  ```
+
+  This command will launch the container with a specific user UID and add that user to the `users` group,
+  so that it can modify the files in the default `/home` and `/opt/conda` directories,
+  thereby avoiding issues when trying to `conda install` additional packages.
+
+- Use `docker inspect <container-name>` and look for the [`Mounts` section](https://docs.docker.com/storage/volumes/#start-a-container-with-a-volume)
+  to verify that the volume was created and mounted accordingly:
+
+  ```json
+  {
+    "Mounts": [
+      {
+        "Type": "volume",
+        "Name": "my-vol",
+        "Source": "/var/lib/docker/volumes/stagingarea/_data",
+        "Destination": "/home/jovyan/stagingarea",
+        "Driver": "local",
+        "Mode": "z",
+        "RW": true,
+        "Propagation": ""
+      }
+    ]
+  }
+  ```
+
+## Problems installing conda packages from specific channels
+
+By default, the docker-stacks images have the conda channel priority set to `strict`.
+This may cause problems when trying to install packages from a channel with lower priority.
+
+```bash
+conda config --show | grep priority
+# channel_priority: strict
+
+# to see its meaning
+conda config --describe channel_priority
+
+# checking the current channels
+conda config --show default_channels
+# default_channels:
+#   - https://repo.anaconda.com/pkgs/main
+#   - https://repo.anaconda.com/pkgs/r
+```
+
+**Installing packages from alternative channels:**
+
+You can install packages from other conda channels (e.g. `bioconda`) by disabling the `channel_priority` setting:
+
+```bash
+# install by disabling channel priority at the command level
+conda install --no-channel-priority -c bioconda bioconductor-geoquery
+```
+
+Additional details are provided in the [Using Alternative Channels](../using/common.md#using-alternative-channels) section of the [Common Features](common.md) page.
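+
+If you routinely install from lower-priority channels, a possible alternative (a sketch, not a documented recommendation) is to relax the priority setting for the whole environment; note that this weakens conda's protection against mixing incompatible channels:
+
+```bash
+# switch from strict to flexible channel priority for this environment
+conda config --set channel_priority flexible
+
+# subsequent installs may then resolve packages from lower-priority channels
+conda install -c bioconda bioconductor-geoquery
+```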
+
+## Tokens are being rejected
+
+If you are a regular user of VSCode and the Jupyter extension,
+you might experience either of these issues when using any of the docker-stacks images:
+
+- when clicking on the URL displayed on your command line logs, you face a "This site cannot be reached" page on your web browser
+- using the produced token and/or URL results in an "Invalid credentials" error on the Jupyter "Token authentication is enabled" page
+
+  ```bash
+  # example log output from the docker run command
+
+  # [...]
+  # Or copy and paste one of these URLs:
+  #     http://3d4cf3809e3f:8888/?token=996426e890f8dc22fa6835a44442b6026cba02ee61fee6a2
+  #  or http://127.0.0.1:8888/?token=996426e890f8dc22fa6835a44442b6026cba02ee61fee6a2
+  ```
+
+**Some things to try:**
+
+1. **Find lingering Jupyter processes in the background**
+
+   The first thing you want to try is to check that no other Jupyter processes are running in the background:
+
+   ```bash
+   ps aux | grep jupyter
+   ```
+
+   If there are existing processes running, you can kill them with:
+
+   ```bash
+   # example output from the above command
+   # my-user  3412 ... --daemon-module=vscode_datascience_helpers.jupyter_daemon
+
+   # using the pid from the above log
+   kill 3412
+   ```
+
+2. **Turn off Jupyter auto-start in VSCode**
+
+   Alternatively, you might want to ensure that the `Jupyter: Disable Jupyter Auto Start` setting is turned on to avoid this issue in the future.
+
+   You can achieve this from the `Settings > Jupyter` menu in VSCode:
+
+   ![VSCode Settings UI - Jupyter: Disable Jupyter Auto Start checkbox checked](../_static/using/troubleshooting/vscode-jupyter-settings.png)
+
+3. **Route container to unused local port**
+
+   Instead of mapping Docker port `8888` to local port `8888`, map to another unused local port.
+   You can see an example of mapping to local port `8001`:
+
+   ```bash
+   docker run -it --rm -p 8001:8888 quay.io/jupyter/datascience-notebook
+   ```
+
+   When the terminal provides the link to access Jupyter (like the `http://127.0.0.1:8888/?token=...` URL in the log output above),
+   change the default port value of `8888` in the URL to the port value mapped with the `docker run` command.
+
+   In this example, we use `8001`, so the edited link would be `http://127.0.0.1:8001/?token=...`.
+
+   Note: Port mapping for Jupyter has other applications outside of Docker.
+   For example, it can be used to allow multiple Jupyter instances when using SSH to control cloud devices.
diff --git a/docker-stacks/examples/README.md b/docker-stacks/examples/README.md
new file mode 100644
index 0000000..7bb53eb
--- /dev/null
+++ b/docker-stacks/examples/README.md
@@ -0,0 +1,4 @@
+# Examples
+
+These examples are not tested and might not work.
+Please send PRs if you start using these examples and run into issues.
diff --git a/docker-stacks/examples/docker-compose/README.md b/docker-stacks/examples/docker-compose/README.md
new file mode 100644
index 0000000..d2c2e3e
--- /dev/null
+++ b/docker-stacks/examples/docker-compose/README.md
@@ -0,0 +1,176 @@
+# Docker Compose example
+
+This example demonstrates how to deploy docker-stack notebook containers to any Docker Machine-controlled host using Docker Compose.
+
+## Prerequisites
+
+- [Docker Engine](https://docs.docker.com/engine/) 1.10.0+
+- [Docker Machine](https://docs.docker.com/machine/) 0.6.0+
+- [Docker Compose](https://docs.docker.com/compose/) 1.6.0+
+
+See the [installation instructions](https://docs.docker.com/engine/installation/) for your environment.
+
+## Quickstart
+
+Build and run a `jupyter/minimal-notebook` container on a VirtualBox VM on your local desktop.
+ +```bash +# create a Docker Machine-controlled VirtualBox VM +bin/vbox.sh mymachine + +# activate the docker machine +eval "$(docker-machine env mymachine)" + +# build the notebook image on the machine +notebook/build.sh + +# bring up the notebook container +notebook/up.sh +``` + +To stop and remove the container: + +```bash +notebook/down.sh +``` + +## FAQ + +### How do I specify which docker-stack notebook image to deploy? + +You can customize the docker-stack notebook image to deploy by modifying the `notebook/Dockerfile`. +For example, you can build and deploy a `jupyter/all-spark-notebook` by modifying the Dockerfile like so: + +```dockerfile +FROM quay.io/jupyter/all-spark-notebook +# Your RUN commands and so on +``` + +Once you modify the Dockerfile, don't forget to rebuild the image. + +```bash +# activate the docker machine +eval "$(docker-machine env mymachine)" + +notebook/build.sh +``` + +### Can I run multiple notebook containers on the same VM? + +Yes. Set environment variables to specify unique names and ports when running the `up.sh` command. + +```bash +NAME=my-notebook PORT=9000 notebook/up.sh +NAME=your-notebook PORT=9001 notebook/up.sh +``` + +To stop and remove the containers: + +```bash +NAME=my-notebook notebook/down.sh +NAME=your-notebook notebook/down.sh +``` + +### Where are my notebooks stored? + +The `up.sh` creates a Docker volume named after the notebook container with a `-work` suffix, e.g., `my-notebook-work`. + +### Can multiple notebook containers share the same notebook volume? + +Yes. Set the `WORK_VOLUME` environment variable to the same value for each notebook. + +```bash +NAME=my-notebook PORT=9000 WORK_VOLUME=our-work notebook/up.sh +NAME=your-notebook PORT=9001 WORK_VOLUME=our-work notebook/up.sh +``` + +### How do I run over HTTPS? + +To run the Jupyter Server with a self-signed certificate, pass the `--secure` option to the `up.sh` script. +You must also provide a password, which will be used to secure the Jupyter Server. +You can specify the password by setting the `PASSWORD` environment variable, or by passing it to the `up.sh` script. + +```bash +PASSWORD=a_secret notebook/up.sh --secure + +# or +notebook/up.sh --secure --password a_secret +``` + +### Can I use Let's Encrypt certificate chains? + +Sure. If you want to secure access to publicly addressable notebook containers, you can generate a free certificate using the [Let's Encrypt](https://letsencrypt.org) service. + +This example includes the `bin/letsencrypt.sh` script, which runs the `letsencrypt` client to create a full-chain certificate and private key, and stores them in a Docker volume. + +```{note} +The script hard codes several `letsencrypt` options, one of which automatically agrees to the Let's Encrypt Terms of Service. +``` + +The following command will create a certificate chain and store it in a Docker volume named `mydomain-secrets`. + +```bash +FQDN=host.mydomain.com EMAIL=myemail@somewhere.com \ + SECRETS_VOLUME=mydomain-secrets \ + bin/letsencrypt.sh +``` + +Now run `up.sh` with the `--letsencrypt` option. +You must also provide the name of the secrets volume and a password. + +```bash +PASSWORD=a_secret SECRETS_VOLUME=mydomain-secrets notebook/up.sh --letsencrypt + +# or +notebook/up.sh --letsencrypt --password a_secret --secrets mydomain-secrets +``` + +Be aware that Let's Encrypt has a pretty [low rate limit per domain](https://community.letsencrypt.org/t/public-beta-rate-limits/4772/3) at the moment. 
+You can avoid exhausting your limit by testing against the Let's Encrypt staging servers.
+To hit their staging servers, set the environment variable `CERT_SERVER=--staging`.
+
+```bash
+FQDN=host.mydomain.com EMAIL=myemail@somewhere.com \
+    CERT_SERVER=--staging \
+    bin/letsencrypt.sh
+```
+
+Also, be aware that Let's Encrypt certificates are short-lived (90 days).
+If you need them for a longer period of time, you'll need to manually set up a cron job to run the renewal steps.
+(You can reuse the command above.)
+
+### Can I deploy to any Docker Machine host?
+
+Yes, you should be able to deploy to any Docker Machine-controlled host.
+To make it easier to get up and running, this example includes scripts to provision Docker Machines to VirtualBox and IBM SoftLayer, but more scripts are welcome!
+
+To create a Docker machine using a VirtualBox VM on your local desktop:
+
+```bash
+bin/vbox.sh mymachine
+```
+
+To create a Docker machine using a virtual device on IBM SoftLayer:
+
+```bash
+export SOFTLAYER_USER=my_softlayer_username
+export SOFTLAYER_API_KEY=my_softlayer_api_key
+export SOFTLAYER_DOMAIN=my.domain
+
+# Create virtual device
+bin/softlayer.sh myhost
+
+# Add DNS entry (SoftLayer DNS zone must exist for SOFTLAYER_DOMAIN)
+bin/sl-dns.sh myhost
+```
+
+## Troubleshooting
+
+### Unable to connect to VirtualBox VM on Mac OS X when using Cisco VPN client
+
+The Cisco VPN client blocks access to IP addresses that it does not know about, and may block access to a new VM if it is created while the Cisco VPN client is running.
+
+1. Stop the Cisco VPN client. (It does not allow modifications to the route table.)
+2. Run `ifconfig` to list the `vboxnet` virtual network devices.
+3. Run `sudo route -nv add -net 192.168.99 -interface vboxnetX`, where X is the number of the virtual device assigned to the VirtualBox VM.
+4. Start the Cisco VPN client.
diff --git a/docker-stacks/examples/docker-compose/bin/letsencrypt.sh b/docker-stacks/examples/docker-compose/bin/letsencrypt.sh
new file mode 100755
index 0000000..ee97630
--- /dev/null
+++ b/docker-stacks/examples/docker-compose/bin/letsencrypt.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+# Use https://letsencrypt.org to create a certificate for a single domain
+# and store it in a Docker volume.
+
+set -e
+
+# Get domain and email from environment
+[ -z "${FQDN}" ] && \
+    echo "ERROR: Must set FQDN environment variable" && \
+    exit 1
+
+[ -z "${EMAIL}" ] && \
+    echo "ERROR: Must set EMAIL environment variable" && \
+    exit 1
+
+# letsencrypt certificate server type (default is production).
+# Set `CERT_SERVER=--staging` for staging.
+: "${CERT_SERVER=''}"
+
+# Create Docker volume to contain the cert
+: "${SECRETS_VOLUME:=my-notebook-secrets}"
+docker volume create --name "${SECRETS_VOLUME}" 1>/dev/null
+# Generate the cert and save it to the Docker volume
+docker run -it --rm \
+    -p 80:80 \
+    -v "${SECRETS_VOLUME}":/etc/letsencrypt \
+    quay.io/letsencrypt/letsencrypt:latest \
+    certonly \
+    --non-interactive \
+    --keep-until-expiring \
+    --standalone \
+    --standalone-supported-challenges http-01 \
+    --agree-tos \
+    --domain "${FQDN}" \
+    --email "${EMAIL}" \
+    "${CERT_SERVER}"
+
+# Set permissions so nobody can read the cert and key.
+# Also symlink the certs into the root of the /etc/letsencrypt
+# directory so that the FQDN doesn't have to be known later.
+docker run -it --rm \ + -v "${SECRETS_VOLUME}":/etc/letsencrypt \ + ubuntu:22.04 \ + bash -c "ln -s /etc/letsencrypt/live/${FQDN}/* /etc/letsencrypt/ && \ + find /etc/letsencrypt -type d -exec chmod 755 {} +" diff --git a/docker-stacks/examples/docker-compose/bin/sl-dns.sh b/docker-stacks/examples/docker-compose/bin/sl-dns.sh new file mode 100755 index 0000000..fc3ea4e --- /dev/null +++ b/docker-stacks/examples/docker-compose/bin/sl-dns.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +set -e + +# User must have slcli installed +which slcli > /dev/null || (echo "SoftLayer cli not found (pip install softlayer)"; exit 1) + +USAGE="Usage: $(basename "${0}") machine_name [domain]" +E_BADARGS=85 + +# Machine name is first command line arg +MACHINE_NAME="${1}" && [ -z "${MACHINE_NAME}" ] && echo "${USAGE}" && exit ${E_BADARGS} + +# Use SOFTLAYER_DOMAIN env var if domain name not set as second arg +DOMAIN="${2:-$SOFTLAYER_DOMAIN}" && [ -z "${DOMAIN}" ] && \ + echo "Must specify domain or set SOFTLAYER_DOMAIN environment variable" && \ + echo "${USAGE}" && exit ${E_BADARGS} + +IP=$(docker-machine ip "${MACHINE_NAME}") + +slcli dns record-add "${DOMAIN}" "${MACHINE_NAME}" A "${IP}" diff --git a/docker-stacks/examples/docker-compose/bin/softlayer.sh b/docker-stacks/examples/docker-compose/bin/softlayer.sh new file mode 100755 index 0000000..cdf4766 --- /dev/null +++ b/docker-stacks/examples/docker-compose/bin/softlayer.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +# Set default SoftLayer VM settings +: "${SOFTLAYER_CPU:=4}" +export SOFTLAYER_CPU +: "${SOFTLAYER_DISK_SIZE:=100}" +export SOFTLAYER_DISK_SIZE +: "${SOFTLAYER_MEMORY:=4096}" +export SOFTLAYER_MEMORY +: "${SOFTLAYER_REGION:=wdc01}" +export SOFTLAYER_REGION + +docker-machine create --driver softlayer "$@" diff --git a/docker-stacks/examples/docker-compose/bin/vbox.sh b/docker-stacks/examples/docker-compose/bin/vbox.sh new file mode 100755 index 0000000..3859719 --- /dev/null +++ b/docker-stacks/examples/docker-compose/bin/vbox.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +# Set reasonable default VM settings +: "${VIRTUALBOX_CPUS:=4}" +export VIRTUALBOX_CPUS +: "${VIRTUALBOX_MEMORY_SIZE:=4096}" +export VIRTUALBOX_MEMORY_SIZE + +docker-machine create --driver virtualbox "$@" diff --git a/docker-stacks/examples/docker-compose/notebook/Dockerfile b/docker-stacks/examples/docker-compose/notebook/Dockerfile new file mode 100644 index 0000000..056c22f --- /dev/null +++ b/docker-stacks/examples/docker-compose/notebook/Dockerfile @@ -0,0 +1,16 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
+ +# Pick your favorite docker-stacks image +FROM quay.io/jupyter/minimal-notebook + +USER root + +# Add permanent apt-get installs and other root commands here +# e.g., RUN apt-get install --yes --no-install-recommends npm nodejs + +USER ${NB_UID} + +# Switch back to jovyan to avoid accidental container runs as root +# Add permanent mamba/pip/conda installs, data files, other user libs here +# e.g., RUN pip install --no-cache-dir flake8 diff --git a/docker-stacks/examples/docker-compose/notebook/build.sh b/docker-stacks/examples/docker-compose/notebook/build.sh new file mode 100755 index 0000000..304d4f7 --- /dev/null +++ b/docker-stacks/examples/docker-compose/notebook/build.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# Setup environment +# shellcheck disable=SC1091 +source "${DIR}/env.sh" + +# Build the notebook image +docker-compose -f "${DIR}/notebook.yml" build diff --git a/docker-stacks/examples/docker-compose/notebook/down.sh b/docker-stacks/examples/docker-compose/notebook/down.sh new file mode 100755 index 0000000..f0fdd70 --- /dev/null +++ b/docker-stacks/examples/docker-compose/notebook/down.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# Setup environment +# shellcheck disable=SC1091 +source "${DIR}/env.sh" + +# Bring down the notebook container, using container name as project name +docker-compose -f "${DIR}/notebook.yml" -p "${NAME}" down diff --git a/docker-stacks/examples/docker-compose/notebook/env.sh b/docker-stacks/examples/docker-compose/notebook/env.sh new file mode 100755 index 0000000..d3ef1eb --- /dev/null +++ b/docker-stacks/examples/docker-compose/notebook/env.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +# Set default values for environment variables required by notebook compose +# configuration file. + +# Container name +: "${NAME:=my-notebook}" +export NAME + +# Exposed container port +: "${PORT:=80}" +export PORT + +# Container work volume name +: "${WORK_VOLUME:=${NAME}-work}" +export WORK_VOLUME + +# Container secrets volume name +: "${SECRETS_VOLUME:=${NAME}-secrets}" +export SECRETS_VOLUME diff --git a/docker-stacks/examples/docker-compose/notebook/letsencrypt-notebook.yml b/docker-stacks/examples/docker-compose/notebook/letsencrypt-notebook.yml new file mode 100644 index 0000000..06bab31 --- /dev/null +++ b/docker-stacks/examples/docker-compose/notebook/letsencrypt-notebook.yml @@ -0,0 +1,31 @@ +--- +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +version: "2" + +services: + notebook: + build: . 
+ image: my-notebook + container_name: ${NAME} + volumes: + - "work:/home/jovyan/work" + - "secrets:/etc/letsencrypt" + ports: + - "${PORT}:8888" + environment: + USE_HTTPS: "yes" + PASSWORD: ${PASSWORD} + command: > + start-notebook.py + --ServerApp.certfile=/etc/letsencrypt/fullchain.pem + --ServerApp.keyfile=/etc/letsencrypt/privkey.pem + +volumes: + work: + external: + name: ${WORK_VOLUME} + secrets: + external: + name: ${SECRETS_VOLUME} diff --git a/docker-stacks/examples/docker-compose/notebook/notebook.yml b/docker-stacks/examples/docker-compose/notebook/notebook.yml new file mode 100644 index 0000000..72002a5 --- /dev/null +++ b/docker-stacks/examples/docker-compose/notebook/notebook.yml @@ -0,0 +1,20 @@ +--- +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +version: "2" + +services: + notebook: + build: . + image: my-notebook + container_name: ${NAME} + volumes: + - "work:/home/jovyan/work" + ports: + - "${PORT}:8888" + +volumes: + work: + external: + name: ${WORK_VOLUME} diff --git a/docker-stacks/examples/docker-compose/notebook/secure-notebook.yml b/docker-stacks/examples/docker-compose/notebook/secure-notebook.yml new file mode 100644 index 0000000..6b38f9c --- /dev/null +++ b/docker-stacks/examples/docker-compose/notebook/secure-notebook.yml @@ -0,0 +1,23 @@ +--- +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +version: "2" + +services: + notebook: + build: . + image: my-notebook + container_name: ${NAME} + volumes: + - "work:/home/jovyan/work" + ports: + - "${PORT}:8888" + environment: + USE_HTTPS: "yes" + PASSWORD: ${PASSWORD} + +volumes: + work: + external: + name: ${WORK_VOLUME} diff --git a/docker-stacks/examples/docker-compose/notebook/up.sh b/docker-stacks/examples/docker-compose/notebook/up.sh new file mode 100755 index 0000000..c02ed3a --- /dev/null +++ b/docker-stacks/examples/docker-compose/notebook/up.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
+ +set -e + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +USAGE="Usage: $(basename "${0}") [--secure | --letsencrypt] [--password PASSWORD] [--secrets SECRETS_VOLUME]" + +# Parse args to determine security settings +SECURE=${SECURE:=no} +LETSENCRYPT=${LETSENCRYPT:=no} +while [[ $# -gt 0 ]]; do +key="${1}" +case "${key}" in + --secure) + SECURE=yes + ;; + --letsencrypt) + LETSENCRYPT=yes + ;; + --secrets) + SECRETS_VOLUME="${2}" + shift # past argument + ;; + --password) + PASSWORD="${2}" + export PASSWORD + shift # past argument + ;; + *) # unknown option + ;; +esac +shift # past argument or value +done + +if [[ "${LETSENCRYPT}" == yes || "${SECURE}" == yes ]]; then + if [ -z "${PASSWORD:+x}" ]; then + echo "ERROR: Must set PASSWORD if running in secure mode" + echo "${USAGE}" + exit 1 + fi + if [ "${LETSENCRYPT}" == yes ]; then + CONFIG=letsencrypt-notebook.yml + if [ -z "${SECRETS_VOLUME:+x}" ]; then + echo "ERROR: Must set SECRETS_VOLUME if running in letsencrypt mode" + echo "${USAGE}" + exit 1 + fi + else + CONFIG=secure-notebook.yml + fi + export PORT=${PORT:=443} +else + CONFIG=notebook.yml + export PORT=${PORT:=80} +fi + +# Setup environment +# shellcheck disable=SC1091 +source "${DIR}/env.sh" + +# Create a Docker volume to store notebooks +docker volume create --name "${WORK_VOLUME}" + +# Bring up a notebook container, using container name as project name +echo "Bringing up notebook '${NAME}'" +docker-compose -f "${DIR}/${CONFIG}" -p "${NAME}" up -d + +IP=$(docker-machine ip "$(docker-machine active)") +echo "Notebook ${NAME} listening on ${IP}:${PORT}" diff --git a/docker-stacks/examples/make-deploy/Dockerfile b/docker-stacks/examples/make-deploy/Dockerfile new file mode 100644 index 0000000..056c22f --- /dev/null +++ b/docker-stacks/examples/make-deploy/Dockerfile @@ -0,0 +1,16 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +# Pick your favorite docker-stacks image +FROM quay.io/jupyter/minimal-notebook + +USER root + +# Add permanent apt-get installs and other root commands here +# e.g., RUN apt-get install --yes --no-install-recommends npm nodejs + +USER ${NB_UID} + +# Switch back to jovyan to avoid accidental container runs as root +# Add permanent mamba/pip/conda installs, data files, other user libs here +# e.g., RUN pip install --no-cache-dir flake8 diff --git a/docker-stacks/examples/make-deploy/Makefile b/docker-stacks/examples/make-deploy/Makefile new file mode 100644 index 0000000..aa62dd0 --- /dev/null +++ b/docker-stacks/examples/make-deploy/Makefile @@ -0,0 +1,44 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
+ +.PHONY: help check image notebook + +IMAGE:=my-notebook + +# Common, extensible docker run command +define RUN_NOTEBOOK +@docker volume create --name $(WORK_VOLUME) > /dev/null +-@docker rm --force $(NAME) 2> /dev/null +@docker run --detach -p $(PORT):8888 \ + --name $(NAME) \ + -v $(WORK_VOLUME):/home/jovyan/work \ + $(DOCKER_ARGS) \ + $(IMAGE) bash -c "$(PRE_CMD) chown jovyan /home/jovyan/work && start-notebook.py $(ARGS)" > /dev/null +@echo "DONE: Notebook '$(NAME)' listening on $$(docker-machine ip $$(docker-machine active)):$(PORT)" +endef + +help: + @cat README.md + +check: + @which docker-machine > /dev/null || (echo "ERROR: docker-machine not found (brew install docker-machine)"; exit 1) + @which docker > /dev/null || (echo "ERROR: docker not found (brew install docker)"; exit 1) + @docker | grep volume > /dev/null || (echo "ERROR: docker 1.9.0+ required"; exit 1) + +image: DOCKER_ARGS?= +image: + @docker build --rm $(DOCKER_ARGS) --tag $(IMAGE) . + +notebook: PORT?=80 +notebook: NAME?=notebook +notebook: WORK_VOLUME?=$(NAME)-data +notebook: check + $(RUN_NOTEBOOK) + +# docker-machine drivers +include virtualbox.makefile +include softlayer.makefile + +# Preset notebook configurations +include self-signed.makefile +include letsencrypt.makefile diff --git a/docker-stacks/examples/make-deploy/README.md b/docker-stacks/examples/make-deploy/README.md new file mode 100644 index 0000000..0636612 --- /dev/null +++ b/docker-stacks/examples/make-deploy/README.md @@ -0,0 +1,130 @@ +# Make deploy example + +This folder contains a Makefile and a set of supporting files demonstrating how to run a docker-stack notebook container on a docker-machine controlled host. + +## Prerequisites + +- make 3.81+ + - Ubuntu users: Be aware of [make 3.81 defect 483086](https://bugs.launchpad.net/ubuntu/+source/make-dfsg/+bug/483086) which exists in 14.04 LTS but is fixed in 15.04+ +- docker-machine 0.5.0+ +- docker 1.9.0+ + +## Quickstart + +To show what's possible, here's how to run the `jupyter/minimal-notebook` on a brand-new local virtualbox. + +```bash +# create a new VM +make virtualbox-vm NAME=dev +# make the new VM the active docker machine +eval $(docker-machine env dev) +# pull a docker stack and build a local image from it +make image +# start a Server in a container +make notebook +``` + +The last command will log the IP address and port to visit in your browser. + +## FAQ + +### Can I run multiple notebook containers on the same VM? + +Yes. Specify a unique name and port on the `make notebook` command. + +```bash +make notebook NAME=my-notebook PORT=9000 +make notebook NAME=your-notebook PORT=9001 +``` + +### Can multiple notebook containers share their notebook directory? + +Yes. + +```bash +make notebook NAME=my-notebook PORT=9000 WORK_VOLUME=our-work +make notebook NAME=your-notebook PORT=9001 WORK_VOLUME=our-work +``` + +### How do I run over HTTPS? + +Instead of `make notebook`, run `make self-signed-notebook PASSWORD=your_desired_password`. +This target gives you a notebook with a self-signed certificate. + +### That self-signed certificate is a pain. Let's Encrypt? + +Yes. Please. + +```bash +make letsencrypt FQDN=host.mydomain.com EMAIL=myemail@somewhere.com +make letsencrypt-notebook +``` + +The first command creates a Docker volume named after the notebook container with a `-secrets` suffix. +It then runs the `letsencrypt` client with a slew of options (one of which has you automatically agreeing to the Let's Encrypt Terms of Service, see the Makefile). 
+The second command mounts the secrets volume and configures Jupyter to use the full-chain certificate and private key.
+
+Be aware: Let's Encrypt has a pretty [low rate limit per domain](https://community.letsencrypt.org/t/public-beta-rate-limits/4772/3) at the moment.
+You can avoid exhausting your limit by testing against the Let's Encrypt staging servers.
+To hit their staging servers, set the environment variable `CERT_SERVER=--staging`.
+
+```bash
+make letsencrypt FQDN=host.mydomain.com EMAIL=myemail@somewhere.com CERT_SERVER=--staging
+```
+
+Also, keep in mind Let's Encrypt certificates are short-lived: 90 days at the moment.
+You'll need to manually set up a cron job to run the renewal steps.
+(You can reuse the first command above.)
+
+### My pip/conda/apt-get installs disappear every time I restart the container. Can I make them permanent?
+
+```bash
+# add your pip, conda, apt-get, etc. permanent features to the Dockerfile where
+# indicated by the comments in the Dockerfile
+vi Dockerfile
+make image
+make notebook
+```
+
+### How do I upgrade my Docker container?
+
+```bash
+make image DOCKER_ARGS=--pull
+make notebook
+```
+
+The first line pulls the latest version of the Docker image used in the local Dockerfile.
+Then it rebuilds the local Docker image containing any customizations you may have added to it.
+The second line kills your currently running notebook container, and starts a fresh one using the new image.
+
+### Can I run on a VM provider other than VirtualBox?
+
+Yes. As an example, there's a `softlayer.makefile` included in this repo.
+You would use it like so:
+
+```bash
+make softlayer-vm NAME=myhost \
+    SOFTLAYER_DOMAIN=your_desired_domain \
+    SOFTLAYER_USER=your_user_id \
+    SOFTLAYER_API_KEY=your_api_key
+eval $(docker-machine env myhost)
+# optional, creates a real DNS entry for the VM using the machine name as the hostname
+make softlayer-dns SOFTLAYER_DOMAIN=your_desired_domain
+make image
+make notebook
+```
+
+If you'd like to add support for another docker-machine driver, use the `softlayer.makefile` as a template.
+
+### Where are my notebooks stored?
+
+`make notebook` creates a Docker volume named after the notebook container with a `-data` suffix.
+
+### Uh ... make?
+
+Yes, sorry Windows users. It got the job done for a simple example.
+We can certainly accept other deployment mechanism examples in the parent folder or in other repos.
+
+### Are there any other options?
+
+Yes indeed. `cat` the Makefiles and look at the target parameters.
diff --git a/docker-stacks/examples/make-deploy/letsencrypt.makefile b/docker-stacks/examples/make-deploy/letsencrypt.makefile
new file mode 100644
index 0000000..1954965
--- /dev/null
+++ b/docker-stacks/examples/make-deploy/letsencrypt.makefile
@@ -0,0 +1,60 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+# BE CAREFUL when using Docker engine <1.10 because running a container with
+# `--rm` option while mounting a docker volume may wipe out the volume.
+# See issue: https://github.com/moby/moby/issues/17907
+
+# Use letsencrypt production server by default to get a real cert.
+# Use CERT_SERVER=--staging to hit the staging server (not a real cert).
+ +letsencrypt: NAME?=notebook +letsencrypt: SECRETS_VOLUME?=$(NAME)-secrets +letsencrypt: TMP_CONTAINER?=$(NAME)-tmp +letsencrypt: CERT_SERVER?= +letsencrypt: + @test -n "$(FQDN)" || \ + (echo "ERROR: FQDN not defined or blank"; exit 1) + @test -n "$(EMAIL)" || \ + (echo "ERROR: EMAIL not defined or blank"; exit 1) + @docker volume create --name $(SECRETS_VOLUME) > /dev/null + @docker run -it -p 80:80 \ + --name=$(TMP_CONTAINER) \ + -v $(SECRETS_VOLUME):/etc/letsencrypt \ + quay.io/letsencrypt/letsencrypt:latest \ + certonly \ + $(CERT_SERVER) \ + --keep-until-expiring \ + --standalone \ + --standalone-supported-challenges http-01 \ + --agree-tos \ + --domain '$(FQDN)' \ + --email '$(EMAIL)'; \ + docker rm --force $(TMP_CONTAINER) > /dev/null +# The letsencrypt image has an entrypoint, so we use the notebook image +# instead so we can run arbitrary commands. +# Here we set the permissions so nobody can read the cert and key. +# We also symlink the certs into the root of the /etc/letsencrypt +# directory so that the FQDN doesn't have to be known later. + @docker run -it \ + --name=$(TMP_CONTAINER) \ + -v $(SECRETS_VOLUME):/etc/letsencrypt \ + $(NOTEBOOK_IMAGE) \ + bash -c "ln -s /etc/letsencrypt/live/$(FQDN)/* /etc/letsencrypt/ && \ + find /etc/letsencrypt -type d -exec chmod 755 {} +"; \ + docker rm --force $(TMP_CONTAINER) > /dev/null + +letsencrypt-notebook: PORT?=443 +letsencrypt-notebook: NAME?=notebook +letsencrypt-notebook: WORK_VOLUME?=$(NAME)-data +letsencrypt-notebook: SECRETS_VOLUME?=$(NAME)-secrets +letsencrypt-notebook: DOCKER_ARGS:=-e USE_HTTPS=yes \ + -e PASSWORD=$(PASSWORD) \ + -v $(SECRETS_VOLUME):/etc/letsencrypt +letsencrypt-notebook: ARGS:=\ + --ServerApp.certfile=/etc/letsencrypt/fullchain.pem \ + --ServerApp.keyfile=/etc/letsencrypt/privkey.pem +letsencrypt-notebook: check + @test -n "$(PASSWORD)" || \ + (echo "ERROR: PASSWORD not defined or blank"; exit 1) + $(RUN_NOTEBOOK) diff --git a/docker-stacks/examples/make-deploy/self-signed.makefile b/docker-stacks/examples/make-deploy/self-signed.makefile new file mode 100644 index 0000000..ebdf67d --- /dev/null +++ b/docker-stacks/examples/make-deploy/self-signed.makefile @@ -0,0 +1,12 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +self-signed-notebook: PORT?=443 +self-signed-notebook: NAME?=notebook +self-signed-notebook: WORK_VOLUME?=$(NAME)-data +self-signed-notebook: DOCKER_ARGS:=-e USE_HTTPS=yes \ + -e PASSWORD=$(PASSWORD) +self-signed-notebook: check + @test -n "$(PASSWORD)" || \ + (echo "ERROR: PASSWORD not defined or blank"; exit 1) + $(RUN_NOTEBOOK) diff --git a/docker-stacks/examples/make-deploy/softlayer.makefile b/docker-stacks/examples/make-deploy/softlayer.makefile new file mode 100644 index 0000000..62519c3 --- /dev/null +++ b/docker-stacks/examples/make-deploy/softlayer.makefile @@ -0,0 +1,26 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
+
+softlayer-vm: export SOFTLAYER_CPU?=4
+softlayer-vm: export SOFTLAYER_DISK_SIZE?=100
+softlayer-vm: export SOFTLAYER_MEMORY?=4096
+softlayer-vm: export SOFTLAYER_REGION?=wdc01
+softlayer-vm: check
+	@test -n "$(NAME)" || \
+		(echo "ERROR: NAME not defined (make help)"; exit 1)
+	@test -n "$(SOFTLAYER_API_KEY)" || \
+		(echo "ERROR: SOFTLAYER_API_KEY not defined (make help)"; exit 1)
+	@test -n "$(SOFTLAYER_USER)" || \
+		(echo "ERROR: SOFTLAYER_USER not defined (make help)"; exit 1)
+	@test -n "$(SOFTLAYER_DOMAIN)" || \
+		(echo "ERROR: SOFTLAYER_DOMAIN not defined (make help)"; exit 1)
+	@docker-machine create -d softlayer $(NAME)
+	@echo "DONE: Docker host '$(NAME)' up at $$(docker-machine ip $(NAME))"
+
+softlayer-dns: HOST_NAME:=$$(docker-machine active)
+softlayer-dns: IP:=$$(docker-machine ip $(HOST_NAME))
+softlayer-dns: check
+	@which slcli > /dev/null || (echo "softlayer cli not found (pip install softlayer)"; exit 1)
+	@test -n "$(SOFTLAYER_DOMAIN)" || \
+		(echo "ERROR: SOFTLAYER_DOMAIN not defined (make help)"; exit 1)
+	@slcli dns record-add $(SOFTLAYER_DOMAIN) $(HOST_NAME) A $(IP)
diff --git a/docker-stacks/examples/make-deploy/virtualbox.makefile b/docker-stacks/examples/make-deploy/virtualbox.makefile
new file mode 100644
index 0000000..73d6e44
--- /dev/null
+++ b/docker-stacks/examples/make-deploy/virtualbox.makefile
@@ -0,0 +1,10 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+virtualbox-vm: export VIRTUALBOX_CPU_COUNT?=4
+virtualbox-vm: export VIRTUALBOX_DISK_SIZE?=100000
+virtualbox-vm: export VIRTUALBOX_MEMORY_SIZE?=4096
+virtualbox-vm: check
+	@test -n "$(NAME)" || \
+		(echo "ERROR: NAME not defined (make help)"; exit 1)
+	@docker-machine create -d virtualbox $(NAME)
diff --git a/docker-stacks/examples/openshift/README.md b/docker-stacks/examples/openshift/README.md
new file mode 100644
index 0000000..65928d4
--- /dev/null
+++ b/docker-stacks/examples/openshift/README.md
@@ -0,0 +1,229 @@
+# OpenShift example
+
+This example provides templates for deploying the Jupyter Project docker-stacks images to OpenShift.
+
+## Prerequisites
+
+Any OpenShift 3 environment.
+The templates were tested with OpenShift 3.7 and are believed to work with OpenShift 3.6 or later.
+
+Do be aware that the Jupyter Project docker-stacks images are very large.
+The OpenShift environment you are using must provide sufficient quota on the per-user space for images and the file system for running containers.
+If the quota is too small, pulling the images to a node in the OpenShift cluster during deployment will fail due to lack of space.
+Even if the image is able to run, if the quota is only just larger than the space required for the image, you will not be able to install many packages into the container before running out of space.
+
+OpenShift Online, the public hosted version of OpenShift from Red Hat, has a quota of only 3GB for the image and container file system.
+As a result, only the `minimal-notebook` can be started, and there is little space remaining to install additional packages.
+Although OpenShift Online is suitable for demonstrating that these templates work, what you can do in that environment will be limited due to the size of the images.
+
+If you want to experiment with using Jupyter Notebooks in an OpenShift environment, you should instead use [Minishift](https://www.openshift.org/minishift/).
+Minishift provides the ability to run OpenShift in a virtual machine on your own local computer.
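+
+Given how large the docker-stacks images are, it may help to give the Minishift VM generous resources up front. A possible invocation (the resource values below are assumptions, size them for your machine):
+
+```bash
+# create a local OpenShift cluster with extra CPU, memory, and disk
+minishift start --cpus 2 --memory 8GB --disk-size 50GB
+```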
+
+## Loading the Templates
+
+To load the templates, log in to OpenShift from the command line and run:
+
+```bash
+oc create -f https://raw.githubusercontent.com/jupyter-on-openshift/docker-stacks/master/examples/openshift/templates.json
+```
+
+This should create the `jupyter-notebook` template.
+
+The template can be used from the command line using the `oc new-app` command, or from the OpenShift web console by selecting _Add to Project_.
+This `README` only explains deploying from the command line.
+
+## Deploying a Notebook
+
+To deploy a notebook from the command line using the template, run:
+
+```bash
+oc new-app --template jupyter-notebook
+```
+
+The output will be similar to:
+
+```lang-none
+--> Deploying template "jupyter/jupyter-notebook" to project jupyter
+
+     Jupyter Notebook
+     ---------
+     Template for deploying Jupyter Notebook images.
+
+     * With parameters:
+        * APPLICATION_NAME=notebook
+        * NOTEBOOK_IMAGE=docker.io/jupyter/minimal-notebook:latest
+        * NOTEBOOK_PASSWORD=ded4d7cada554aa48e0db612e1ed1080 # generated
+
+--> Creating resources ...
+    configmap "notebook-cfg" created
+    deploymentconfig "notebook" created
+    route "notebook" created
+    service "notebook" created
+--> Success
+    Access your application via route 'notebook-jupyter.b9ad.pro-us-east-1.openshiftapps.com'
+    Run 'oc status' to view your app.
+```
+
+When no template parameters are provided, the name of the deployed notebook will be `notebook`.
+The image used will be:
+
+```lang-none
+docker.io/jupyter/minimal-notebook:latest
+```
+
+A password you can use when accessing the notebook will be auto-generated and is displayed in the output from running `oc new-app`.
+
+To see the hostname for accessing the notebook, run:
+
+```bash
+oc get routes
+```
+
+The output will be similar to:
+
+```lang-none
+NAME       HOST/PORT                                               PATH      SERVICES   PORT       TERMINATION     WILDCARD
+notebook   notebook-jupyter.abcd.pro-us-east-1.openshiftapps.com             notebook   8888-tcp   edge/Redirect   None
+```
+
+A secure route will be used to expose the notebook outside the OpenShift cluster, so in this case the URL would be:
+
+```lang-none
+https://notebook-jupyter.abcd.pro-us-east-1.openshiftapps.com/
+```
+
+When prompted, enter the password for the notebook.
+
+## Passing Template Parameters
+
+To override the name for the notebook, the image used, and the password, you can pass template parameters using the `--param` option.
+
+```bash
+oc new-app --template jupyter-notebook \
+    --param APPLICATION_NAME=mynotebook \
+    --param NOTEBOOK_IMAGE=docker.io/jupyter/scipy-notebook:latest \
+    --param NOTEBOOK_PASSWORD=mypassword
+```
+
+You can deploy any of the Jupyter Project docker-stacks images.
+
+If you don't care what version of the image is used, add the `:latest` tag at the end of the image name; otherwise, use the hash corresponding to the image version you want to use.
+
+## Deleting the Notebook Instance
+
+To delete the notebook instance, run `oc delete` using a label selector for the application name.
+
+```bash
+oc delete all,configmap --selector app=mynotebook
+```
+
+## Adding Persistent Storage
+
+You can upload notebooks and other files using the web interface of the notebook.
+Any uploaded files or changes you make to them will be lost when the notebook instance is restarted.
+If you want to save your work, you need to add persistent storage to the notebook.
+To add persistent storage, run:
+
+```bash
+oc set volume dc/mynotebook --add \
+    --type=pvc --claim-size=1Gi --claim-mode=ReadWriteOnce \
+    --claim-name mynotebook-data --name data \
+    --mount-path /home/jovyan
+```
+
+When you have deleted the notebook instance, if you were using a persistent volume, you will need to delete the persistent volume claim in a separate step.
+
+```bash
+oc delete pvc/mynotebook-data
+```
+
+## Customizing the Configuration
+
+If you want to set any custom configuration for the notebook, you can edit the config map created by the template.
+
+```bash
+oc edit configmap/mynotebook-cfg
+```
+
+The `data` field of the config map contains Python code used as the `jupyter_server_config.py` file.
+
+If you are using a persistent volume, you can also create a configuration file at:
+
+```lang-none
+/home/jovyan/.jupyter/jupyter_server_config.py
+```
+
+This will be merged at the end of the configuration from the config map.
+
+Because the configuration is Python code, ensure any indenting is correct.
+Any errors in the configuration file will cause the notebook to fail when starting.
+
+If the error is in the config map, edit it again to fix it and trigger a new deployment if necessary by running:
+
+```bash
+oc rollout latest dc/mynotebook
+```
+
+If you make an error in the configuration file stored in the persistent volume, you will need to scale down the notebook so it isn't running.
+
+```bash
+oc scale dc/mynotebook --replicas 0
+```
+
+Then run:
+
+```bash
+oc debug dc/mynotebook
+```
+
+to run the notebook in debug mode.
+This will provide you with an interactive terminal session inside a running container, but the notebook will not have been started.
+Edit the configuration file in the volume to fix any errors and exit the terminal session.
+
+Start up the notebook again:
+
+```bash
+oc scale dc/mynotebook --replicas 1
+```
+
+## Changing the Notebook Password
+
+The password for the notebook is supplied as a template parameter, or if not supplied, will be automatically generated by the template.
+It will be passed into the container through an environment variable.
+
+If you want to change the password, you can do so by editing the environment variable on the deployment configuration.
+
+```bash
+oc set env dc/mynotebook JUPYTER_NOTEBOOK_PASSWORD=mypassword
+```
+
+This will trigger a new deployment, so ensure you have downloaded any work if not using a persistent volume.
+
+If using a persistent volume, you could instead set up a password in the file `/home/jovyan/.jupyter/jupyter_server_config.py`, as per the guidelines in the Jupyter Server documentation.
+
+## Deploying from a Custom Image
+
+If you want to deploy a custom variant of the Jupyter Project docker-stacks images, you can replace the image name with that of your own.
+If the image is not stored on Docker Hub but in some other public image registry, prefix the name of the image with the image registry host details.
+
+If the image is in your OpenShift project, because you imported the image into OpenShift or used the docker build strategy of OpenShift to build a derived custom image,
+you can use the name of the image stream for the image name, including any image tag if necessary.
+
+This can be illustrated by first importing an image into the OpenShift project.
+
+```bash
+oc import-image docker.io/jupyter/datascience-notebook:latest --confirm
+```
+
+Then deploy it using the name of the image stream created.
+ +```bash +oc new-app --template jupyter-notebook \ + --param APPLICATION_NAME=mynotebook \ + --param NOTEBOOK_IMAGE=datascience-notebook \ + --param NOTEBOOK_PASSWORD=mypassword +``` + +Importing an image into OpenShift before deploying it means that when a notebook is started, the image need only be pulled from the internal OpenShift image registry rather than Docker Hub for each deployment. +Because the images are so large, this can speed up deployments when the image hasn't previously been deployed to a node in the OpenShift cluster. diff --git a/docker-stacks/examples/openshift/templates.json b/docker-stacks/examples/openshift/templates.json new file mode 100644 index 0000000..ebcd6cb --- /dev/null +++ b/docker-stacks/examples/openshift/templates.json @@ -0,0 +1,175 @@ +{ + "kind": "Template", + "apiVersion": "v1", + "metadata": { + "name": "jupyter-notebook", + "annotations": { + "openshift.io/display-name": "Jupyter Notebook", + "description": "Template for deploying Jupyter Notebook images.", + "iconClass": "icon-python", + "tags": "python,jupyter" + } + }, + "parameters": [ + { + "name": "APPLICATION_NAME", + "value": "notebook", + "required": true + }, + { + "name": "NOTEBOOK_IMAGE", + "value": "docker.io/jupyter/minimal-notebook:latest", + "required": true + }, + { + "name": "NOTEBOOK_PASSWORD", + "from": "[a-f0-9]{32}", + "generate": "expression" + } + ], + "objects": [ + { + "kind": "ConfigMap", + "apiVersion": "v1", + "metadata": { + "name": "${APPLICATION_NAME}-cfg", + "labels": { + "app": "${APPLICATION_NAME}" + } + }, + "data": { + "jupyter_server_config.py": "import os\n\npassword = os.environ.get('JUPYTER_NOTEBOOK_PASSWORD')\n\nif password:\n from jupyter_server.auth import passwd\n c.ServerApp.password = passwd(password)\n del password\n del os.environ['JUPYTER_NOTEBOOK_PASSWORD']\n\nimage_config_file = '/home/jovyan/.jupyter/jupyter_server_config.py'\n\nif os.path.exists(image_config_file):\n with open(image_config_file) as fp:\n exec(compile(fp.read(), image_config_file, 'exec'), globals())\n" + } + }, + { + "kind": "DeploymentConfig", + "apiVersion": "v1", + "metadata": { + "name": "${APPLICATION_NAME}", + "labels": { + "app": "${APPLICATION_NAME}" + } + }, + "spec": { + "strategy": { + "type": "Recreate" + }, + "triggers": [ + { + "type": "ConfigChange" + } + ], + "replicas": 1, + "selector": { + "app": "${APPLICATION_NAME}", + "deploymentconfig": "${APPLICATION_NAME}" + }, + "template": { + "metadata": { + "annotations": { + "alpha.image.policy.openshift.io/resolve-names": "*" + }, + "labels": { + "app": "${APPLICATION_NAME}", + "deploymentconfig": "${APPLICATION_NAME}" + } + }, + "spec": { + "containers": [ + { + "name": "jupyter-notebook", + "image": "${NOTEBOOK_IMAGE}", + "command": [ + "start-notebook.py", + "--config=/etc/jupyter/openshift/jupyter_server_config.py", + "--no-browser", + "--ip=0.0.0.0" + ], + "ports": [ + { + "containerPort": 8888, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "JUPYTER_NOTEBOOK_PASSWORD", + "value": "${NOTEBOOK_PASSWORD}" + } + ], + "volumeMounts": [ + { + "mountPath": "/etc/jupyter/openshift", + "name": "configs" + } + ] + } + ], + "automountServiceAccountToken": false, + "securityContext": { + "supplementalGroups": [100] + }, + "volumes": [ + { + "configMap": { + "name": "${APPLICATION_NAME}-cfg" + }, + "name": "configs" + } + ] + } + } + } + }, + { + "kind": "Route", + "apiVersion": "v1", + "metadata": { + "name": "${APPLICATION_NAME}", + "labels": { + "app": "${APPLICATION_NAME}" + } + }, + "spec": { + "host": "", + 
"to": { + "kind": "Service", + "name": "${APPLICATION_NAME}", + "weight": 100 + }, + "port": { + "targetPort": "8888-tcp" + }, + "tls": { + "termination": "edge", + "insecureEdgeTerminationPolicy": "Redirect" + } + } + }, + { + "kind": "Service", + "apiVersion": "v1", + "metadata": { + "name": "${APPLICATION_NAME}", + "labels": { + "app": "${APPLICATION_NAME}" + } + }, + "spec": { + "ports": [ + { + "name": "8888-tcp", + "protocol": "TCP", + "port": 8888, + "targetPort": 8888 + } + ], + "selector": { + "app": "${APPLICATION_NAME}", + "deploymentconfig": "${APPLICATION_NAME}" + }, + "type": "ClusterIP" + } + } + ] +} diff --git a/docker-stacks/examples/source-to-image/README.md b/docker-stacks/examples/source-to-image/README.md new file mode 100644 index 0000000..6d4bcb8 --- /dev/null +++ b/docker-stacks/examples/source-to-image/README.md @@ -0,0 +1,170 @@ +# Custom Jupyter Notebook images + +This example provides scripts for building custom Jupyter Notebook images containing notebooks, data files, and with Python packages required by the notebooks already installed. +The scripts provided work with the Source-to-Image tool, and you can create the images from the command line on your own computer. +Templates are also provided to enable running builds in OpenShift, as well as deploying the resulting image to OpenShift to make it available. + +The build scripts, when used with the Source-to-Image tool, provide similar capabilities to `repo2docker`. +When builds are run under OpenShift with the supplied templates, it provides similar capabilities to `mybinder.org`, +but where notebook instances are deployed in your existing OpenShift project and JupyterHub is not required. + +For separate examples of using JupyterHub with OpenShift, see the project: + +- + +## Source-to-Image Project + +Source-to-Image (S2I) is an open source project which provides a tool for creating container images. +It works by taking a base image, injecting additional source code or files into a running container created from the base image, +and running a builder script in the container to process the source code or files to prepare the new image. + +Details on the S2I tool, and executable binaries for Linux, macOS and Windows, can be found on GitHub at: + +- + +The tool is standalone, and can be used on any system which provides a docker daemon for running containers. +To provide an end-to-end capability to build and deploy applications in containers, support for S2I is also integrated into container platforms such as OpenShift. + +## Getting Started with S2I + +As an example of how S2I can be used to create a custom image with a bundled set of notebooks, run: + +```bash +s2i build \ + --scripts-url https://raw.githubusercontent.com/jupyter/docker-stacks/main/examples/source-to-image \ + --context-dir docs/source/examples/Notebook \ + https://github.com/jupyter/notebook \ + docker.io/jupyter/minimal-notebook:latest \ + notebook-examples +``` + +This example command will pull down the Git repository +and build the image `notebook-examples` using the files contained in the `docs/source/examples/Notebook` directory of that Git repository. +The base image which the files will be combined with is `docker.io/jupyter/minimal-notebook:latest`, but you can specify any of the Jupyter Project `docker-stacks` images as the base image. 
+
+The resulting image can be seen by running the `docker images` command:
+
+```bash
+docker images
+# REPOSITORY          TAG       IMAGE ID       CREATED         SIZE
+# notebook-examples   latest    f5899ed1241d   2 minutes ago   2.59GB
+```
+
+You can now run the image.
+
+```bash
+docker run --rm -p 8888:8888 notebook-examples
+```
+
+Open the displayed URL in your browser; you will find the notebooks from the Git repository and can work with them.
+
+## The S2I Builder Scripts
+
+Normally when using S2I, the base image would be S2I enabled and contain the builder scripts needed to prepare the image and define how the application in the image should be run.
+As the Jupyter Project `docker-stacks` images are not S2I enabled (although they could be),
+in the above example the `--scripts-url` option has been used to specify that the example builder scripts contained in this directory of this Git repository should be used.
+
+Using the `--scripts-url` option, the builder scripts can be hosted on any HTTP server,
+or you can use builder scripts local to your computer by passing an appropriate `file://` URI as the argument to `--scripts-url`.
+
+The builder scripts in this directory of this repository are `assemble` and `run`, and they are provided as examples of what can be done.
+You can use the scripts as is, or create your own.
+
+The supplied `assemble` script performs a few key steps.
+
+The first steps copy files into the location they need to be when the image is run, from the directory where they are initially placed by the `s2i` command.
+
+```bash
+cp -Rf /tmp/src/. "/home/${NB_USER}"
+
+rm -rf /tmp/src
+```
+
+The next steps are:
+
+```bash
+if [ -f "/home/${NB_USER}/environment.yml" ]; then
+    mamba env update --name root --file "/home/${NB_USER}/environment.yml"
+    mamba clean --all -f -y
+else
+    if [ -f "/home/${NB_USER}/requirements.txt" ]; then
+        pip --no-cache-dir install -r "/home/${NB_USER}/requirements.txt"
+    fi
+fi
+```
+
+This determines whether an `environment.yml` or `requirements.txt` file was included with the notebook files and, if so, runs the appropriate package management tool to install any Python packages listed in those files.
+
+This means that so long as a set of notebook files provides one of these files listing what Python packages they need,
+those packages will be automatically installed into the image, so they are available when the image is run.
+
+A final step is:
+
+```bash
+fix-permissions "${CONDA_DIR}"
+fix-permissions "/home/${NB_USER}"
+```
+
+This fixes up permissions on any new files created by the build.
+This is necessary to ensure that when the image is run, you can still add additional files.
+This is important when an image is run in `sudo` mode, or when it is hosted in a more secure container platform such as Kubernetes/OpenShift where it will be run as an assigned user ID that isn't known in advance.
+
+As long as you preserve the first and last set of steps, you can do whatever you want in the `assemble` script to install packages, create files, etc.
+Do be aware, though, that S2I builds do not run as `root`, so you cannot install additional system packages.
+If you need to install additional system packages, use a `Dockerfile` and a normal `docker build` to first create a new custom base image from the Jupyter Project `docker-stacks` images
+with the extra system packages, and then use that image with the S2I build to combine your notebooks and have the Python packages installed.
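+
+For example, a minimal sketch of creating such a custom base image might look like the following, where the extra system package (`graphviz`) and the image name `my-custom-base` are purely illustrative:
+
+```bash
+# Write a Dockerfile that layers extra system packages onto a docker-stacks image.
+cat > Dockerfile <<'EOF'
+FROM docker.io/jupyter/minimal-notebook:latest
+
+# Only root can install system packages
+USER root
+RUN apt-get update --yes && \
+    apt-get install --yes --no-install-recommends graphviz && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Switch back to the notebook user, as the docker-stacks images expect
+USER ${NB_UID}
+EOF
+
+# Build the custom base image.
+docker build -t my-custom-base .
+```
+
+You can then pass `my-custom-base` to `s2i build` in place of `docker.io/jupyter/minimal-notebook:latest`.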
+
+The `run` script in this directory is very simple and just runs the notebook application.
+
+```bash
+exec start-notebook.py "$@"
+```
+
+## Integration with OpenShift
+
+The OpenShift platform provides integrated support for S2I-type builds.
+Templates are provided for using the S2I build mechanism with the scripts in this directory.
+To load the templates, run:
+
+```bash
+oc create -f https://raw.githubusercontent.com/jupyter/docker-stacks/main/examples/source-to-image/templates.json
+```
+
+This will create the templates:
+
+```lang-none
+jupyter-notebook-builder
+jupyter-notebook-quickstart
+```
+
+The templates can be used from the OpenShift web console or command line.
+This `README` is only going to explain deploying from the command line.
+
+To use the OpenShift command line to build the set of notebooks used above into an image and deploy it, run:
+
+```bash
+oc new-app --template jupyter-notebook-quickstart \
+    --param APPLICATION_NAME=notebook-examples \
+    --param GIT_REPOSITORY_URL=https://github.com/jupyter/notebook \
+    --param CONTEXT_DIR=docs/source/examples/Notebook \
+    --param BUILDER_IMAGE=docker.io/jupyter/minimal-notebook:latest \
+    --param NOTEBOOK_PASSWORD=mypassword
+```
+
+You can provide a password using the `NOTEBOOK_PASSWORD` parameter.
+If you don't set that parameter, a password will be generated and displayed by the `oc new-app` command.
+
+Once the image has been built, it will be deployed.
+To see the hostname for accessing the notebook, run `oc get routes`.
+
+```lang-none
+NAME                HOST/PORT                                                         PATH   SERVICES            PORT       TERMINATION     WILDCARD
+notebook-examples   notebook-examples-jupyter.abcd.pro-us-east-1.openshiftapps.com           notebook-examples   8888-tcp   edge/Redirect   None
+```
+
+As the deployment will use a secure connection, the URL for accessing the notebook in this case would be `https://notebook-examples-jupyter.abcd.pro-us-east-1.openshiftapps.com/`.
+
+If you only want to build an image but not deploy it, you can use the `jupyter-notebook-builder` template.
+You can then deploy it using the `jupyter-notebook` template provided in the [openshift](../openshift) examples directory.
+
+See the `openshift` examples directory for further information on customizing the configuration for a Jupyter Notebook deployment and deleting a deployment.
diff --git a/docker-stacks/examples/source-to-image/assemble b/docker-stacks/examples/source-to-image/assemble
new file mode 100755
index 0000000..56f23aa
--- /dev/null
+++ b/docker-stacks/examples/source-to-image/assemble
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+set -x
+
+set -eo pipefail
+
+# Remove any 'environment.yml' or 'requirements.txt' files which may
+# have been carried over from the base image so we don't reinstall
+# packages which have already been installed. This could occur where
+# an S2I build was used to create a new base image with pre-installed
+# Python packages, with the new image then subsequently being used as an
+# S2I builder base image.
+
+rm -f "/home/${NB_USER}/environment.yml"
+rm -f "/home/${NB_USER}/requirements.txt"
+
+# Copy injected files to target directory.
+
+cp -Rf /tmp/src/. "/home/${NB_USER}"
+
+rm -rf /tmp/src
+
+# Install any Python modules. If we find an 'environment.yml' file we
+# assume we should use 'conda' to install packages. If 'requirements.txt'
+# use 'pip' instead.
+ +if [ -f "/home/${NB_USER}/environment.yml" ]; then + mamba env update --name root --file "/home/${NB_USER}/environment.yml" + mamba clean --all -f -y +else + if [ -f "/home/${NB_USER}/requirements.txt" ]; then + pip --no-cache-dir install -r "/home/${NB_USER}/requirements.txt" + fi +fi + +# Fix up permissions on home directory and Python installation so that +# everything is still writable by 'users' group. + +fix-permissions "${CONDA_DIR}" +fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/examples/source-to-image/run b/docker-stacks/examples/source-to-image/run new file mode 100755 index 0000000..556efdd --- /dev/null +++ b/docker-stacks/examples/source-to-image/run @@ -0,0 +1,5 @@ +#!/bin/bash + +# Start up the notebook instance. + +exec start-notebook.py "$@" diff --git a/docker-stacks/examples/source-to-image/save-artifacts b/docker-stacks/examples/source-to-image/save-artifacts new file mode 100755 index 0000000..0ab29c8 --- /dev/null +++ b/docker-stacks/examples/source-to-image/save-artifacts @@ -0,0 +1,3 @@ +#!/bin/bash + +tar cf - --files-from /dev/null diff --git a/docker-stacks/examples/source-to-image/templates.json b/docker-stacks/examples/source-to-image/templates.json new file mode 100644 index 0000000..aa67766 --- /dev/null +++ b/docker-stacks/examples/source-to-image/templates.json @@ -0,0 +1,371 @@ +{ + "kind": "List", + "apiVersion": "v1", + "items": [ + { + "kind": "Template", + "apiVersion": "v1", + "metadata": { + "name": "jupyter-notebook-builder", + "annotations": { + "openshift.io/display-name": "Jupyter Notebook Builder", + "description": "Template for building Jupyter Notebook images with bundled notebooks and files.", + "iconClass": "icon-python", + "tags": "python,jupyter" + } + }, + "parameters": [ + { + "name": "IMAGE_NAME", + "value": "notebook", + "required": true + }, + { + "name": "BUILDER_IMAGE", + "value": "docker.io/jupyter/minimal-notebook:latest", + "required": true + }, + { + "name": "BUILDER_SCRIPTS", + "value": "https://raw.githubusercontent.com/jupyter/docker-stacks/main/examples/source-to-image", + "required": true + }, + { + "name": "GIT_REPOSITORY_URL", + "value": "", + "required": true + }, + { + "name": "GIT_REFERENCE", + "value": "main", + "required": true + }, + { + "name": "CONTEXT_DIR", + "value": "", + "required": false + } + ], + "objects": [ + { + "apiVersion": "v1", + "kind": "ImageStream", + "metadata": { + "name": "${IMAGE_NAME}", + "labels": { + "app": "${IMAGE_NAME}" + } + } + }, + { + "apiVersion": "v1", + "kind": "BuildConfig", + "metadata": { + "name": "${IMAGE_NAME}", + "labels": { + "app": "${IMAGE_NAME}" + } + }, + "spec": { + "output": { + "to": { + "kind": "ImageStreamTag", + "name": "${IMAGE_NAME}:latest" + } + }, + "resources": { + "limits": { + "memory": "1Gi" + } + }, + "source": { + "type": "Git", + "git": { + "uri": "${GIT_REPOSITORY_URL}", + "ref": "${GIT_REFERENCE}" + }, + "contextDir": "${CONTEXT_DIR}" + }, + "strategy": { + "type": "Source", + "sourceStrategy": { + "from": { + "kind": "DockerImage", + "name": "${BUILDER_IMAGE}" + }, + "scripts": "${BUILDER_SCRIPTS}" + } + }, + "triggers": [ + { + "type": "ConfigChange" + } + ] + } + } + ] + }, + { + "kind": "Template", + "apiVersion": "v1", + "metadata": { + "name": "jupyter-notebook-quickstart", + "annotations": { + "openshift.io/display-name": "Jupyter Notebook QuickStart", + "description": "Template for deploying Jupyter Notebook images with bundled notebooks and files.", + "iconClass": "icon-python", + "tags": "python,jupyter" + } + }, + 
"parameters": [ + { + "name": "APPLICATION_NAME", + "value": "notebook", + "required": true + }, + { + "name": "BUILDER_IMAGE", + "value": "docker.io/jupyter/minimal-notebook:latest", + "required": true + }, + { + "name": "BUILDER_SCRIPTS", + "value": "https://raw.githubusercontent.com/jupyter/docker-stacks/main/examples/source-to-image", + "required": true + }, + { + "name": "GIT_REPOSITORY_URL", + "value": "", + "required": true + }, + { + "name": "GIT_REFERENCE", + "value": "main", + "required": true + }, + { + "name": "CONTEXT_DIR", + "value": "", + "required": false + }, + { + "name": "NOTEBOOK_PASSWORD", + "from": "[a-f0-9]{32}", + "generate": "expression" + } + ], + "objects": [ + { + "apiVersion": "v1", + "kind": "ImageStream", + "metadata": { + "name": "${APPLICATION_NAME}", + "labels": { + "app": "${APPLICATION_NAME}" + } + } + }, + { + "apiVersion": "v1", + "kind": "BuildConfig", + "metadata": { + "name": "${APPLICATION_NAME}", + "labels": { + "app": "${APPLICATION_NAME}" + } + }, + "spec": { + "output": { + "to": { + "kind": "ImageStreamTag", + "name": "${APPLICATION_NAME}:latest" + } + }, + "resources": { + "limits": { + "memory": "1Gi" + } + }, + "source": { + "type": "Git", + "git": { + "uri": "${GIT_REPOSITORY_URL}", + "ref": "${GIT_REFERENCE}" + }, + "contextDir": "${CONTEXT_DIR}" + }, + "strategy": { + "type": "Source", + "sourceStrategy": { + "from": { + "kind": "DockerImage", + "name": "${BUILDER_IMAGE}" + }, + "scripts": "${BUILDER_SCRIPTS}" + } + }, + "triggers": [ + { + "type": "ConfigChange" + } + ] + } + }, + { + "kind": "ConfigMap", + "apiVersion": "v1", + "metadata": { + "name": "${APPLICATION_NAME}-cfg", + "labels": { + "app": "${APPLICATION_NAME}" + } + }, + "data": { + "jupyter_server_config.py": "import os\n\npassword = os.environ.get('JUPYTER_NOTEBOOK_PASSWORD')\n\nif password:\n from jupyter_server.auth import passwd\n c.ServerApp.password = passwd(password)\n del password\n del os.environ['JUPYTER_NOTEBOOK_PASSWORD']\n\nimage_config_file = '/home/jovyan/.jupyter/jupyter_server_config.py'\n\nif os.path.exists(image_config_file):\n with open(image_config_file) as fp:\n exec(compile(fp.read(), image_config_file, 'exec'), globals())\n" + } + }, + { + "kind": "DeploymentConfig", + "apiVersion": "v1", + "metadata": { + "name": "${APPLICATION_NAME}", + "labels": { + "app": "${APPLICATION_NAME}" + } + }, + "spec": { + "strategy": { + "type": "Recreate" + }, + "triggers": [ + { + "type": "ConfigChange" + }, + { + "type": "ImageChange", + "imageChangeParams": { + "automatic": true, + "containerNames": ["jupyter-notebook"], + "from": { + "kind": "ImageStreamTag", + "name": "${APPLICATION_NAME}:latest" + } + } + } + ], + "replicas": 1, + "selector": { + "app": "${APPLICATION_NAME}", + "deploymentconfig": "${APPLICATION_NAME}" + }, + "template": { + "metadata": { + "annotations": { + "alpha.image.policy.openshift.io/resolve-names": "*" + }, + "labels": { + "app": "${APPLICATION_NAME}", + "deploymentconfig": "${APPLICATION_NAME}" + } + }, + "spec": { + "containers": [ + { + "name": "jupyter-notebook", + "image": "${APPLICATION_NAME}:latest", + "command": [ + "start-notebook.py", + "--config=/etc/jupyter/openshift/jupyter_server_config.py", + "--no-browser", + "--ip=0.0.0.0" + ], + "ports": [ + { + "containerPort": 8888, + "protocol": "TCP" + } + ], + "env": [ + { + "name": "JUPYTER_NOTEBOOK_PASSWORD", + "value": "${NOTEBOOK_PASSWORD}" + } + ], + "volumeMounts": [ + { + "mountPath": "/etc/jupyter/openshift", + "name": "configs" + } + ] + } + ], + 
"automountServiceAccountToken": false, + "securityContext": { + "supplementalGroups": [100] + }, + "volumes": [ + { + "configMap": { + "name": "${APPLICATION_NAME}-cfg" + }, + "name": "configs" + } + ] + } + } + } + }, + { + "kind": "Route", + "apiVersion": "v1", + "metadata": { + "name": "${APPLICATION_NAME}", + "labels": { + "app": "${APPLICATION_NAME}" + } + }, + "spec": { + "host": "", + "to": { + "kind": "Service", + "name": "${APPLICATION_NAME}", + "weight": 100 + }, + "port": { + "targetPort": "8888-tcp" + }, + "tls": { + "termination": "edge", + "insecureEdgeTerminationPolicy": "Redirect" + } + } + }, + { + "kind": "Service", + "apiVersion": "v1", + "metadata": { + "name": "${APPLICATION_NAME}", + "labels": { + "app": "${APPLICATION_NAME}" + } + }, + "spec": { + "ports": [ + { + "name": "8888-tcp", + "protocol": "TCP", + "port": 8888, + "targetPort": 8888 + } + ], + "selector": { + "app": "${APPLICATION_NAME}", + "deploymentconfig": "${APPLICATION_NAME}" + }, + "type": "ClusterIP" + } + } + ] + } + ] +} diff --git a/docker-stacks/images/all-spark-notebook/.dockerignore b/docker-stacks/images/all-spark-notebook/.dockerignore new file mode 100644 index 0000000..9dea340 --- /dev/null +++ b/docker-stacks/images/all-spark-notebook/.dockerignore @@ -0,0 +1,2 @@ +# Documentation +README.md diff --git a/docker-stacks/images/all-spark-notebook/Dockerfile b/docker-stacks/images/all-spark-notebook/Dockerfile new file mode 100644 index 0000000..1a97294 --- /dev/null +++ b/docker-stacks/images/all-spark-notebook/Dockerfile @@ -0,0 +1,39 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +ARG REGISTRY=quay.io +ARG OWNER=jupyter +ARG BASE_CONTAINER=$REGISTRY/$OWNER/pyspark-notebook +FROM $BASE_CONTAINER + +LABEL maintainer="Jupyter Project " + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +USER root + +# RSpark config +ENV R_LIBS_USER "${SPARK_HOME}/R/lib" +RUN fix-permissions "${R_LIBS_USER}" + +# R pre-requisites +RUN apt-get update --yes && \ + apt-get install --yes --no-install-recommends \ + fonts-dejavu \ + gfortran \ + gcc && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +USER ${NB_UID} + +# R packages including IRKernel which gets installed globally. +RUN mamba install --yes \ + 'r-base' \ + 'r-ggplot2' \ + 'r-irkernel' \ + 'r-rcurl' \ + 'r-sparklyr' && \ + mamba clean --all -f -y && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/images/all-spark-notebook/README.md b/docker-stacks/images/all-spark-notebook/README.md new file mode 100644 index 0000000..1619e77 --- /dev/null +++ b/docker-stacks/images/all-spark-notebook/README.md @@ -0,0 +1,15 @@ +# Jupyter Notebook Python, R, Spark Stack + +> **Images hosted on Docker Hub are no longer updated. 
Please, use [quay.io image](https://quay.io/repository/jupyter/all-spark-notebook)** + +[![docker pulls](https://img.shields.io/docker/pulls/jupyter/all-spark-notebook.svg)](https://hub.docker.com/r/jupyter/all-spark-notebook/) +[![docker stars](https://img.shields.io/docker/stars/jupyter/all-spark-notebook.svg)](https://hub.docker.com/r/jupyter/all-spark-notebook/) +[![image size](https://img.shields.io/docker/image-size/jupyter/all-spark-notebook/latest)](https://hub.docker.com/r/jupyter/all-spark-notebook/ "jupyter/all-spark-notebook image size") + +GitHub Actions in the project builds and pushes this image to the Registry. + +Please visit the project documentation site for help to use and contribute to this image and others. + +- [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) +- [Selecting an Image :: Core Stacks :: jupyter/all-spark-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-all-spark-notebook) +- [Image Specifics :: Apache Spark](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/specifics.html#apache-spark) diff --git a/docker-stacks/images/base-notebook/.dockerignore b/docker-stacks/images/base-notebook/.dockerignore new file mode 100644 index 0000000..9dea340 --- /dev/null +++ b/docker-stacks/images/base-notebook/.dockerignore @@ -0,0 +1,2 @@ +# Documentation +README.md diff --git a/docker-stacks/images/base-notebook/Dockerfile b/docker-stacks/images/base-notebook/Dockerfile new file mode 100644 index 0000000..07903b9 --- /dev/null +++ b/docker-stacks/images/base-notebook/Dockerfile @@ -0,0 +1,77 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +ARG REGISTRY=quay.io +ARG OWNER=jupyter +ARG BASE_CONTAINER=$REGISTRY/$OWNER/docker-stacks-foundation +FROM $BASE_CONTAINER + +LABEL maintainer="Jupyter Project " + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +USER root + +# Install all OS dependencies for the Server that starts +# but lacks all features (e.g., download as all possible file formats) +RUN apt-get update --yes && \ + apt-get install --yes --no-install-recommends \ + # - Add necessary fonts for matplotlib/seaborn + # See https://github.com/jupyter/docker-stacks/pull/380 for details + fonts-liberation \ + # - `pandoc` is used to convert notebooks to html files + # it's not present in the aarch64 Ubuntu image, so we install it here + pandoc \ + # - `run-one` - a wrapper script that runs no more + # than one unique instance of some command with a unique set of arguments, + # we use `run-one-constantly` to support the `RESTARTABLE` option + run-one && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +USER ${NB_UID} + +# Install JupyterLab, Jupyter Notebook, JupyterHub and NBClassic +# Generate a Jupyter Server config +# Cleanup temporary files +# Correct permissions +# Do all this in a single RUN command to avoid duplicating all of the +# files across image layers when the permissions change +WORKDIR /tmp +RUN mamba install --yes \ + 'jupyterlab' \ + 'notebook' \ + 'jupyterhub' \ + 'nbclassic' && \ + jupyter server --generate-config && \ + mamba clean --all -f -y && \ + npm cache clean --force && \ + jupyter lab clean && \ + rm -rf "/home/${NB_USER}/.cache/yarn" && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" + +ENV JUPYTER_PORT=8888 +EXPOSE $JUPYTER_PORT 
+
+# Configure container startup
+CMD ["start-notebook.py"]
+
+# Copy local files as late as possible to avoid cache busting
+COPY start-notebook.py start-notebook.sh start-singleuser.py start-singleuser.sh /usr/local/bin/
+COPY jupyter_server_config.py docker_healthcheck.py /etc/jupyter/
+
+# Fix permissions on /etc/jupyter as root
+USER root
+RUN fix-permissions /etc/jupyter/
+
+# HEALTHCHECK documentation: https://docs.docker.com/engine/reference/builder/#healthcheck
+# This healthcheck works well for `lab`, `notebook`, `nbclassic`, `server`, and `retro` jupyter commands
+# https://github.com/jupyter/docker-stacks/issues/915#issuecomment-1068528799
+HEALTHCHECK --interval=3s --timeout=1s --start-period=3s --retries=3 \
+    CMD /etc/jupyter/docker_healthcheck.py || exit 1
+
+# Switch back to jovyan to avoid accidental container runs as root
+USER ${NB_UID}
+
+WORKDIR "${HOME}"
diff --git a/docker-stacks/images/base-notebook/README.md b/docker-stacks/images/base-notebook/README.md
new file mode 100644
index 0000000..6713599
--- /dev/null
+++ b/docker-stacks/images/base-notebook/README.md
@@ -0,0 +1,14 @@
+# Base Jupyter Notebook Stack
+
+> **Images hosted on Docker Hub are no longer updated. Please, use [quay.io image](https://quay.io/repository/jupyter/base-notebook)**
+
+[![docker pulls](https://img.shields.io/docker/pulls/jupyter/base-notebook.svg)](https://hub.docker.com/r/jupyter/base-notebook/)
+[![docker stars](https://img.shields.io/docker/stars/jupyter/base-notebook.svg)](https://hub.docker.com/r/jupyter/base-notebook/)
+[![image size](https://img.shields.io/docker/image-size/jupyter/base-notebook/latest)](https://hub.docker.com/r/jupyter/base-notebook/ "jupyter/base-notebook image size")
+
+GitHub Actions in the project builds and pushes this image to the Registry.
+
+Please visit the project documentation site for help to use and contribute to this image and others.
+
+- [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html)
+- [Selecting an Image :: Core Stacks :: jupyter/base-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-base-notebook)
diff --git a/docker-stacks/images/base-notebook/docker_healthcheck.py b/docker-stacks/images/base-notebook/docker_healthcheck.py
new file mode 100755
index 0000000..7dd3de0
--- /dev/null
+++ b/docker-stacks/images/base-notebook/docker_healthcheck.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+import json
+import os
+import subprocess
+from pathlib import Path
+
+import requests
+
+# Several operations below deliberately don't check for possible errors
+# As this is a healthcheck, it should succeed or raise an exception on error
+
+# Docker runs healthchecks using an exec
+# It uses the default user configured when running the image: root for the case of a custom NB_USER or jovyan for the case of the default image user.
+# We manually change HOME to make `jupyter --runtime-dir` report a correct path +# More information: +result = subprocess.run( + ["jupyter", "--runtime-dir"], + check=True, + capture_output=True, + text=True, + env=dict(os.environ) | {"HOME": "/home/" + os.environ["NB_USER"]}, +) +runtime_dir = Path(result.stdout.rstrip()) + +json_file = next(runtime_dir.glob("*server-*.json")) + +url = json.loads(json_file.read_bytes())["url"] +url = url + "api" + +proxies = { + "http": "", + "https": "", +} + +r = requests.get(url, proxies=proxies, verify=False) # request without SSL verification +r.raise_for_status() +print(r.content) diff --git a/docker-stacks/images/base-notebook/jupyter_server_config.py b/docker-stacks/images/base-notebook/jupyter_server_config.py new file mode 100644 index 0000000..c0cca3a --- /dev/null +++ b/docker-stacks/images/base-notebook/jupyter_server_config.py @@ -0,0 +1,58 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +# mypy: ignore-errors +import os +import stat +import subprocess +from pathlib import Path + +from jupyter_core.paths import jupyter_data_dir + +c = get_config() # noqa: F821 +c.ServerApp.ip = "0.0.0.0" +c.ServerApp.open_browser = False + +# to output both image/svg+xml and application/pdf plot formats in the notebook file +c.InlineBackend.figure_formats = {"png", "jpeg", "svg", "pdf"} + +# https://github.com/jupyter/notebook/issues/3130 +c.FileContentsManager.delete_to_trash = False + +# Generate a self-signed certificate +OPENSSL_CONFIG = """\ +[req] +distinguished_name = req_distinguished_name +[req_distinguished_name] +""" +if "GEN_CERT" in os.environ: + dir_name = Path(jupyter_data_dir()) + dir_name.mkdir(parents=True, exist_ok=True) + pem_file = dir_name / "notebook.pem" + + # Generate an openssl.cnf file to set the distinguished name + cnf_file = Path(os.getenv("CONDA_DIR", "/usr/lib")) / "ssl/openssl.cnf" + if not cnf_file.exists(): + cnf_file.write_text(OPENSSL_CONFIG) + + # Generate a certificate if one doesn't exist on a disk + subprocess.check_call( + [ + "openssl", + "req", + "-new", + "-newkey=rsa:2048", + "-days=365", + "-nodes", + "-x509", + "-subj=/C=XX/ST=XX/L=XX/O=generated/CN=generated", + f"-keyout={pem_file}", + f"-out={pem_file}", + ] + ) + # Restrict access to the file + pem_file.chmod(stat.S_IRUSR | stat.S_IWUSR) + c.ServerApp.certfile = str(pem_file) + +# Change default umask for all subprocesses of the Server if set in the environment +if "NB_UMASK" in os.environ: + os.umask(int(os.environ["NB_UMASK"], 8)) diff --git a/docker-stacks/images/base-notebook/start-notebook.py b/docker-stacks/images/base-notebook/start-notebook.py new file mode 100755 index 0000000..973da5a --- /dev/null +++ b/docker-stacks/images/base-notebook/start-notebook.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import os +import shlex +import sys + +# If we are in a JupyterHub, we pass on to `start-singleuser.py` instead so it does the right thing +if "JUPYTERHUB_API_TOKEN" in os.environ: + print( + "WARNING: using start-singleuser.py instead of start-notebook.py to start a server associated with JupyterHub." 
+ ) + command = ["/usr/local/bin/start-singleuser.py"] + sys.argv[1:] + os.execvp(command[0], command) + + +# Entrypoint is start.sh +command = [] + +# If we want to survive restarts, launch the command using `run-one-constantly` +if os.environ.get("RESTARTABLE") == "yes": + command.append("run-one-constantly") + +# We always launch a jupyter subcommand from this script +command.append("jupyter") + +# Launch the configured subcommand. +# Note that this should be a single string, so we don't split it. +# We default to `lab`. +jupyter_command = os.environ.get("DOCKER_STACKS_JUPYTER_CMD", "lab") +command.append(jupyter_command) + +# Append any optional NOTEBOOK_ARGS we were passed in. +# This is supposed to be multiple args passed on to the notebook command, +# so we split it correctly with shlex +if "NOTEBOOK_ARGS" in os.environ: + command += shlex.split(os.environ["NOTEBOOK_ARGS"]) + +# Pass through any other args we were passed on the command line +command += sys.argv[1:] + +# Execute the command! +print("Executing: " + " ".join(command)) +os.execvp(command[0], command) diff --git a/docker-stacks/images/base-notebook/start-notebook.sh b/docker-stacks/images/base-notebook/start-notebook.sh new file mode 100755 index 0000000..c47ebba --- /dev/null +++ b/docker-stacks/images/base-notebook/start-notebook.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Shim to emit warning and call start-notebook.py +echo "WARNING: Use start-notebook.py instead" + +exec /usr/local/bin/start-notebook.py "$@" diff --git a/docker-stacks/images/base-notebook/start-singleuser.py b/docker-stacks/images/base-notebook/start-singleuser.py new file mode 100755 index 0000000..c80339f --- /dev/null +++ b/docker-stacks/images/base-notebook/start-singleuser.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import os +import shlex +import sys + +# Entrypoint is start.sh +command = ["jupyterhub-singleuser"] + +# set default ip to 0.0.0.0 +if "--ip=" not in os.environ.get("NOTEBOOK_ARGS", ""): + command.append("--ip=0.0.0.0") + +# Append any optional NOTEBOOK_ARGS we were passed in. +# This is supposed to be multiple args passed on to the notebook command, +# so we split it correctly with shlex +if "NOTEBOOK_ARGS" in os.environ: + command += shlex.split(os.environ["NOTEBOOK_ARGS"]) + +# Pass any other args we have been passed through +command += sys.argv[1:] + +# Execute the command! 
+print("Executing: " + " ".join(command)) +os.execvp(command[0], command) diff --git a/docker-stacks/images/base-notebook/start-singleuser.sh b/docker-stacks/images/base-notebook/start-singleuser.sh new file mode 100755 index 0000000..ecf0e06 --- /dev/null +++ b/docker-stacks/images/base-notebook/start-singleuser.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Shim to emit warning and call start-singleuser.py +echo "WARNING: Use start-singleuser.py instead" + +exec /usr/local/bin/start-singleuser.py "$@" diff --git a/docker-stacks/images/datascience-notebook/.dockerignore b/docker-stacks/images/datascience-notebook/.dockerignore new file mode 100644 index 0000000..9dea340 --- /dev/null +++ b/docker-stacks/images/datascience-notebook/.dockerignore @@ -0,0 +1,2 @@ +# Documentation +README.md diff --git a/docker-stacks/images/datascience-notebook/Dockerfile b/docker-stacks/images/datascience-notebook/Dockerfile new file mode 100644 index 0000000..1d5f13a --- /dev/null +++ b/docker-stacks/images/datascience-notebook/Dockerfile @@ -0,0 +1,63 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +ARG REGISTRY=quay.io +ARG OWNER=jupyter +ARG BASE_CONTAINER=$REGISTRY/$OWNER/scipy-notebook +FROM $BASE_CONTAINER + +LABEL maintainer="Jupyter Project " + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +USER root + +# R pre-requisites +RUN apt-get update --yes && \ + apt-get install --yes --no-install-recommends \ + fonts-dejavu \ + gfortran \ + gcc && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# Julia dependencies +# install Julia packages in /opt/julia instead of ${HOME} +ENV JULIA_DEPOT_PATH=/opt/julia \ + JULIA_PKGDIR=/opt/julia + +# Setup Julia +RUN /opt/setup-scripts/setup_julia.py + +USER ${NB_UID} + +# Setup IJulia kernel & other packages +RUN /opt/setup-scripts/setup-julia-packages.bash + +# R packages including IRKernel which gets installed globally. +# r-e1071: dependency of the caret R package +RUN mamba install --yes \ + 'r-base' \ + 'r-caret' \ + 'r-crayon' \ + 'r-devtools' \ + 'r-e1071' \ + 'r-forecast' \ + 'r-hexbin' \ + 'r-htmltools' \ + 'r-htmlwidgets' \ + 'r-irkernel' \ + 'r-nycflights13' \ + 'r-randomforest' \ + 'r-rcurl' \ + 'r-rmarkdown' \ + 'r-rodbc' \ + 'r-rsqlite' \ + 'r-shiny' \ + 'r-tidymodels' \ + 'r-tidyverse' \ + 'rpy2' \ + 'unixodbc' && \ + mamba clean --all -f -y && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/images/datascience-notebook/README.md b/docker-stacks/images/datascience-notebook/README.md new file mode 100644 index 0000000..42f1e04 --- /dev/null +++ b/docker-stacks/images/datascience-notebook/README.md @@ -0,0 +1,14 @@ +# Jupyter Notebook Data Science Stack + +> **Images hosted on Docker Hub are no longer updated. 
Please, use [quay.io image](https://quay.io/repository/jupyter/datascience-notebook)** + +[![docker pulls](https://img.shields.io/docker/pulls/jupyter/datascience-notebook.svg)](https://hub.docker.com/r/jupyter/datascience-notebook/) +[![docker stars](https://img.shields.io/docker/stars/jupyter/datascience-notebook.svg)](https://hub.docker.com/r/jupyter/datascience-notebook/) +[![image size](https://img.shields.io/docker/image-size/jupyter/datascience-notebook/latest)](https://hub.docker.com/r/jupyter/datascience-notebook/ "jupyter/datascience-notebook image size") + +GitHub Actions in the project builds and pushes this image to the Registry. + +Please visit the project documentation site for help to use and contribute to this image and others. + +- [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) +- [Selecting an Image :: Core Stacks :: jupyter/datascience-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-datascience-notebook) diff --git a/docker-stacks/images/docker-stacks-foundation/.dockerignore b/docker-stacks/images/docker-stacks-foundation/.dockerignore new file mode 100644 index 0000000..9dea340 --- /dev/null +++ b/docker-stacks/images/docker-stacks-foundation/.dockerignore @@ -0,0 +1,2 @@ +# Documentation +README.md diff --git a/docker-stacks/images/docker-stacks-foundation/10activate-conda-env.sh b/docker-stacks/images/docker-stacks-foundation/10activate-conda-env.sh new file mode 100755 index 0000000..ed7347f --- /dev/null +++ b/docker-stacks/images/docker-stacks-foundation/10activate-conda-env.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +# This registers the initialization code for the conda shell code +# It also activates default environment in the end, so we don't need to activate it manually +# Documentation: https://docs.conda.io/projects/conda/en/latest/dev-guide/deep-dives/activation.html +eval "$(conda shell.bash hook)" diff --git a/docker-stacks/images/docker-stacks-foundation/Dockerfile b/docker-stacks/images/docker-stacks-foundation/Dockerfile new file mode 100644 index 0000000..68450ac --- /dev/null +++ b/docker-stacks/images/docker-stacks-foundation/Dockerfile @@ -0,0 +1,143 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +# Ubuntu 22.04 (jammy) +# https://hub.docker.com/_/ubuntu/tags?page=1&name=jammy +ARG ROOT_CONTAINER=ubuntu:22.04 + +FROM $ROOT_CONTAINER + +LABEL maintainer="Jupyter Project " +ARG NB_USER="jovyan" +ARG NB_UID="1000" +ARG NB_GID="100" + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +USER root + +# Install all OS dependencies for the Server that starts +# but lacks all features (e.g., download as all possible file formats) +ENV DEBIAN_FRONTEND noninteractive +RUN apt-get update --yes && \ + # - `apt-get upgrade` is run to patch known vulnerabilities in system packages + # as the Ubuntu base image is rebuilt too seldom sometimes (less than once a month) + apt-get upgrade --yes && \ + apt-get install --yes --no-install-recommends \ + # - bzip2 is necessary to extract the micromamba executable. 
+ bzip2 \ + ca-certificates \ + locales \ + sudo \ + # - `tini` is installed as a helpful container entrypoint, + # that reaps zombie processes and such of the actual executable we want to start + # See https://github.com/krallin/tini#why-tini for details + tini \ + wget && \ + apt-get clean && rm -rf /var/lib/apt/lists/* && \ + echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ + locale-gen + +# Configure environment +ENV CONDA_DIR=/opt/conda \ + SHELL=/bin/bash \ + NB_USER="${NB_USER}" \ + NB_UID=${NB_UID} \ + NB_GID=${NB_GID} \ + LC_ALL=en_US.UTF-8 \ + LANG=en_US.UTF-8 \ + LANGUAGE=en_US.UTF-8 +ENV PATH="${CONDA_DIR}/bin:${PATH}" \ + HOME="/home/${NB_USER}" + +# Copy a script that we will use to correct permissions after running certain commands +COPY fix-permissions /usr/local/bin/fix-permissions +RUN chmod a+rx /usr/local/bin/fix-permissions + +# Enable prompt color in the skeleton .bashrc before creating the default NB_USER +# hadolint ignore=SC2016 +RUN sed -i 's/^#force_color_prompt=yes/force_color_prompt=yes/' /etc/skel/.bashrc && \ + # More information in: https://github.com/jupyter/docker-stacks/pull/2047 + # and docs: https://docs.conda.io/projects/conda/en/latest/dev-guide/deep-dives/activation.html + echo 'eval "$(conda shell.bash hook)"' >> /etc/skel/.bashrc + +# Create NB_USER with name jovyan user with UID=1000 and in the 'users' group +# and make sure these dirs are writable by the `users` group. +RUN echo "auth requisite pam_deny.so" >> /etc/pam.d/su && \ + sed -i.bak -e 's/^%admin/#%admin/' /etc/sudoers && \ + sed -i.bak -e 's/^%sudo/#%sudo/' /etc/sudoers && \ + useradd --no-log-init --create-home --shell /bin/bash --uid "${NB_UID}" --no-user-group "${NB_USER}" && \ + mkdir -p "${CONDA_DIR}" && \ + chown "${NB_USER}:${NB_GID}" "${CONDA_DIR}" && \ + chmod g+w /etc/passwd && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" + +USER ${NB_UID} + +# Pin the Python version here, or set it to "default" +ARG PYTHON_VERSION=3.11 + +# Setup work directory for backward-compatibility +RUN mkdir "/home/${NB_USER}/work" && \ + fix-permissions "/home/${NB_USER}" + +# Download and install Micromamba, and initialize the Conda prefix. 
+# +# Similar projects using Micromamba: +# - Micromamba-Docker: +# - repo2docker: +# Install Python, Mamba, and jupyter_core +# Cleanup temporary files and remove Micromamba +# Correct permissions +# Do all this in a single RUN command to avoid duplicating all of the +# files across image layers when the permissions change +COPY --chown="${NB_UID}:${NB_GID}" initial-condarc "${CONDA_DIR}/.condarc" +WORKDIR /tmp +RUN set -x && \ + arch=$(uname -m) && \ + if [ "${arch}" = "x86_64" ]; then \ + # Should be simpler, see + arch="64"; \ + fi && \ + # https://mamba.readthedocs.io/en/latest/installation/micromamba-installation.html#linux-and-macos + wget --progress=dot:giga -O - \ + "https://micro.mamba.pm/api/micromamba/linux-${arch}/latest" | tar -xvj bin/micromamba && \ + PYTHON_SPECIFIER="python=${PYTHON_VERSION}" && \ + if [[ "${PYTHON_VERSION}" == "default" ]]; then PYTHON_SPECIFIER="python"; fi && \ + # Install the packages + ./bin/micromamba install \ + --root-prefix="${CONDA_DIR}" \ + --prefix="${CONDA_DIR}" \ + --yes \ + "${PYTHON_SPECIFIER}" \ + 'mamba' \ + 'jupyter_core' && \ + rm -rf /tmp/bin/ && \ + # Pin major.minor version of python + # https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-pkgs.html#preventing-packages-from-updating-pinning + mamba list --full-name 'python' | tail -1 | tr -s ' ' | cut -d ' ' -f 1,2 | sed 's/\.[^.]*$/.*/' >> "${CONDA_DIR}/conda-meta/pinned" && \ + mamba clean --all -f -y && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" + +# Copy local files as late as possible to avoid cache busting +COPY run-hooks.sh start.sh /usr/local/bin/ + +# Configure container entrypoint +ENTRYPOINT ["tini", "-g", "--", "start.sh"] + +USER root + +# Create dirs for startup hooks +RUN mkdir /usr/local/bin/start-notebook.d && \ + mkdir /usr/local/bin/before-notebook.d + +COPY 10activate-conda-env.sh /usr/local/bin/before-notebook.d/ + +# Switch back to jovyan to avoid accidental container runs as root +USER ${NB_UID} + +WORKDIR "${HOME}" diff --git a/docker-stacks/images/docker-stacks-foundation/README.md b/docker-stacks/images/docker-stacks-foundation/README.md new file mode 100644 index 0000000..6665752 --- /dev/null +++ b/docker-stacks/images/docker-stacks-foundation/README.md @@ -0,0 +1,14 @@ +# Foundation Jupyter Stack + +> **Images hosted on Docker Hub are no longer updated. Please, use [quay.io image](https://quay.io/repository/jupyter/docker-stacks-foundation)** + +[![docker pulls](https://img.shields.io/docker/pulls/jupyter/docker-stacks-foundation.svg)](https://hub.docker.com/r/jupyter/docker-stacks-foundation/) +[![docker stars](https://img.shields.io/docker/stars/jupyter/docker-stacks-foundation.svg)](https://hub.docker.com/r/jupyter/docker-stacks-foundation/) +[![image size](https://img.shields.io/docker/image-size/jupyter/docker-stacks-foundation/latest)](https://hub.docker.com/r/jupyter/docker-stacks-foundation/ "jupyter/docker-stacks-foundation image size") + +GitHub Actions in the project builds and pushes this image to the Registry. + +Please visit the project documentation site for help to use and contribute to this image and others. 
+ +- [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) +- [Selecting an Image :: Core Stacks :: jupyter/docker-stacks-foundation](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-docker-stacks-foundation) diff --git a/docker-stacks/images/docker-stacks-foundation/fix-permissions b/docker-stacks/images/docker-stacks-foundation/fix-permissions new file mode 100755 index 0000000..47b6d0e --- /dev/null +++ b/docker-stacks/images/docker-stacks-foundation/fix-permissions @@ -0,0 +1,33 @@ +#!/bin/bash +# Set permissions on a directory +# After any installation, if a directory needs to be (human) user-writable, run this script on it. +# It will make everything in the directory owned by the group ${NB_GID} and writable by that group. +# Deployments that want to set a specific user id can preserve permissions +# by adding the `--group-add users` line to `docker run`. + +# Uses find to avoid touching files that already have the right permissions, +# which would cause a massive image explosion + +# Right permissions are: +# group=${NB_GID} +# AND permissions include group rwX (directory-execute) +# AND directories have setuid,setgid bits set + +set -e + +for d in "$@"; do + find "${d}" \ + ! \( \ + -group "${NB_GID}" \ + -a -perm -g+rwX \ + \) \ + -exec chgrp "${NB_GID}" -- {} \+ \ + -exec chmod g+rwX -- {} \+ + # setuid, setgid *on directories only* + find "${d}" \ + \( \ + -type d \ + -a ! -perm -6000 \ + \) \ + -exec chmod +6000 -- {} \+ +done diff --git a/docker-stacks/images/docker-stacks-foundation/initial-condarc b/docker-stacks/images/docker-stacks-foundation/initial-condarc new file mode 100644 index 0000000..383aad3 --- /dev/null +++ b/docker-stacks/images/docker-stacks-foundation/initial-condarc @@ -0,0 +1,6 @@ +# Conda configuration see https://conda.io/projects/conda/en/latest/configuration.html + +auto_update_conda: false +show_channel_urls: true +channels: + - conda-forge diff --git a/docker-stacks/images/docker-stacks-foundation/run-hooks.sh b/docker-stacks/images/docker-stacks-foundation/run-hooks.sh new file mode 100755 index 0000000..15df23c --- /dev/null +++ b/docker-stacks/images/docker-stacks-foundation/run-hooks.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +# The run-hooks.sh script looks for *.sh scripts to source +# and executable files to run within a passed directory + +if [ "$#" -ne 1 ]; then + echo "Should pass exactly one directory" + return 1 +fi + +if [[ ! -d "${1}" ]]; then + echo "Directory ${1} doesn't exist or is not a directory" + return 1 +fi + +echo "Running hooks in: ${1} as uid: $(id -u) gid: $(id -g)" +for f in "${1}/"*; do + # Handling a case when the directory is empty + [ -e "${f}" ] || continue + case "${f}" in + *.sh) + echo "Sourcing shell script: ${f}" + # shellcheck disable=SC1090 + source "${f}" + # shellcheck disable=SC2181 + if [ $? -ne 0 ]; then + echo "${f} has failed, continuing execution" + fi + ;; + *) + if [ -x "${f}" ]; then + echo "Running executable: ${f}" + "${f}" + # shellcheck disable=SC2181 + if [ $? 
-ne 0 ]; then + echo "${f} has failed, continuing execution" + fi + else + echo "Ignoring non-executable: ${f}" + fi + ;; + esac +done +echo "Done running hooks in: ${1}" diff --git a/docker-stacks/images/docker-stacks-foundation/start.sh b/docker-stacks/images/docker-stacks-foundation/start.sh new file mode 100755 index 0000000..33d12d8 --- /dev/null +++ b/docker-stacks/images/docker-stacks-foundation/start.sh @@ -0,0 +1,256 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +set -e + +# The _log function is used for everything this script wants to log. +# It will always log errors and warnings but can be silenced for other messages +# by setting the JUPYTER_DOCKER_STACKS_QUIET environment variable. +_log () { + if [[ "$*" == "ERROR:"* ]] || [[ "$*" == "WARNING:"* ]] || [[ "${JUPYTER_DOCKER_STACKS_QUIET}" == "" ]]; then + echo "$@" + fi +} +_log "Entered start.sh with args:" "$@" + +# A helper function to unset env vars listed in the value of the env var +# JUPYTER_ENV_VARS_TO_UNSET. +unset_explicit_env_vars () { + if [ -n "${JUPYTER_ENV_VARS_TO_UNSET}" ]; then + for env_var_to_unset in $(echo "${JUPYTER_ENV_VARS_TO_UNSET}" | tr ',' ' '); do + _log "Unset ${env_var_to_unset} due to JUPYTER_ENV_VARS_TO_UNSET" + unset "${env_var_to_unset}" + done + unset JUPYTER_ENV_VARS_TO_UNSET + fi +} + + +# Default to starting bash if no command was specified +if [ $# -eq 0 ]; then + cmd=( "bash" ) +else + cmd=( "$@" ) +fi + +# Backwards compatibility: `start.sh` is executed by default in ENTRYPOINT +# so it should no longer be specified in CMD +if [ "${_START_SH_EXECUTED}" = "1" ]; then + _log "WARNING: start.sh is the default ENTRYPOINT, do not include it in CMD" + _log "Executing the command:" "${cmd[@]}" + exec "${cmd[@]}" +else + export _START_SH_EXECUTED=1 +fi + + +# NOTE: This hook will run as the user the container was started with! +# shellcheck disable=SC1091 +source /usr/local/bin/run-hooks.sh /usr/local/bin/start-notebook.d + +# If the container started as the root user, then we have permission to refit +# the jovyan user, and ensure file permissions, grant sudo rights, and such +# things before we run the command passed to start.sh as the desired user +# (NB_USER). +# +if [ "$(id -u)" == 0 ]; then + # Environment variables: + # - NB_USER: the desired username and associated home folder + # - NB_UID: the desired user id + # - NB_GID: a group id we want our user to belong to + # - NB_GROUP: a group name we want for the group + # - GRANT_SUDO: a boolean ("1" or "yes") to grant the user sudo rights + # - CHOWN_HOME: a boolean ("1" or "yes") to chown the user's home folder + # - CHOWN_EXTRA: a comma-separated list of paths to chown + # - CHOWN_HOME_OPTS / CHOWN_EXTRA_OPTS: arguments to the chown commands + + # Refit the jovyan user to the desired user (NB_USER) + if id jovyan &> /dev/null; then + if ! usermod --home "/home/${NB_USER}" --login "${NB_USER}" jovyan 2>&1 | grep "no changes" > /dev/null; then + _log "Updated the jovyan user:" + _log "- username: jovyan -> ${NB_USER}" + _log "- home dir: /home/jovyan -> /home/${NB_USER}" + fi + elif ! id -u "${NB_USER}" &> /dev/null; then + _log "ERROR: Neither the jovyan user nor '${NB_USER}' exists. This could be the result of stopping and starting, the container with a different NB_USER environment variable." 
+ exit 1 + fi + # Ensure the desired user (NB_USER) gets its desired user id (NB_UID) and is + # a member of the desired group (NB_GROUP, NB_GID) + if [ "${NB_UID}" != "$(id -u "${NB_USER}")" ] || [ "${NB_GID}" != "$(id -g "${NB_USER}")" ]; then + _log "Update ${NB_USER}'s UID:GID to ${NB_UID}:${NB_GID}" + # Ensure the desired group's existence + if [ "${NB_GID}" != "$(id -g "${NB_USER}")" ]; then + groupadd --force --gid "${NB_GID}" --non-unique "${NB_GROUP:-${NB_USER}}" + fi + # Recreate the desired user as we want it + userdel "${NB_USER}" + useradd --no-log-init --home "/home/${NB_USER}" --shell /bin/bash --uid "${NB_UID}" --gid "${NB_GID}" --groups 100 "${NB_USER}" + fi + # Update the home directory if the desired user (NB_USER) is root and the + # desired user id (NB_UID) is 0 and the desired group id (NB_GID) is 0. + if [ "${NB_USER}" = "root" ] && [ "${NB_UID}" = "$(id -u "${NB_USER}")" ] && [ "${NB_GID}" = "$(id -g "${NB_USER}")" ]; then + sed -i "s|/root|/home/root|g" /etc/passwd + # Do not preserve ownership in rootless mode + CP_OPTS="-a --no-preserve=ownership" + fi + + # Move or symlink the jovyan home directory to the desired user's home + # directory if it doesn't already exist, and update the current working + # directory to the new location if needed. + if [[ "${NB_USER}" != "jovyan" ]]; then + if [[ ! -e "/home/${NB_USER}" ]]; then + _log "Attempting to copy /home/jovyan to /home/${NB_USER}..." + mkdir "/home/${NB_USER}" + # shellcheck disable=SC2086 + if cp ${CP_OPTS:--a} /home/jovyan/. "/home/${NB_USER}/"; then + _log "Success!" + else + _log "Failed to copy data from /home/jovyan to /home/${NB_USER}!" + _log "Attempting to symlink /home/jovyan to /home/${NB_USER}..." + if ln -s /home/jovyan "/home/${NB_USER}"; then + _log "Success creating symlink!" + else + _log "ERROR: Failed copy data from /home/jovyan to /home/${NB_USER} or to create symlink!" + exit 1 + fi + fi + fi + # Ensure the current working directory is updated to the new path + if [[ "${PWD}/" == "/home/jovyan/"* ]]; then + new_wd="/home/${NB_USER}/${PWD:13}" + _log "Changing working directory to ${new_wd}" + cd "${new_wd}" + fi + fi + + # Optionally ensure the desired user gets filesystem ownership of its home + # folder and/or additional folders + if [[ "${CHOWN_HOME}" == "1" || "${CHOWN_HOME}" == "yes" ]]; then + _log "Ensuring /home/${NB_USER} is owned by ${NB_UID}:${NB_GID} ${CHOWN_HOME_OPTS:+(chown options: ${CHOWN_HOME_OPTS})}" + # shellcheck disable=SC2086 + chown ${CHOWN_HOME_OPTS} "${NB_UID}:${NB_GID}" "/home/${NB_USER}" + fi + if [ -n "${CHOWN_EXTRA}" ]; then + for extra_dir in $(echo "${CHOWN_EXTRA}" | tr ',' ' '); do + _log "Ensuring ${extra_dir} is owned by ${NB_UID}:${NB_GID} ${CHOWN_EXTRA_OPTS:+(chown options: ${CHOWN_EXTRA_OPTS})}" + # shellcheck disable=SC2086 + chown ${CHOWN_EXTRA_OPTS} "${NB_UID}:${NB_GID}" "${extra_dir}" + done + fi + + # Prepend ${CONDA_DIR}/bin to sudo secure_path + sed -r "s#Defaults\s+secure_path\s*=\s*\"?([^\"]+)\"?#Defaults secure_path=\"${CONDA_DIR}/bin:\1\"#" /etc/sudoers | grep secure_path > /etc/sudoers.d/path + + # Optionally grant passwordless sudo rights for the desired user + if [[ "${GRANT_SUDO}" == "1" || "${GRANT_SUDO}" == "yes" ]]; then + _log "Granting ${NB_USER} passwordless sudo rights!" + echo "${NB_USER} ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers.d/added-by-start-script + fi + + # NOTE: This hook is run as the root user! 
+ # shellcheck disable=SC1091 + source /usr/local/bin/run-hooks.sh /usr/local/bin/before-notebook.d + unset_explicit_env_vars + + _log "Running as ${NB_USER}:" "${cmd[@]}" + exec sudo --preserve-env --set-home --user "${NB_USER}" \ + LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \ + PATH="${PATH}" \ + PYTHONPATH="${PYTHONPATH:-}" \ + "${cmd[@]}" + # Notes on how we ensure that the environment that this container is started + # with is preserved (except vars listed in JUPYTER_ENV_VARS_TO_UNSET) when + # we transition from running as root to running as NB_USER. + # + # - We use `sudo` to execute the command as NB_USER. What then + # happens to the environment will be determined by configuration in + # /etc/sudoers and /etc/sudoers.d/* as well as flags we pass to the sudo + # command. The behavior can be inspected with `sudo -V` run as root. + # + # ref: `man sudo` https://linux.die.net/man/8/sudo + # ref: `man sudoers` https://www.sudo.ws/docs/man/sudoers.man/ + # + # - We use the `--preserve-env` flag to pass through most environment + # variables, but understand that exceptions are caused by the sudoers + # configuration: `env_delete` and `env_check`. + # + # - We use the `--set-home` flag to set the HOME variable appropriately. + # + # - To reduce the default list of variables deleted by sudo, we could have + # used `env_delete` from /etc/sudoers. It has a higher priority than the + # `--preserve-env` flag and the `env_keep` configuration. + # + # - We preserve LD_LIBRARY_PATH, PATH and PYTHONPATH explicitly. Note however that sudo + # resolves `${cmd[@]}` using the "secure_path" variable we modified + # above in /etc/sudoers.d/path. Thus PATH is irrelevant to how the above + # sudo command resolves the path of `${cmd[@]}`. The PATH will be relevant + # for resolving paths of any subprocesses spawned by `${cmd[@]}`. + +# The container didn't start as the root user, so we will have to act as the +# user we started as. +else + # Warn about misconfiguration of: granting sudo rights + if [[ "${GRANT_SUDO}" == "1" || "${GRANT_SUDO}" == "yes" ]]; then + _log "WARNING: container must be started as root to grant sudo permissions!" + fi + + JOVYAN_UID="$(id -u jovyan 2>/dev/null)" # The default UID for the jovyan user + JOVYAN_GID="$(id -g jovyan 2>/dev/null)" # The default GID for the jovyan user + + # Attempt to ensure the user uid we currently run as has a named entry in + # the /etc/passwd file, as it avoids software crashing on hard assumptions + # on such entry. Writing to the /etc/passwd was allowed for the root group + # from the Dockerfile during the build. + # + # ref: https://github.com/jupyter/docker-stacks/issues/552 + if ! whoami &> /dev/null; then + _log "There is no entry in /etc/passwd for our UID=$(id -u). Attempting to fix..." + if [[ -w /etc/passwd ]]; then + _log "Renaming old jovyan user to nayvoj ($(id -u jovyan):$(id -g jovyan))" + + # We cannot use "sed --in-place" since sed tries to create a temp file in + # /etc/ and we may not have write access. Apply sed on our own temp file: + sed --expression="s/^jovyan:/nayvoj:/" /etc/passwd > /tmp/passwd + echo "${NB_USER}:x:$(id -u):$(id -g):,,,:/home/jovyan:/bin/bash" >> /tmp/passwd + cat /tmp/passwd > /etc/passwd + rm /tmp/passwd + + _log "Added new ${NB_USER} user ($(id -u):$(id -g)). Fixed UID!" + + if [[ "${NB_USER}" != "jovyan" ]]; then + _log "WARNING: user is ${NB_USER} but home is /home/jovyan. You must run as root to rename the home directory!" 
+ fi + else + _log "WARNING: unable to fix missing /etc/passwd entry because we don't have write permission. Try setting gid=0 with \"--user=$(id -u):0\"." + fi + fi + + # Warn about misconfiguration of: desired username, user id, or group id. + # A misconfiguration occurs when the user modifies the default values of + # NB_USER, NB_UID, or NB_GID, but we cannot update those values because we + # are not root. + if [[ "${NB_USER}" != "jovyan" && "${NB_USER}" != "$(id -un)" ]]; then + _log "WARNING: container must be started as root to change the desired user's name with NB_USER=\"${NB_USER}\"!" + fi + if [[ "${NB_UID}" != "${JOVYAN_UID}" && "${NB_UID}" != "$(id -u)" ]]; then + _log "WARNING: container must be started as root to change the desired user's id with NB_UID=\"${NB_UID}\"!" + fi + if [[ "${NB_GID}" != "${JOVYAN_GID}" && "${NB_GID}" != "$(id -g)" ]]; then + _log "WARNING: container must be started as root to change the desired user's group id with NB_GID=\"${NB_GID}\"!" + fi + + # Warn if the user isn't able to write files to ${HOME} + if [[ ! -w /home/jovyan ]]; then + _log "WARNING: no write access to /home/jovyan. Try starting the container with group 'users' (100), e.g. using \"--group-add=users\"." + fi + + # NOTE: This hook is run as the user we started the container as! + # shellcheck disable=SC1091 + source /usr/local/bin/run-hooks.sh /usr/local/bin/before-notebook.d + unset_explicit_env_vars + + _log "Executing the command:" "${cmd[@]}" + exec "${cmd[@]}" +fi diff --git a/docker-stacks/images/julia-notebook/.dockerignore b/docker-stacks/images/julia-notebook/.dockerignore new file mode 100644 index 0000000..9dea340 --- /dev/null +++ b/docker-stacks/images/julia-notebook/.dockerignore @@ -0,0 +1,2 @@ +# Documentation +README.md diff --git a/docker-stacks/images/julia-notebook/Dockerfile b/docker-stacks/images/julia-notebook/Dockerfile new file mode 100644 index 0000000..9dbfd7f --- /dev/null +++ b/docker-stacks/images/julia-notebook/Dockerfile @@ -0,0 +1,27 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +ARG REGISTRY=quay.io +ARG OWNER=jupyter +ARG BASE_CONTAINER=$REGISTRY/$OWNER/minimal-notebook +FROM $BASE_CONTAINER + +LABEL maintainer="Jupyter Project " + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +USER root + +# Julia dependencies +# install Julia packages in /opt/julia instead of ${HOME} +ENV JULIA_DEPOT_PATH=/opt/julia \ + JULIA_PKGDIR=/opt/julia + +# Setup Julia +RUN /opt/setup-scripts/setup_julia.py + +USER ${NB_UID} + +# Setup IJulia kernel & other packages +RUN /opt/setup-scripts/setup-julia-packages.bash diff --git a/docker-stacks/images/julia-notebook/README.md b/docker-stacks/images/julia-notebook/README.md new file mode 100644 index 0000000..4672067 --- /dev/null +++ b/docker-stacks/images/julia-notebook/README.md @@ -0,0 +1,14 @@ +# Jupyter Notebook Julia Stack + +> **Images hosted on Docker Hub are no longer updated. 
Please, use [quay.io image](https://quay.io/repository/jupyter/julia-notebook)** + +[![docker pulls](https://img.shields.io/docker/pulls/jupyter/julia-notebook.svg)](https://hub.docker.com/r/jupyter/julia-notebook/) +[![docker stars](https://img.shields.io/docker/stars/jupyter/julia-notebook.svg)](https://hub.docker.com/r/jupyter/julia-notebook/) +[![image size](https://img.shields.io/docker/image-size/jupyter/julia-notebook/latest)](https://hub.docker.com/r/jupyter/julia-notebook/ "jupyter/julia-notebook image size") + +GitHub Actions in the project builds and pushes this image to the Registry. + +Please visit the project documentation site for help to use and contribute to this image and others. + +- [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) +- [Selecting an Image :: Core Stacks :: jupyter/julia-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-julia-notebook) diff --git a/docker-stacks/images/minimal-notebook/.dockerignore b/docker-stacks/images/minimal-notebook/.dockerignore new file mode 100644 index 0000000..9dea340 --- /dev/null +++ b/docker-stacks/images/minimal-notebook/.dockerignore @@ -0,0 +1,2 @@ +# Documentation +README.md diff --git a/docker-stacks/images/minimal-notebook/Dockerfile b/docker-stacks/images/minimal-notebook/Dockerfile new file mode 100644 index 0000000..e77798c --- /dev/null +++ b/docker-stacks/images/minimal-notebook/Dockerfile @@ -0,0 +1,50 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +ARG REGISTRY=quay.io +ARG OWNER=jupyter +ARG BASE_CONTAINER=$REGISTRY/$OWNER/base-notebook +FROM $BASE_CONTAINER + +LABEL maintainer="Jupyter Project " + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +USER root + +# Install all OS dependencies for a fully functional Server +RUN apt-get update --yes && \ + apt-get install --yes --no-install-recommends \ + # Common useful utilities + curl \ + git \ + nano-tiny \ + tzdata \ + unzip \ + vim-tiny \ + # git-over-ssh + openssh-client \ + # `less` is needed to run help in R + # see: https://github.com/jupyter/docker-stacks/issues/1588 + less \ + # `nbconvert` dependencies + # https://nbconvert.readthedocs.io/en/latest/install.html#installing-tex + texlive-xetex \ + texlive-fonts-recommended \ + texlive-plain-generic \ + # Enable clipboard on Linux host systems + xclip && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# Create alternative for nano -> nano-tiny +RUN update-alternatives --install /usr/bin/nano nano /bin/nano-tiny 10 + +# Switch back to jovyan to avoid accidental container runs as root +USER ${NB_UID} + +# Add an R mimetype option to specify how the plot returns from R to the browser +COPY --chown=${NB_UID}:${NB_GID} Rprofile.site /opt/conda/lib/R/etc/ + +# Add setup scripts that may be used by downstream images or inherited images +COPY setup-scripts/ /opt/setup-scripts/ diff --git a/docker-stacks/images/minimal-notebook/README.md b/docker-stacks/images/minimal-notebook/README.md new file mode 100644 index 0000000..0d0f44f --- /dev/null +++ b/docker-stacks/images/minimal-notebook/README.md @@ -0,0 +1,14 @@ +# Minimal Jupyter Notebook Stack + +> **Images hosted on Docker Hub are no longer updated. 
Please, use [quay.io image](https://quay.io/repository/jupyter/minimal-notebook)** + +[![docker pulls](https://img.shields.io/docker/pulls/jupyter/minimal-notebook.svg)](https://hub.docker.com/r/jupyter/minimal-notebook/) +[![docker stars](https://img.shields.io/docker/stars/jupyter/minimal-notebook.svg)](https://hub.docker.com/r/jupyter/minimal-notebook/) +[![image size](https://img.shields.io/docker/image-size/jupyter/minimal-notebook/latest)](https://hub.docker.com/r/jupyter/minimal-notebook/ "jupyter/minimal-notebook image size") + +GitHub Actions in the project builds and pushes this image to the Registry. + +Please visit the project documentation site for help to use and contribute to this image and others. + +- [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) +- [Selecting an Image :: Core Stacks :: jupyter/minimal-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-minimal-notebook) diff --git a/docker-stacks/images/minimal-notebook/Rprofile.site b/docker-stacks/images/minimal-notebook/Rprofile.site new file mode 100644 index 0000000..3d6a93c --- /dev/null +++ b/docker-stacks/images/minimal-notebook/Rprofile.site @@ -0,0 +1,4 @@ +# Add R mimetype to specify how the plot returns from R to the browser. +# https://notebook.community/andrie/jupyter-notebook-samples/Changing%20R%20plot%20options%20in%20Jupyter + +options(jupyter.plot_mimetypes = c('text/plain', 'image/png', 'image/jpeg', 'image/svg+xml', 'application/pdf')) diff --git a/docker-stacks/images/minimal-notebook/setup-scripts/activate_notebook_custom_env.py b/docker-stacks/images/minimal-notebook/setup-scripts/activate_notebook_custom_env.py new file mode 100755 index 0000000..4d5da9b --- /dev/null +++ b/docker-stacks/images/minimal-notebook/setup-scripts/activate_notebook_custom_env.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import json +import os +import sys +from pathlib import Path + +env_name = sys.argv[1] +CONDA_DIR = os.environ["CONDA_DIR"] + +file = Path.home() / f".local/share/jupyter/kernels/{env_name}/kernel.json" +content = json.loads(file.read_text()) +content["env"] = { + "XML_CATALOG_FILES": "", + "PATH": f"{CONDA_DIR}/envs/{env_name}/bin:$PATH", + "CONDA_PREFIX": f"{CONDA_DIR}/envs/{env_name}", + "CONDA_PROMPT_MODIFIER": f"({env_name}) ", + "CONDA_SHLVL": "2", + "CONDA_DEFAULT_ENV": env_name, + "CONDA_PREFIX_1": CONDA_DIR, +} + +file.write_text(json.dumps(content, indent=1)) diff --git a/docker-stacks/images/minimal-notebook/setup-scripts/setup-julia-packages.bash b/docker-stacks/images/minimal-notebook/setup-scripts/setup-julia-packages.bash new file mode 100755 index 0000000..5e8f7e8 --- /dev/null +++ b/docker-stacks/images/minimal-notebook/setup-scripts/setup-julia-packages.bash @@ -0,0 +1,55 @@ +#!/bin/bash +set -exuo pipefail +# Requirements: +# - Run as a non-root user +# - The JULIA_PKGDIR environment variable is set +# - Julia is already set up, with the setup_julia.py command + + +# If we don't specify what CPUs the precompilation should be done for, it's +# *only* done for the target of the host doing the compilation. When the +# container runs on a host that's the same architecture, but a *different* +# generation of CPU than what the build host was, the precompilation is useless +# and Julia takes a long long time to start up. 
This specific multitarget comes +# from https://github.com/JuliaCI/julia-buildkite/blob/70bde73f6cb17d4381b62236fc2d96b1c7acbba7/utilities/build_envs.sh#L20-L76, +# and may need to be updated as new CPU generations come out. +# If the architecture the container runs on is different, +# precompilation may still have to be re-done on first startup - but this +# *should* catch most of the issues. See +# https://github.com/jupyter/docker-stacks/issues/2015 for more information +if [ "$(uname -m)" == "x86_64" ]; then + # See https://github.com/JuliaCI/julia-buildkite/blob/70bde73f6cb17d4381b62236fc2d96b1c7acbba7/utilities/build_envs.sh#L24 + # for an explanation of these options + export JULIA_CPU_TARGET="generic;sandybridge,-xsaveopt,clone_all;haswell,-rdrnd,base(1)" +elif [ "$(uname -m)" == "aarch64" ]; then + # See https://github.com/JuliaCI/julia-buildkite/blob/70bde73f6cb17d4381b62236fc2d96b1c7acbba7/utilities/build_envs.sh#L54 + # for an explanation of these options + export JULIA_CPU_TARGET="generic;cortex-a57;thunderx2t99;carmel" +fi + +# Install base Julia packages +julia -e ' +import Pkg; +Pkg.update(); +Pkg.add([ + "HDF5", + "IJulia", + "Pluto" +]); +Pkg.precompile(); +' + +# Move the kernelspec out of ${HOME} to the system share location. +# Avoids problems with runtime UID change not taking effect properly +# on the .local folder in the jovyan home dir. +mv "${HOME}/.local/share/jupyter/kernels/julia"* "${CONDA_DIR}/share/jupyter/kernels/" +chmod -R go+rx "${CONDA_DIR}/share/jupyter" +rm -rf "${HOME}/.local" +fix-permissions "${JULIA_PKGDIR}" "${CONDA_DIR}/share/jupyter" + +# Install jupyter-pluto-proxy to get Pluto to work on JupyterHub +mamba install --yes \ + 'jupyter-pluto-proxy' && \ + mamba clean --all -f -y && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/images/minimal-notebook/setup-scripts/setup_julia.py b/docker-stacks/images/minimal-notebook/setup-scripts/setup_julia.py new file mode 100755 index 0000000..114e64c --- /dev/null +++ b/docker-stacks/images/minimal-notebook/setup-scripts/setup_julia.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
+
+# Requirements:
+# - Run as the root user
+# - The JULIA_PKGDIR environment variable is set
+
+import logging
+import os
+import platform
+import shutil
+import subprocess
+from pathlib import Path
+
+import requests
+
+LOGGER = logging.getLogger(__name__)
+
+
+def unify_aarch64(platform: str) -> str:
+    """
+    Renames arm64->aarch64 to support local builds on aarch64 Macs
+    """
+    return {
+        "aarch64": "aarch64",
+        "arm64": "aarch64",
+        "x86_64": "x86_64",
+    }[platform]
+
+
+def get_latest_julia_url() -> tuple[str, str]:
+    """
+    Get the latest stable version of Julia
+    Based on: https://github.com/JuliaLang/www.julialang.org/issues/878#issuecomment-749234813
+    """
+    LOGGER.info("Downloading Julia versions information")
+    versions = requests.get(
+        "https://julialang-s3.julialang.org/bin/versions.json"
+    ).json()
+    stable_versions = {k: v for k, v in versions.items() if v["stable"]}
+    # Compare versions semantically
+    latest_stable_version = max(
+        stable_versions, key=lambda ver: [int(sub_ver) for sub_ver in ver.split(".")]
+    )
+    latest_version_files = stable_versions[latest_stable_version]["files"]
+    triplet = unify_aarch64(platform.machine()) + "-linux-gnu"
+    file_info = [vf for vf in latest_version_files if vf["triplet"] == triplet][0]
+    LOGGER.info(f"Latest version: {file_info['version']} url: {file_info['url']}")
+    return file_info["url"], file_info["version"]
+
+
+def download_julia(julia_url: str) -> None:
+    """
+    Downloads and unpacks julia
+    The resulting julia directory is "/opt/julia-VERSION/"
+    """
+    LOGGER.info("Downloading and unpacking Julia")
+    tmp_file = Path("/tmp/julia.tar.gz")
+    subprocess.check_call(
+        ["curl", "--progress-bar", "--location", "--output", tmp_file, julia_url]
+    )
+    shutil.unpack_archive(tmp_file, "/opt/")
+    tmp_file.unlink()
+
+
+def configure_julia(julia_version: str) -> None:
+    """
+    Creates /usr/local/bin/julia symlink
+    Make Julia aware of conda libraries
+    Creates a directory for Julia user libraries
+    """
+    LOGGER.info("Configuring Julia")
+    # Link Julia installed version to /usr/local/bin, so julia launches it
+    subprocess.check_call(
+        ["ln", "-fs", f"/opt/julia-{julia_version}/bin/julia", "/usr/local/bin/julia"]
+    )
+
+    # Tell Julia where conda libraries are
+    Path("/etc/julia").mkdir()
+    Path("/etc/julia/juliarc.jl").write_text(
+        f'push!(Libdl.DL_LOAD_PATH, "{os.environ["CONDA_DIR"]}/lib")\n'
+    )
+
+    # Create JULIA_PKGDIR, where user libraries are installed
+    JULIA_PKGDIR = Path(os.environ["JULIA_PKGDIR"])
+    JULIA_PKGDIR.mkdir()
+    subprocess.check_call(["chown", os.environ["NB_USER"], JULIA_PKGDIR])
+    subprocess.check_call(["fix-permissions", JULIA_PKGDIR])
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+
+    julia_url, julia_version = get_latest_julia_url()
+    download_julia(julia_url=julia_url)
+    configure_julia(julia_version=julia_version)
diff --git a/docker-stacks/images/pyspark-notebook/.dockerignore b/docker-stacks/images/pyspark-notebook/.dockerignore
new file mode 100644
index 0000000..9dea340
--- /dev/null
+++ b/docker-stacks/images/pyspark-notebook/.dockerignore
@@ -0,0 +1,2 @@
+# Documentation
+README.md
diff --git a/docker-stacks/images/pyspark-notebook/Dockerfile b/docker-stacks/images/pyspark-notebook/Dockerfile
new file mode 100644
index 0000000..c9c9326
--- /dev/null
+++ b/docker-stacks/images/pyspark-notebook/Dockerfile
@@ -0,0 +1,73 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
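+
+# Example usage (hypothetical image tag and version values; any compatible
+# combination from the build matrix should work): the lowercase version ARGs
+# declared below are meant to be overridden at build time, e.g.:
+#
+#   docker build \
+#     --build-arg openjdk_version=17 \
+#     --build-arg spark_version=3.5.0 \
+#     --build-arg scala_version=2.13 \
+#     -t my-pyspark-notebook .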
+ARG REGISTRY=quay.io
+ARG OWNER=jupyter
+ARG BASE_CONTAINER=$REGISTRY/$OWNER/scipy-notebook
+FROM $BASE_CONTAINER
+
+LABEL maintainer="Jupyter Project "
+
+# Fix: https://github.com/hadolint/hadolint/wiki/DL4006
+# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+USER root
+
+# Spark dependencies
+# Default values can be overridden at build time
+# (ARGS are in lowercase to distinguish them from ENV)
+ARG openjdk_version="17"
+
+RUN apt-get update --yes && \
+    apt-get install --yes --no-install-recommends \
+    "openjdk-${openjdk_version}-jre-headless" \
+    ca-certificates-java && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# If spark_version is not set, the latest stable Spark will be installed
+ARG spark_version
+ARG hadoop_version="3"
+# If scala_version is not set, Spark without Scala will be installed
+ARG scala_version
+# URL to use for Spark downloads
+# You need to use the https://archive.apache.org/dist/spark/ website if you want to download old Spark versions,
+# but it seems to be slower, which is why we use the recommended download site by default
+ARG spark_download_url="https://dlcdn.apache.org/spark/"
+
+ENV SPARK_HOME=/usr/local/spark
+ENV PATH="${PATH}:${SPARK_HOME}/bin"
+ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info"
+
+COPY setup_spark.py /opt/setup-scripts/
+
+# Setup Spark
+RUN /opt/setup-scripts/setup_spark.py \
+    --spark-version="${spark_version}" \
+    --hadoop-version="${hadoop_version}" \
+    --scala-version="${scala_version}" \
+    --spark-download-url="${spark_download_url}"
+
+# Configure IPython system-wide
+COPY ipython_kernel_config.py "/etc/ipython/"
+RUN fix-permissions "/etc/ipython/"
+
+USER ${NB_UID}
+
+# Install pyarrow
+# NOTE: It's important to ensure compatibility between Pandas versions.
+# The pandas version in this Dockerfile should match the version
+# on which the Pandas API for Spark is built.
+# To find the right version:
+# 1. Check out the Spark branch you are on.
+# 2. Find the pandas version in the file `dev/infra/Dockerfile`.
+RUN mamba install --yes \
+    'grpcio-status' \
+    'grpcio' \
+    'pandas=2.0.3' \
+    'pyarrow' && \
+    mamba clean --all -f -y && \
+    fix-permissions "${CONDA_DIR}" && \
+    fix-permissions "/home/${NB_USER}"
+
+WORKDIR "${HOME}"
+EXPOSE 4040
diff --git a/docker-stacks/images/pyspark-notebook/README.md b/docker-stacks/images/pyspark-notebook/README.md
new file mode 100644
index 0000000..c1c5e9a
--- /dev/null
+++ b/docker-stacks/images/pyspark-notebook/README.md
@@ -0,0 +1,15 @@
+# Jupyter Notebook Python, Spark Stack
+
+> **Images hosted on Docker Hub are no longer updated. Please, use [quay.io image](https://quay.io/repository/jupyter/pyspark-notebook)**
+
+[![docker pulls](https://img.shields.io/docker/pulls/jupyter/pyspark-notebook.svg)](https://hub.docker.com/r/jupyter/pyspark-notebook/)
+[![docker stars](https://img.shields.io/docker/stars/jupyter/pyspark-notebook.svg)](https://hub.docker.com/r/jupyter/pyspark-notebook/)
+[![image size](https://img.shields.io/docker/image-size/jupyter/pyspark-notebook/latest)](https://hub.docker.com/r/jupyter/pyspark-notebook/ "jupyter/pyspark-notebook image size")
+
+GitHub Actions in the project builds and pushes this image to the Registry.
+
+Please visit the project documentation site for help to use and contribute to this image and others.
+
+- [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html)
+- [Selecting an Image :: Core Stacks :: jupyter/pyspark-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-pyspark-notebook)
+- [Image Specifics :: Apache Spark](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/specifics.html#apache-spark)
diff --git a/docker-stacks/images/pyspark-notebook/ipython_kernel_config.py b/docker-stacks/images/pyspark-notebook/ipython_kernel_config.py
new file mode 100644
index 0000000..921e6fa
--- /dev/null
+++ b/docker-stacks/images/pyspark-notebook/ipython_kernel_config.py
@@ -0,0 +1,13 @@
+# Configuration file for ipython-kernel.
+# See 
+
+# With IPython >= 6.0.0, all outputs to stdout/stderr are captured.
+# This is the case for subprocesses and for the output of compiled libraries like Spark.
+# These logs now go both to the notebook server logs and to the notebook outputs.
+# Logs are particularly verbose with Spark, which is why we turn them off through this flag.
+#
+
+# Attempt to capture and forward low-level output, e.g. produced by Extension libraries.
+# Default: True
+# type:ignore
+c.IPKernelApp.capture_fd_output = False  # noqa: F821
diff --git a/docker-stacks/images/pyspark-notebook/setup_spark.py b/docker-stacks/images/pyspark-notebook/setup_spark.py
new file mode 100755
index 0000000..a494b83
--- /dev/null
+++ b/docker-stacks/images/pyspark-notebook/setup_spark.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+# Requirements:
+# - Run as the root user
+# - Required env variable: SPARK_HOME
+
+import argparse
+import logging
+import os
+import subprocess
+from pathlib import Path
+
+import requests
+from bs4 import BeautifulSoup
+
+LOGGER = logging.getLogger(__name__)
+
+
+def get_all_refs(url: str) -> list[str]:
+    """
+    Get all the references for a given webpage
+    """
+    resp = requests.get(url)
+    soup = BeautifulSoup(resp.text, "html.parser")
+    return [a["href"] for a in soup.find_all("a", href=True)]
+
+
+def get_latest_spark_version() -> str:
+    """
+    Returns the latest stable version of Spark, based on the Spark archive listing
+    """
+    LOGGER.info("Downloading Spark versions information")
+    all_refs = get_all_refs("https://archive.apache.org/dist/spark/")
+    stable_versions = [
+        ref.removeprefix("spark-").removesuffix("/")
+        for ref in all_refs
+        if ref.startswith("spark-") and "incubating" not in ref and "preview" not in ref
+    ]
+    # Compare versions semantically
+    latest_version = max(
+        stable_versions, key=lambda ver: [int(sub_ver) for sub_ver in ver.split(".")]
+    )
+    LOGGER.info(f"Latest version: {latest_version}")
+    return latest_version
+
+
+def download_spark(
+    spark_version: str,
+    hadoop_version: str,
+    scala_version: str,
+    spark_download_url: Path,
+) -> str:
+    """
+    Downloads and unpacks spark
+    The resulting spark directory name is returned
+    """
+    LOGGER.info("Downloading and unpacking Spark")
+    spark_dir_name = f"spark-{spark_version}-bin-hadoop{hadoop_version}"
+    if scala_version:
+        spark_dir_name += f"-scala{scala_version}"
+    LOGGER.info(f"Spark directory name: {spark_dir_name}")
+    spark_url = spark_download_url / f"spark-{spark_version}" / f"{spark_dir_name}.tgz"
+
+    tmp_file = Path("/tmp/spark.tar.gz")
+    subprocess.check_call(
+        ["curl", "--progress-bar", "--location", "--output", tmp_file, spark_url]
+    )
+    subprocess.check_call(
+        [
+            "tar",
+            "xzf",
+            tmp_file,
+            "-C",
+            "/usr/local",
+            "--owner",
+ "root", + "--group", + "root", + "--no-same-owner", + ] + ) + tmp_file.unlink() + return spark_dir_name + + +def configure_spark(spark_dir_name: str, spark_home: Path) -> None: + """ + Creates a ${SPARK_HOME} symlink to a versioned spark directory + Creates a 10spark-config.sh symlink to source PYTHONPATH automatically + """ + LOGGER.info("Configuring Spark") + subprocess.check_call(["ln", "-s", f"/usr/local/{spark_dir_name}", spark_home]) + + # Add a link in the before_notebook hook in order to source PYTHONPATH automatically + CONFIG_SCRIPT = "/usr/local/bin/before-notebook.d/10spark-config.sh" + subprocess.check_call( + ["ln", "-s", spark_home / "sbin/spark-config.sh", CONFIG_SCRIPT] + ) + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + arg_parser = argparse.ArgumentParser() + arg_parser.add_argument("--spark-version", required=True) + arg_parser.add_argument("--hadoop-version", required=True) + arg_parser.add_argument("--scala-version", required=True) + arg_parser.add_argument("--spark-download-url", type=Path, required=True) + args = arg_parser.parse_args() + + args.spark_version = args.spark_version or get_latest_spark_version() + + spark_dir_name = download_spark( + spark_version=args.spark_version, + hadoop_version=args.hadoop_version, + scala_version=args.scala_version, + spark_download_url=args.spark_download_url, + ) + configure_spark( + spark_dir_name=spark_dir_name, spark_home=Path(os.environ["SPARK_HOME"]) + ) diff --git a/docker-stacks/images/pytorch-notebook/.dockerignore b/docker-stacks/images/pytorch-notebook/.dockerignore new file mode 100644 index 0000000..9dea340 --- /dev/null +++ b/docker-stacks/images/pytorch-notebook/.dockerignore @@ -0,0 +1,2 @@ +# Documentation +README.md diff --git a/docker-stacks/images/pytorch-notebook/Dockerfile b/docker-stacks/images/pytorch-notebook/Dockerfile new file mode 100644 index 0000000..f1a5c54 --- /dev/null +++ b/docker-stacks/images/pytorch-notebook/Dockerfile @@ -0,0 +1,21 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +ARG REGISTRY=quay.io +ARG OWNER=jupyter +ARG BASE_CONTAINER=$REGISTRY/$OWNER/scipy-notebook +FROM $BASE_CONTAINER + +LABEL maintainer="Jupyter Project " + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Install PyTorch with pip +# hadolint ignore=DL3013 +RUN pip install --no-cache-dir --index-url 'https://download.pytorch.org/whl/cpu' \ + 'torch' \ + 'torchvision' \ + 'torchaudio' && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/images/pytorch-notebook/README.md b/docker-stacks/images/pytorch-notebook/README.md new file mode 100644 index 0000000..72cc65c --- /dev/null +++ b/docker-stacks/images/pytorch-notebook/README.md @@ -0,0 +1,8 @@ +# Jupyter Notebook Deep Learning Stack + +GitHub Actions in the project builds and pushes this image to the Registry. + +Please visit the project documentation site for help to use and contribute to this image and others. 
+ +- [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) +- [Selecting an Image :: Core Stacks :: jupyter/pytorch-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-pytorch-notebook) diff --git a/docker-stacks/images/r-notebook/.dockerignore b/docker-stacks/images/r-notebook/.dockerignore new file mode 100644 index 0000000..9dea340 --- /dev/null +++ b/docker-stacks/images/r-notebook/.dockerignore @@ -0,0 +1,2 @@ +# Documentation +README.md diff --git a/docker-stacks/images/r-notebook/Dockerfile b/docker-stacks/images/r-notebook/Dockerfile new file mode 100644 index 0000000..bb7f095 --- /dev/null +++ b/docker-stacks/images/r-notebook/Dockerfile @@ -0,0 +1,54 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +ARG REGISTRY=quay.io +ARG OWNER=jupyter +ARG BASE_CONTAINER=$REGISTRY/$OWNER/minimal-notebook +FROM $BASE_CONTAINER + +LABEL maintainer="Jupyter Project " + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +USER root + +# R pre-requisites +RUN apt-get update --yes && \ + apt-get install --yes --no-install-recommends \ + fonts-dejavu \ + unixodbc \ + unixodbc-dev \ + r-cran-rodbc \ + gfortran \ + gcc && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +USER ${NB_UID} + +# R packages including IRKernel which gets installed globally. +# r-e1071: dependency of the caret R package +RUN mamba install --yes \ + 'r-base' \ + 'r-caret' \ + 'r-crayon' \ + 'r-devtools' \ + 'r-e1071' \ + 'r-forecast' \ + 'r-hexbin' \ + 'r-htmltools' \ + 'r-htmlwidgets' \ + 'r-irkernel' \ + 'r-nycflights13' \ + 'r-randomforest' \ + 'r-rcurl' \ + 'r-rmarkdown' \ + 'r-rodbc' \ + 'r-rsqlite' \ + 'r-shiny' \ + 'r-tidymodels' \ + 'r-tidyverse' \ + 'unixodbc' && \ + mamba clean --all -f -y && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/images/r-notebook/README.md b/docker-stacks/images/r-notebook/README.md new file mode 100644 index 0000000..4d5fc89 --- /dev/null +++ b/docker-stacks/images/r-notebook/README.md @@ -0,0 +1,14 @@ +# Jupyter Notebook R Stack + +> **Images hosted on Docker Hub are no longer updated. Please, use [quay.io image](https://quay.io/repository/jupyter/r-notebook)** + +[![docker pulls](https://img.shields.io/docker/pulls/jupyter/r-notebook.svg)](https://hub.docker.com/r/jupyter/r-notebook/) +[![docker stars](https://img.shields.io/docker/stars/jupyter/r-notebook.svg)](https://hub.docker.com/r/jupyter/r-notebook/) +[![image size](https://img.shields.io/docker/image-size/jupyter/r-notebook/latest)](https://hub.docker.com/r/jupyter/r-notebook/ "jupyter/r-notebook image size") + +GitHub Actions in the project builds and pushes this image to the Registry. + +Please visit the project documentation site for help to use and contribute to this image and others. 
+ +- [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) +- [Selecting an Image :: Core Stacks :: jupyter/r-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-r-notebook) diff --git a/docker-stacks/images/scipy-notebook/.dockerignore b/docker-stacks/images/scipy-notebook/.dockerignore new file mode 100644 index 0000000..9dea340 --- /dev/null +++ b/docker-stacks/images/scipy-notebook/.dockerignore @@ -0,0 +1,2 @@ +# Documentation +README.md diff --git a/docker-stacks/images/scipy-notebook/Dockerfile b/docker-stacks/images/scipy-notebook/Dockerfile new file mode 100644 index 0000000..1c8003d --- /dev/null +++ b/docker-stacks/images/scipy-notebook/Dockerfile @@ -0,0 +1,79 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +ARG REGISTRY=quay.io +ARG OWNER=jupyter +ARG BASE_CONTAINER=$REGISTRY/$OWNER/minimal-notebook +FROM $BASE_CONTAINER + +LABEL maintainer="Jupyter Project " + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +USER root + +RUN apt-get update --yes && \ + apt-get install --yes --no-install-recommends \ + # for cython: https://cython.readthedocs.io/en/latest/src/quickstart/install.html + build-essential \ + # for latex labels + cm-super \ + dvipng \ + # for matplotlib anim + ffmpeg && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +USER ${NB_UID} + +# Install Python 3 packages +RUN mamba install --yes \ + 'altair' \ + 'beautifulsoup4' \ + 'bokeh' \ + 'bottleneck' \ + 'cloudpickle' \ + 'conda-forge::blas=*=openblas' \ + 'cython' \ + 'dask' \ + 'dill' \ + 'h5py' \ + 'ipympl'\ + 'ipywidgets' \ + 'jupyterlab-git' \ + 'matplotlib-base' \ + 'numba' \ + 'numexpr' \ + 'openpyxl' \ + 'pandas' \ + 'patsy' \ + 'protobuf' \ + 'pytables' \ + 'scikit-image' \ + 'scikit-learn' \ + 'scipy' \ + 'seaborn' \ + 'sqlalchemy' \ + 'statsmodels' \ + 'sympy' \ + 'widgetsnbextension'\ + 'xlrd' && \ + mamba clean --all -f -y && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" + +# Install facets package which does not have a `pip` or `conda-forge` package at the moment +WORKDIR /tmp +RUN git clone https://github.com/PAIR-code/facets && \ + jupyter nbclassic-extension install facets/facets-dist/ --sys-prefix && \ + rm -rf /tmp/facets && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" + +# Import matplotlib the first time to build the font cache +RUN MPLBACKEND=Agg python -c "import matplotlib.pyplot" && \ + fix-permissions "/home/${NB_USER}" + +USER ${NB_UID} + +WORKDIR "${HOME}" diff --git a/docker-stacks/images/scipy-notebook/README.md b/docker-stacks/images/scipy-notebook/README.md new file mode 100644 index 0000000..68d56ec --- /dev/null +++ b/docker-stacks/images/scipy-notebook/README.md @@ -0,0 +1,14 @@ +# Jupyter Notebook Scientific Python Stack + +> **Images hosted on Docker Hub are no longer updated. 
Please, use [quay.io image](https://quay.io/repository/jupyter/scipy-notebook)** + +[![docker pulls](https://img.shields.io/docker/pulls/jupyter/scipy-notebook.svg)](https://hub.docker.com/r/jupyter/scipy-notebook/) +[![docker stars](https://img.shields.io/docker/stars/jupyter/scipy-notebook.svg)](https://hub.docker.com/r/jupyter/scipy-notebook/) +[![image size](https://img.shields.io/docker/image-size/jupyter/scipy-notebook/latest)](https://hub.docker.com/r/jupyter/scipy-notebook/ "jupyter/scipy-notebook image size") + +GitHub Actions in the project builds and pushes this image to the Registry. + +Please visit the project documentation site for help to use and contribute to this image and others. + +- [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html) +- [Selecting an Image :: Core Stacks :: jupyter/scipy-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-scipy-notebook) diff --git a/docker-stacks/images/tensorflow-notebook/.dockerignore b/docker-stacks/images/tensorflow-notebook/.dockerignore new file mode 100644 index 0000000..9dea340 --- /dev/null +++ b/docker-stacks/images/tensorflow-notebook/.dockerignore @@ -0,0 +1,2 @@ +# Documentation +README.md diff --git a/docker-stacks/images/tensorflow-notebook/Dockerfile b/docker-stacks/images/tensorflow-notebook/Dockerfile new file mode 100644 index 0000000..7038f08 --- /dev/null +++ b/docker-stacks/images/tensorflow-notebook/Dockerfile @@ -0,0 +1,17 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +ARG REGISTRY=quay.io +ARG OWNER=jupyter +ARG BASE_CONTAINER=$REGISTRY/$OWNER/scipy-notebook +FROM $BASE_CONTAINER + +LABEL maintainer="Jupyter Project " + +# Fix: https://github.com/hadolint/hadolint/wiki/DL4006 +# Fix: https://github.com/koalaman/shellcheck/wiki/SC3014 +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Install Tensorflow with pip +RUN pip install --no-cache-dir tensorflow && \ + fix-permissions "${CONDA_DIR}" && \ + fix-permissions "/home/${NB_USER}" diff --git a/docker-stacks/images/tensorflow-notebook/README.md b/docker-stacks/images/tensorflow-notebook/README.md new file mode 100644 index 0000000..41e7e2b --- /dev/null +++ b/docker-stacks/images/tensorflow-notebook/README.md @@ -0,0 +1,15 @@ +# Jupyter Notebook Deep Learning Stack + +> **Images hosted on Docker Hub are no longer updated. Please, use [quay.io image](https://quay.io/repository/jupyter/tensorflow-notebook)** + +[![docker pulls](https://img.shields.io/docker/pulls/jupyter/tensorflow-notebook.svg)](https://hub.docker.com/r/jupyter/tensorflow-notebook/) +[![docker stars](https://img.shields.io/docker/stars/jupyter/tensorflow-notebook.svg)](https://hub.docker.com/r/jupyter/tensorflow-notebook/) +[![image size](https://img.shields.io/docker/image-size/jupyter/tensorflow-notebook/latest)](https://hub.docker.com/r/jupyter/tensorflow-notebook/ "jupyter/tensorflow-notebook image size") + +GitHub Actions in the project builds and pushes this image to the Registry. + +Please visit the project documentation site for help to use and contribute to this image and others. 
+
+- [Jupyter Docker Stacks on ReadTheDocs](https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html)
+- [Selecting an Image :: Core Stacks :: jupyter/tensorflow-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-tensorflow-notebook)
+- [Image Specifics :: Tensorflow](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/specifics.html#tensorflow)
diff --git a/docker-stacks/mypy.ini b/docker-stacks/mypy.ini
new file mode 100644
index 0000000..3e20bf5
--- /dev/null
+++ b/docker-stacks/mypy.ini
@@ -0,0 +1,46 @@
+# Mypy is an optional static type checker for Python that aims to combine
+# the benefits of dynamic (or "duck") typing and static typing.
+#
+# Documentation: https://www.mypy-lang.org
+# Project: https://github.com/python/mypy
+# Config reference: https://mypy.readthedocs.io/en/stable/config_file.html
+#
+# We use mypy as part of pre-commit checks
+
+[mypy]
+python_version = 3.9
+follow_imports = error
+strict = True
+no_incremental = True
+# This allows us to use pytest decorators, which are not typed yet
+disallow_untyped_decorators = False
+
+# These sections allow us to ignore mypy errors for packages
+# which are not (hopefully yet) statically typed
+
+[mypy-Cython.*]
+ignore_missing_imports = True
+
+[mypy-docker.*]
+ignore_missing_imports = True
+
+[mypy-matplotlib.*]
+ignore_missing_imports = True
+
+[mypy-pandas.*]
+ignore_missing_imports = True
+
+[mypy-plumbum.*]
+ignore_missing_imports = True
+
+[mypy-pyspark.*]
+ignore_missing_imports = True
+
+[mypy-setuptools.*]
+ignore_missing_imports = True
+
+[mypy-tensorflow.*]
+ignore_missing_imports = True
+
+[mypy-torch.*]
+ignore_missing_imports = True
diff --git a/docker-stacks/requirements-dev.txt b/docker-stacks/requirements-dev.txt
new file mode 100644
index 0000000..3ab2be9
--- /dev/null
+++ b/docker-stacks/requirements-dev.txt
@@ -0,0 +1,10 @@
+docker
+plumbum
+pre-commit
+pytest
+pytest-retry
+# `pytest-xdist` is a plugin that provides the `--numprocesses` flag,
+# allowing us to run `pytest` tests in parallel
+pytest-xdist
+requests
+tabulate
diff --git a/docker-stacks/tagging/README.md b/docker-stacks/tagging/README.md
new file mode 100644
index 0000000..5d4ff4a
--- /dev/null
+++ b/docker-stacks/tagging/README.md
@@ -0,0 +1,126 @@
+# Docker stacks tagging and manifest creation
+
+The main purpose of the source code in this folder is to properly tag all the images and to update [build manifests](https://github.com/jupyter/docker-stacks/wiki).
+These two processes are closely related, so the source code is widely reused.
+
+A basic example of a tag is a `Python` version tag.
+For example, an image `jupyter/base-notebook` with `python 3.10.5` will have a full image name `quay.io/jupyter/base-notebook:python-3.10.5`.
+This tag (like all the other tags) is pushed to Quay.io.
+
+A manifest is a description of some important parts of the image, written in Markdown.
+For example, we dump all the `conda` packages, including their versions.
+
+## Main principles
+
+- All the images are located in a hierarchical tree.
+  More info on [image relationships](../docs/using/selecting.md#image-relationships).
+- We have `tagger` and `manifest` classes, which can be run inside docker containers to obtain tags and build manifest pieces.
+- These classes are inherited from the parent image to all the child images.
+- Because manifests and tags might change from parent to child, `taggers` and `manifests` are reevaluated on each image.
+  So, the values are not inherited.
+- To tag an image and create a manifest, run `make hook/base-notebook` (or another image of your choice).
+
+## Source code description
+
+In this section, we will briefly describe the source code in this folder and give examples of how to use it.
+
+### DockerRunner
+
+`DockerRunner` is a helper class to easily run a docker container and execute commands inside this container:
+
+```python
+from tagging.docker_runner import DockerRunner
+
+with DockerRunner("ubuntu:22.04") as container:
+    DockerRunner.run_simple_command(container, cmd="env", print_result=True)
+```
+
+### GitHelper
+
+`GitHelper` methods are run in the current `git` repo and return information about the last commit hash and commit message:
+
+```python
+from tagging.git_helper import GitHelper
+
+print("Git hash:", GitHelper.commit_hash())
+print("Git message:", GitHelper.commit_message())
+```
+
+The first 12 characters of the commit hash are used as an image tag to make it easy to inherit from a fixed version of a docker image.
+
+### Tagger
+
+`Tagger` is a class that can be run inside a docker container to calculate some tag for an image.
+
+All the taggers are inherited from `TaggerInterface`:
+
+```python
+class TaggerInterface:
+    """Common interface for all taggers"""
+
+    @staticmethod
+    def tag_value(container) -> str:
+        raise NotImplementedError
+```
+
+So, the `tag_value(container)` method gets a docker container as an input and returns a tag.
+
+`SHATagger` example:
+
+```python
+from tagging.git_helper import GitHelper
+from tagging.taggers import TaggerInterface
+
+
+class SHATagger(TaggerInterface):
+    @staticmethod
+    def tag_value(container):
+        return GitHelper.commit_hash_tag()
+```
+
+- `taggers.py` contains all the taggers.
+- `tag_image.py` is a Python executable that is used to tag the image.
+
+### Manifest
+
+`ManifestHeader` is a build manifest header.
+It contains the following sections: `Build timestamp`, `Docker image size`, and `Git commit` info.
+
+All the other manifest classes are inherited from `ManifestInterface`:
+
+```python
+class ManifestInterface:
+    """Common interface for all manifests"""
+
+    @staticmethod
+    def markdown_piece(container) -> str:
+        raise NotImplementedError
+```
+
+- The `markdown_piece(container)` method returns a piece of markdown file to be used as a part of the build manifest.
+
+`AptPackagesManifest` example:
+
+```python
+from tagging.manifests import ManifestInterface, quoted_output
+
+
+class AptPackagesManifest(ManifestInterface):
+    @staticmethod
+    def markdown_piece(container) -> str:
+        return f"""\
+## Apt Packages
+
+{quoted_output(container, "apt list --installed")}"""
+```
+
+- `quoted_output` simply runs the command inside a container using `DockerRunner.run_simple_command` and wraps it to triple quotes to create a valid markdown piece.
+  It also adds the command which was run to the markdown piece.
+- `manifests.py` contains all the manifests.
+- `write_manifest.py` is a Python executable that is used to create the build manifest and history line for an image.
+
+### Images Hierarchy
+
+All images' dependencies on each other and what taggers and manifests they make use of are defined in `images_hierarchy.py`.
+
+`get_taggers_and_manifests.py` defines a helper function to get the taggers and manifests for a specific image.
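+
+For example, a rough usage sketch of that helper (hypothetical invocation; the exact class names depend on the hierarchy defined in `images_hierarchy.py`):
+
+```python
+from tagging.get_taggers_and_manifests import get_taggers_and_manifests
+
+# Taggers and manifests accumulate along the parent chain, so for
+# pyspark-notebook this includes the foundation's SHATagger as well as
+# the image's own SparkVersionTagger and SparkInfoManifest.
+taggers, manifests = get_taggers_and_manifests("pyspark-notebook")
+print([type(tagger).__name__ for tagger in taggers])
+print([type(manifest).__name__ for manifest in manifests])
+```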
diff --git a/docker-stacks/tagging/__init__.py b/docker-stacks/tagging/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/docker-stacks/tagging/apply_tags.py b/docker-stacks/tagging/apply_tags.py
new file mode 100755
index 0000000..1c33a8f
--- /dev/null
+++ b/docker-stacks/tagging/apply_tags.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+import argparse
+import logging
+from pathlib import Path
+
+import plumbum
+
+from tagging.get_platform import unify_aarch64
+
+docker = plumbum.local["docker"]
+
+LOGGER = logging.getLogger(__name__)
+
+
+def apply_tags(
+    short_image_name: str,
+    registry: str,
+    owner: str,
+    tags_dir: Path,
+    platform: str,
+) -> None:
+    """
+    Tags <registry>/<owner>/<short_image_name>:latest with the tags reported by all taggers for this image
+    """
+    LOGGER.info(f"Tagging image: {short_image_name}")
+
+    image = f"{registry}/{owner}/{short_image_name}:latest"
+    filename = f"{platform}-{short_image_name}.txt"
+    tags = (tags_dir / filename).read_text().splitlines()
+
+    for tag in tags:
+        LOGGER.info(f"Applying tag: {tag}")
+        docker["tag", image, tag] & plumbum.FG
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument(
+        "--short-image-name",
+        required=True,
+        help="Short image name",
+    )
+    arg_parser.add_argument(
+        "--tags-dir",
+        required=True,
+        type=Path,
+        help="Directory with saved tags file",
+    )
+    arg_parser.add_argument(
+        "--platform",
+        required=True,
+        type=str,
+        choices=["x86_64", "aarch64", "arm64"],
+        help="Image platform",
+    )
+    arg_parser.add_argument(
+        "--registry",
+        required=True,
+        type=str,
+        choices=["docker.io", "quay.io"],
+        help="Image registry",
+    )
+    arg_parser.add_argument(
+        "--owner",
+        required=True,
+        help="Owner of the image",
+    )
+    args = arg_parser.parse_args()
+    args.platform = unify_aarch64(args.platform)
+
+    apply_tags(
+        args.short_image_name, args.registry, args.owner, args.tags_dir, args.platform
+    )
diff --git a/docker-stacks/tagging/docker_runner.py b/docker-stacks/tagging/docker_runner.py
new file mode 100644
index 0000000..9d71f27
--- /dev/null
+++ b/docker-stacks/tagging/docker_runner.py
@@ -0,0 +1,58 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+import logging
+from types import TracebackType
+from typing import Optional
+
+import docker
+from docker.models.containers import Container
+
+LOGGER = logging.getLogger(__name__)
+
+
+class DockerRunner:
+    def __init__(
+        self,
+        image_name: str,
+        docker_client: docker.DockerClient = docker.from_env(),
+        command: str = "sleep infinity",
+    ):
+        self.container: Optional[Container] = None
+        self.image_name: str = image_name
+        self.command: str = command
+        self.docker_client: docker.DockerClient = docker_client
+
+    def __enter__(self) -> Container:
+        LOGGER.info(f"Creating container for image {self.image_name} ...")
+        self.container = self.docker_client.containers.run(
+            image=self.image_name,
+            command=self.command,
+            detach=True,
+        )
+        LOGGER.info(f"Container {self.container.name} created")
+        return self.container
+
+    def __exit__(
+        self,
+        exc_type: Optional[type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
+        assert self.container is not None
+        LOGGER.info(f"Removing container {self.container.name} ...")
+        if self.container:
+            self.container.remove(force=True)
+            LOGGER.info(f"Container {self.container.name} removed")
+
+    @staticmethod
+    def run_simple_command(
+        container: Container, cmd: str, print_result: bool = True
+    ) -> str:
+        LOGGER.info(f"Running cmd: '{cmd}' on container: {container}")
+        out = container.exec_run(cmd)
+        result = out.output.decode("utf-8").rstrip()
+        assert isinstance(result, str)
+        if print_result:
+            LOGGER.info(f"Command result: {result}")
+        assert out.exit_code == 0, f"Command: {cmd} failed"
+        return result
diff --git a/docker-stacks/tagging/get_platform.py b/docker-stacks/tagging/get_platform.py
new file mode 100644
index 0000000..cda791a
--- /dev/null
+++ b/docker-stacks/tagging/get_platform.py
@@ -0,0 +1,21 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+import platform
+
+ALL_PLATFORMS = {"x86_64", "aarch64"}
+
+
+def unify_aarch64(platform: str) -> str:
+    """
+    Renames arm64->aarch64 to support local builds on aarch64 Macs
+    """
+    return {
+        "aarch64": "aarch64",
+        "arm64": "aarch64",
+        "x86_64": "x86_64",
+    }[platform]
+
+
+def get_platform() -> str:
+    machine = platform.machine()
+    return unify_aarch64(machine)
diff --git a/docker-stacks/tagging/get_taggers_and_manifests.py b/docker-stacks/tagging/get_taggers_and_manifests.py
new file mode 100644
index 0000000..d41d7a8
--- /dev/null
+++ b/docker-stacks/tagging/get_taggers_and_manifests.py
@@ -0,0 +1,23 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+from typing import Optional + +from tagging.images_hierarchy import ALL_IMAGES +from tagging.manifests import ManifestInterface +from tagging.taggers import TaggerInterface + + +def get_taggers_and_manifests( + short_image_name: Optional[str], +) -> tuple[list[TaggerInterface], list[ManifestInterface]]: + if short_image_name is None: + return [[], []] # type: ignore + + image_description = ALL_IMAGES[short_image_name] + parent_taggers, parent_manifests = get_taggers_and_manifests( + image_description.parent_image + ) + return ( + parent_taggers + image_description.taggers, + parent_manifests + image_description.manifests, + ) diff --git a/docker-stacks/tagging/git_helper.py b/docker-stacks/tagging/git_helper.py new file mode 100755 index 0000000..d5c0c09 --- /dev/null +++ b/docker-stacks/tagging/git_helper.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import plumbum + +git = plumbum.local["git"] + + +class GitHelper: + @staticmethod + def commit_hash() -> str: + return git["rev-parse", "HEAD"]().strip() # type: ignore + + @staticmethod + def commit_hash_tag() -> str: + return GitHelper.commit_hash()[:12] + + @staticmethod + def commit_message() -> str: + return git["log", -1, "--pretty=%B"]().strip() # type: ignore + + +if __name__ == "__main__": + print("Git hash:", GitHelper.commit_hash()) + print("Git message:", GitHelper.commit_message()) diff --git a/docker-stacks/tagging/images_hierarchy.py b/docker-stacks/tagging/images_hierarchy.py new file mode 100644 index 0000000..8c3e3fd --- /dev/null +++ b/docker-stacks/tagging/images_hierarchy.py @@ -0,0 +1,93 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
+from dataclasses import dataclass, field +from typing import Optional + +from tagging.manifests import ( + AptPackagesManifest, + CondaEnvironmentManifest, + JuliaPackagesManifest, + ManifestInterface, + RPackagesManifest, + SparkInfoManifest, +) +from tagging.taggers import ( + DateTagger, + JavaVersionTagger, + JuliaVersionTagger, + JupyterHubVersionTagger, + JupyterLabVersionTagger, + JupyterNotebookVersionTagger, + PythonMajorMinorVersionTagger, + PythonVersionTagger, + PytorchVersionTagger, + RVersionTagger, + SHATagger, + SparkVersionTagger, + TaggerInterface, + TensorflowVersionTagger, + UbuntuVersionTagger, +) + + +@dataclass +class ImageDescription: + parent_image: Optional[str] + taggers: list[TaggerInterface] = field(default_factory=list) + manifests: list[ManifestInterface] = field(default_factory=list) + + +ALL_IMAGES = { + "docker-stacks-foundation": ImageDescription( + parent_image=None, + taggers=[ + SHATagger(), + DateTagger(), + UbuntuVersionTagger(), + PythonMajorMinorVersionTagger(), + PythonVersionTagger(), + ], + manifests=[CondaEnvironmentManifest(), AptPackagesManifest()], + ), + "base-notebook": ImageDescription( + parent_image="docker-stacks-foundation", + taggers=[ + JupyterNotebookVersionTagger(), + JupyterLabVersionTagger(), + JupyterHubVersionTagger(), + ], + ), + "minimal-notebook": ImageDescription(parent_image="base-notebook"), + "scipy-notebook": ImageDescription(parent_image="minimal-notebook"), + "r-notebook": ImageDescription( + parent_image="minimal-notebook", + taggers=[RVersionTagger()], + manifests=[RPackagesManifest()], + ), + "julia-notebook": ImageDescription( + parent_image="minimal-notebook", + taggers=[JuliaVersionTagger()], + manifests=[JuliaPackagesManifest()], + ), + "tensorflow-notebook": ImageDescription( + parent_image="scipy-notebook", taggers=[TensorflowVersionTagger()] + ), + "pytorch-notebook": ImageDescription( + parent_image="scipy-notebook", taggers=[PytorchVersionTagger()] + ), + "datascience-notebook": ImageDescription( + parent_image="scipy-notebook", + taggers=[RVersionTagger(), JuliaVersionTagger()], + manifests=[RPackagesManifest(), JuliaPackagesManifest()], + ), + "pyspark-notebook": ImageDescription( + parent_image="scipy-notebook", + taggers=[SparkVersionTagger(), JavaVersionTagger()], + manifests=[SparkInfoManifest()], + ), + "all-spark-notebook": ImageDescription( + parent_image="pyspark-notebook", + taggers=[RVersionTagger()], + manifests=[RPackagesManifest()], + ), +} diff --git a/docker-stacks/tagging/manifests.py b/docker-stacks/tagging/manifests.py new file mode 100644 index 0000000..f043de0 --- /dev/null +++ b/docker-stacks/tagging/manifests.py @@ -0,0 +1,121 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
+import plumbum +from docker.models.containers import Container + +from tagging.docker_runner import DockerRunner +from tagging.git_helper import GitHelper + +docker = plumbum.local["docker"] + + +def quoted_output(container: Container, cmd: str) -> str: + cmd_output = DockerRunner.run_simple_command(container, cmd, print_result=False) + # For example, `mamba info --quiet` adds redundant empty lines + cmd_output = cmd_output.strip("\n") + # For example, R packages list contains trailing backspaces + cmd_output = "\n".join(line.rstrip() for line in cmd_output.split("\n")) + return f"""\ +`{cmd}`: + +```text +{cmd_output} +```""" + + +class ManifestHeader: + """ManifestHeader doesn't fall under common interface, and we run it separately""" + + @staticmethod + def create_header( + short_image_name: str, registry: str, owner: str, build_timestamp: str + ) -> str: + commit_hash = GitHelper.commit_hash() + commit_hash_tag = GitHelper.commit_hash_tag() + commit_message = GitHelper.commit_message() + + # Unfortunately, `docker images` doesn't work when specifying `docker.io` as registry + fixed_registry = registry + "/" if registry != "docker.io" else "" + + image_size = docker[ + "images", + f"{fixed_registry}{owner}/{short_image_name}:latest", + "--format", + "{{.Size}}", + ]().rstrip() + + return f"""\ +# Build manifest for image: {short_image_name}:{commit_hash_tag} + +## Build Info + +- Build timestamp: {build_timestamp} +- Docker image: `{registry}/{owner}/{short_image_name}:{commit_hash_tag}` +- Docker image size: {image_size} +- Git commit SHA: [{commit_hash}](https://github.com/jupyter/docker-stacks/commit/{commit_hash}) +- Git commit message: + +```text +{commit_message} +```""" + + +class ManifestInterface: + """Common interface for all manifests""" + + @staticmethod + def markdown_piece(container: Container) -> str: + raise NotImplementedError + + +class CondaEnvironmentManifest(ManifestInterface): + @staticmethod + def markdown_piece(container: Container) -> str: + return f"""\ +## Python Packages + +{DockerRunner.run_simple_command(container, "python --version")} + +{quoted_output(container, "mamba info --quiet")} + +{quoted_output(container, "mamba list")}""" + + +class AptPackagesManifest(ManifestInterface): + @staticmethod + def markdown_piece(container: Container) -> str: + return f"""\ +## Apt Packages + +{quoted_output(container, "apt list --installed")}""" + + +class RPackagesManifest(ManifestInterface): + @staticmethod + def markdown_piece(container: Container) -> str: + return f"""\ +## R Packages + +{quoted_output(container, "R --version")} + +{quoted_output(container, "R --silent -e 'installed.packages(.Library)[, c(1,3)]'")}""" + + +class JuliaPackagesManifest(ManifestInterface): + @staticmethod + def markdown_piece(container: Container) -> str: + return f"""\ +## Julia Packages + +{quoted_output(container, "julia -E 'using InteractiveUtils; versioninfo()'")} + +{quoted_output(container, "julia -E 'import Pkg; Pkg.status()'")}""" + + +class SparkInfoManifest(ManifestInterface): + @staticmethod + def markdown_piece(container: Container) -> str: + return f"""\ +## Apache Spark + +{quoted_output(container, "/usr/local/spark/bin/spark-submit --version")}""" diff --git a/docker-stacks/tagging/merge_tags.py b/docker-stacks/tagging/merge_tags.py new file mode 100755 index 0000000..885a482 --- /dev/null +++ b/docker-stacks/tagging/merge_tags.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +# Copyright (c) Jupyter Development Team. 
+# Distributed under the terms of the Modified BSD License. +import argparse +import logging +from pathlib import Path + +import plumbum + +from tagging.get_platform import ALL_PLATFORMS + +docker = plumbum.local["docker"] + +LOGGER = logging.getLogger(__name__) + + +def merge_tags( + short_image_name: str, + tags_dir: Path, +) -> None: + """ + Merge tags for x86_64 and aarch64 images when possible. + """ + LOGGER.info(f"Merging tags for image: {short_image_name}") + + all_tags: set[str] = set() + + for platform in ALL_PLATFORMS: + filename = f"{platform}-{short_image_name}.txt" + tags = (tags_dir / filename).read_text().splitlines() + all_tags.update(tag.replace(platform + "-", "") for tag in tags) + + LOGGER.info(f"Got tags: {all_tags}") + + for tag in all_tags: + LOGGER.info(f"Trying to merge tag: {tag}") + existing_images = [] + for platform in ALL_PLATFORMS: + image_with_platform = tag.replace(":", f":{platform}-") + LOGGER.info(f"Trying to pull: {image_with_platform}") + try: + docker["pull", image_with_platform] & plumbum.FG + existing_images.append(image_with_platform) + LOGGER.info("Pull success") + except plumbum.ProcessExecutionError: + LOGGER.info( + "Pull failed, image with this tag and platform doesn't exist" + ) + + LOGGER.info(f"Found images: {existing_images}") + docker["manifest", "create", tag][existing_images] & plumbum.FG + docker["manifest", "push", tag] & plumbum.FG + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + arg_parser = argparse.ArgumentParser() + arg_parser.add_argument( + "--short-image-name", + required=True, + help="Short image name", + ) + arg_parser.add_argument( + "--tags-dir", + required=True, + type=Path, + help="Directory with saved tags file", + ) + args = arg_parser.parse_args() + + merge_tags(args.short_image_name, args.tags_dir) diff --git a/docker-stacks/tagging/taggers.py b/docker-stacks/tagging/taggers.py new file mode 100644 index 0000000..1aa6705 --- /dev/null +++ b/docker-stacks/tagging/taggers.py @@ -0,0 +1,130 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
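The merging in `merge_tags` above is plain string manipulation: platform-prefixed tags read from the tags files collapse into one platform-neutral tag, and the per-platform image names are re-derived before each `docker pull`. A short illustration with made-up tag values:

```python
# Illustration only; the tag values are made up.
ALL_PLATFORMS = {"x86_64", "aarch64"}

tag = "quay.io/jupyter/base-notebook:x86_64-python-3.11.6"
# Stripping the platform prefix, as done when reading the tags files:
merged_tag = tag.replace("x86_64-", "")
assert merged_tag == "quay.io/jupyter/base-notebook:python-3.11.6"

# Re-deriving the per-platform names, as done before pulling:
for platform in sorted(ALL_PLATFORMS):
    print(merged_tag.replace(":", f":{platform}-"))
# quay.io/jupyter/base-notebook:aarch64-python-3.11.6
# quay.io/jupyter/base-notebook:x86_64-python-3.11.6
```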
+from datetime import datetime + +from docker.models.containers import Container + +from tagging.docker_runner import DockerRunner +from tagging.git_helper import GitHelper + + +def _get_program_version(container: Container, program: str) -> str: + return DockerRunner.run_simple_command(container, cmd=f"{program} --version") + + +def _get_pip_package_version(container: Container, package: str) -> str: + PIP_VERSION_PREFIX = "Version: " + + package_info = DockerRunner.run_simple_command( + container, + cmd=f"pip show {package}", + print_result=False, + ) + version_line = package_info.split("\n")[1] + assert version_line.startswith(PIP_VERSION_PREFIX) + return version_line[len(PIP_VERSION_PREFIX) :] + + +class TaggerInterface: + """Common interface for all taggers""" + + @staticmethod + def tag_value(container: Container) -> str: + raise NotImplementedError + + +class SHATagger(TaggerInterface): + @staticmethod + def tag_value(container: Container) -> str: + return GitHelper.commit_hash_tag() + + +class DateTagger(TaggerInterface): + @staticmethod + def tag_value(container: Container) -> str: + return datetime.utcnow().strftime("%Y-%m-%d") + + +class UbuntuVersionTagger(TaggerInterface): + @staticmethod + def tag_value(container: Container) -> str: + os_release = DockerRunner.run_simple_command( + container, + "cat /etc/os-release", + ).split("\n") + for line in os_release: + if line.startswith("VERSION_ID"): + return "ubuntu-" + line.split("=")[1].strip('"') + raise RuntimeError(f"did not find ubuntu version in: {os_release}") + + +class PythonVersionTagger(TaggerInterface): + @staticmethod + def tag_value(container: Container) -> str: + return "python-" + _get_program_version(container, "python").split()[1] + + +class PythonMajorMinorVersionTagger(TaggerInterface): + @staticmethod + def tag_value(container: Container) -> str: + full_version = PythonVersionTagger.tag_value(container) + return full_version[: full_version.rfind(".")] + + +class JupyterNotebookVersionTagger(TaggerInterface): + @staticmethod + def tag_value(container: Container) -> str: + return "notebook-" + _get_program_version(container, "jupyter-notebook") + + +class JupyterLabVersionTagger(TaggerInterface): + @staticmethod + def tag_value(container: Container) -> str: + return "lab-" + _get_program_version(container, "jupyter-lab") + + +class JupyterHubVersionTagger(TaggerInterface): + @staticmethod + def tag_value(container: Container) -> str: + return "hub-" + _get_program_version(container, "jupyterhub") + + +class RVersionTagger(TaggerInterface): + @staticmethod + def tag_value(container: Container) -> str: + return "r-" + _get_program_version(container, "R").split()[2] + + +class TensorflowVersionTagger(TaggerInterface): + @staticmethod + def tag_value(container: Container) -> str: + return "tensorflow-" + _get_pip_package_version(container, "tensorflow") + + +class PytorchVersionTagger(TaggerInterface): + @staticmethod + def tag_value(container: Container) -> str: + return "pytorch-" + _get_pip_package_version(container, "torch").split("+")[0] + + +class JuliaVersionTagger(TaggerInterface): + @staticmethod + def tag_value(container: Container) -> str: + return "julia-" + _get_program_version(container, "julia").split()[2] + + +class SparkVersionTagger(TaggerInterface): + @staticmethod + def tag_value(container: Container) -> str: + SPARK_VERSION_LINE_PREFIX = r" /___/ .__/\_,_/_/ /_/\_\ version" + + spark_version = _get_program_version(container, "spark-submit") + version_line = spark_version.split("\n")[4] + assert 
version_line.startswith(SPARK_VERSION_LINE_PREFIX) + return "spark-" + version_line.split(" ")[-1] + + +class JavaVersionTagger(TaggerInterface): + @staticmethod + def tag_value(container: Container) -> str: + return "java-" + _get_program_version(container, "java").split()[1] diff --git a/docker-stacks/tagging/update_wiki.py b/docker-stacks/tagging/update_wiki.py new file mode 100755 index 0000000..292ee2d --- /dev/null +++ b/docker-stacks/tagging/update_wiki.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import argparse +import logging +import shutil +from pathlib import Path + +LOGGER = logging.getLogger(__name__) + + +def update_home_wiki_page(wiki_dir: Path, month: str) -> None: + TABLE_BEGINNING = """\ +| Month | +| ---------------------- | +""" + wiki_home_file = wiki_dir / "Home.md" + wiki_home_content = wiki_home_file.read_text() + month_line = f"| [`{month}`](./{month}) |\n" + if month_line not in wiki_home_content: + assert TABLE_BEGINNING in wiki_home_content + wiki_home_content = wiki_home_content.replace( + TABLE_BEGINNING, TABLE_BEGINNING + month_line + ) + wiki_home_file.write_text(wiki_home_content) + LOGGER.info(f"Updated wiki home page with month: {month}") + + +def update_monthly_wiki_page( + wiki_dir: Path, month: str, build_history_line: str +) -> None: + MONTHLY_PAGE_HEADER = f"""\ +# Images built during {month} + +| Date | Image | Links | +| - | - | - | +""" + monthly_page = wiki_dir / "monthly-files" / (month + ".md") + if not monthly_page.exists(): + monthly_page.write_text(MONTHLY_PAGE_HEADER) + LOGGER.info(f"Created monthly page: {monthly_page.relative_to(wiki_dir)}") + + monthly_page_content = monthly_page.read_text() + assert MONTHLY_PAGE_HEADER in monthly_page_content + monthly_page_content = monthly_page_content.replace( + MONTHLY_PAGE_HEADER, MONTHLY_PAGE_HEADER + build_history_line + "\n" + ) + monthly_page.write_text(monthly_page_content) + LOGGER.info(f"Updated monthly page: {monthly_page.relative_to(wiki_dir)}") + + +def get_manifest_timestamp(manifest_file: Path) -> str: + file_content = manifest_file.read_text() + TIMESTAMP_PREFIX = "Build timestamp: " + TIMESTAMP_LENGTH = 20 + timestamp = file_content[ + file_content.find(TIMESTAMP_PREFIX) + len(TIMESTAMP_PREFIX) : + ][:TIMESTAMP_LENGTH] + # Should be good enough till year 2100 + assert timestamp.startswith("20"), timestamp + assert timestamp.endswith("Z"), timestamp + return timestamp + + +def get_manifest_month(manifest_file: Path) -> str: + return get_manifest_timestamp(manifest_file)[:7] + + +def remove_old_manifests(wiki_dir: Path) -> None: + MAX_NUMBER_OF_MANIFESTS = 4500 + + manifest_files: list[tuple[str, Path]] = [] + for file in (wiki_dir / "manifests").rglob("*.md"): + manifest_files.append((get_manifest_timestamp(file), file)) + + manifest_files.sort(reverse=True) + for _, file in manifest_files[MAX_NUMBER_OF_MANIFESTS:]: + file.unlink() + LOGGER.info(f"Removed manifest: {file.relative_to(wiki_dir)}") + + +def update_wiki(wiki_dir: Path, hist_lines_dir: Path, manifests_dir: Path) -> None: + LOGGER.info("Updating wiki") + + manifest_files = list(manifests_dir.rglob("*.md")) + assert manifest_files, "expected to have some manifest files" + for manifest_file in manifest_files: + month = get_manifest_month(manifest_file) + copy_to = wiki_dir / "manifests" / month / manifest_file.name + copy_to.parent.mkdir(exist_ok=True) + shutil.copy(manifest_file, copy_to) + LOGGER.info(f"Added manifest 
file: {copy_to.relative_to(wiki_dir)}") + + build_history_line_files = sorted(hist_lines_dir.rglob("*.txt")) + assert build_history_line_files, "expected to have some build history line files" + for build_history_line_file in build_history_line_files: + build_history_line = build_history_line_file.read_text() + assert build_history_line.startswith("| `") + month = build_history_line[3:10] + update_home_wiki_page(wiki_dir, month) + update_monthly_wiki_page(wiki_dir, month, build_history_line) + + remove_old_manifests(wiki_dir) + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + arg_parser = argparse.ArgumentParser() + arg_parser.add_argument( + "--wiki-dir", + required=True, + type=Path, + help="Directory of the wiki repo", + ) + arg_parser.add_argument( + "--hist-lines-dir", + required=True, + type=Path, + help="Directory with history lines", + ) + arg_parser.add_argument( + "--manifests-dir", + required=True, + type=Path, + help="Directory with manifest files", + ) + args = arg_parser.parse_args() + + update_wiki(args.wiki_dir, args.hist_lines_dir, args.manifests_dir) diff --git a/docker-stacks/tagging/write_manifest.py b/docker-stacks/tagging/write_manifest.py new file mode 100755 index 0000000..c4605fd --- /dev/null +++ b/docker-stacks/tagging/write_manifest.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import argparse +import datetime +import logging +from pathlib import Path + +from docker.models.containers import Container + +from tagging.docker_runner import DockerRunner +from tagging.get_platform import get_platform +from tagging.get_taggers_and_manifests import get_taggers_and_manifests +from tagging.git_helper import GitHelper +from tagging.manifests import ManifestHeader, ManifestInterface + +LOGGER = logging.getLogger(__name__) + +# We use a manifest creation timestamp, which happens right after a build +BUILD_TIMESTAMP = datetime.datetime.utcnow().isoformat()[:-7] + "Z" +MARKDOWN_LINE_BREAK = "
<br />
" + + +def write_build_history_line( + short_image_name: str, + registry: str, + owner: str, + hist_lines_dir: Path, + filename: str, + all_tags: list[str], +) -> None: + LOGGER.info("Appending build history line") + + date_column = f"`{BUILD_TIMESTAMP}`" + image_column = MARKDOWN_LINE_BREAK.join( + f"`{registry}/{owner}/{short_image_name}:{tag_value}`" for tag_value in all_tags + ) + commit_hash = GitHelper.commit_hash() + links_column = MARKDOWN_LINE_BREAK.join( + [ + f"[Git diff](https://github.com/jupyter/docker-stacks/commit/{commit_hash})", + f"[Dockerfile](https://github.com/jupyter/docker-stacks/blob/{commit_hash}/images/{short_image_name}/Dockerfile)", + f"[Build manifest](./{filename})", + ] + ) + build_history_line = f"| {date_column} | {image_column} | {links_column} |" + hist_lines_dir.mkdir(parents=True, exist_ok=True) + (hist_lines_dir / f"{filename}.txt").write_text(build_history_line) + + +def write_manifest_file( + short_image_name: str, + registry: str, + owner: str, + manifests_dir: Path, + filename: str, + manifests: list[ManifestInterface], + container: Container, +) -> None: + manifest_names = [manifest.__class__.__name__ for manifest in manifests] + LOGGER.info(f"Using manifests: {manifest_names}") + + markdown_pieces = [ + ManifestHeader.create_header(short_image_name, registry, owner, BUILD_TIMESTAMP) + ] + [manifest.markdown_piece(container) for manifest in manifests] + markdown_content = "\n\n".join(markdown_pieces) + "\n" + + manifests_dir.mkdir(parents=True, exist_ok=True) + (manifests_dir / f"{filename}.md").write_text(markdown_content) + + +def write_manifest( + short_image_name: str, + registry: str, + owner: str, + hist_lines_dir: Path, + manifests_dir: Path, +) -> None: + LOGGER.info(f"Creating manifests for image: {short_image_name}") + taggers, manifests = get_taggers_and_manifests(short_image_name) + + image = f"{registry}/{owner}/{short_image_name}:latest" + + file_prefix = get_platform() + commit_hash_tag = GitHelper.commit_hash_tag() + filename = f"{file_prefix}-{short_image_name}-{commit_hash_tag}" + + with DockerRunner(image) as container: + tags_prefix = get_platform() + all_tags = [ + tags_prefix + "-" + tagger.tag_value(container) for tagger in taggers + ] + write_build_history_line( + short_image_name, registry, owner, hist_lines_dir, filename, all_tags + ) + write_manifest_file( + short_image_name, + registry, + owner, + manifests_dir, + filename, + manifests, + container, + ) + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + arg_parser = argparse.ArgumentParser() + arg_parser.add_argument( + "--short-image-name", + required=True, + help="Short image name", + ) + arg_parser.add_argument( + "--hist-lines-dir", + required=True, + type=Path, + help="Directory to save history line", + ) + arg_parser.add_argument( + "--manifests-dir", + required=True, + type=Path, + help="Directory to save manifest file", + ) + arg_parser.add_argument( + "--registry", + required=True, + type=str, + choices=["docker.io", "quay.io"], + help="Image registry", + ) + arg_parser.add_argument( + "--owner", + required=True, + help="Owner of the image", + ) + args = arg_parser.parse_args() + + LOGGER.info(f"Current build timestamp: {BUILD_TIMESTAMP}") + + write_manifest( + args.short_image_name, + args.registry, + args.owner, + args.hist_lines_dir, + args.manifests_dir, + ) diff --git a/docker-stacks/tagging/write_tags_file.py b/docker-stacks/tagging/write_tags_file.py new file mode 100755 index 0000000..880fec4 --- /dev/null +++ 
b/docker-stacks/tagging/write_tags_file.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+import argparse
+import logging
+from pathlib import Path
+
+from tagging.docker_runner import DockerRunner
+from tagging.get_platform import get_platform
+from tagging.get_taggers_and_manifests import get_taggers_and_manifests
+
+LOGGER = logging.getLogger(__name__)
+
+
+def write_tags_file(
+    short_image_name: str,
+    registry: str,
+    owner: str,
+    tags_dir: Path,
+) -> None:
+    """
+    Writes tags file for the image <registry>/<owner>/<short_image_name>:latest
+    """
+    LOGGER.info(f"Tagging image: {short_image_name}")
+    taggers, _ = get_taggers_and_manifests(short_image_name)
+
+    image = f"{registry}/{owner}/{short_image_name}:latest"
+    tags_prefix = get_platform()
+    filename = f"{tags_prefix}-{short_image_name}.txt"
+
+    tags = [f"{registry}/{owner}/{short_image_name}:{tags_prefix}-latest"]
+    with DockerRunner(image) as container:
+        for tagger in taggers:
+            tagger_name = tagger.__class__.__name__
+            tag_value = tagger.tag_value(container)
+            LOGGER.info(
+                f"Calculated tag, tagger_name: {tagger_name} tag_value: {tag_value}"
+            )
+            tags.append(
+                f"{registry}/{owner}/{short_image_name}:{tags_prefix}-{tag_value}"
+            )
+    tags_dir.mkdir(parents=True, exist_ok=True)
+    (tags_dir / filename).write_text("\n".join(tags))
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument(
+        "--short-image-name",
+        required=True,
+        help="Short image name",
+    )
+    arg_parser.add_argument(
+        "--tags-dir",
+        required=True,
+        type=Path,
+        help="Directory to save tags file",
+    )
+    arg_parser.add_argument(
+        "--registry",
+        required=True,
+        type=str,
+        choices=["docker.io", "quay.io"],
+        help="Image registry",
+    )
+    arg_parser.add_argument(
+        "--owner",
+        required=True,
+        help="Owner of the image",
+    )
+    args = arg_parser.parse_args()
+
+    write_tags_file(args.short_image_name, args.registry, args.owner, args.tags_dir)
diff --git a/docker-stacks/tests/README.md b/docker-stacks/tests/README.md
new file mode 100644
index 0000000..b477e2d
--- /dev/null
+++ b/docker-stacks/tests/README.md
@@ -0,0 +1,3 @@
+# Docker stacks testing
+
+Please refer to the [testing section of the documentation](https://jupyter-docker-stacks.readthedocs.io/en/latest/contributing/tests.html) to see how the tests are run.
diff --git a/docker-stacks/tests/R_mimetype_check.py b/docker-stacks/tests/R_mimetype_check.py
new file mode 100644
index 0000000..ba90fa5
--- /dev/null
+++ b/docker-stacks/tests/R_mimetype_check.py
@@ -0,0 +1,24 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
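The README above defers to the project documentation, but the entry point is visible in `tests/conftest.py` further below: the image under test comes from the `TEST_IMAGE` environment variable. A hedged sketch of a local run (the image name is only an example):

```python
# Sketch of a local test run; TEST_IMAGE is read by the image_name fixture
# in tests/conftest.py below.
import os
import sys

import pytest

os.environ["TEST_IMAGE"] = "quay.io/jupyter/base-notebook"  # example image
sys.exit(pytest.main(["tests/base-notebook", "-v"]))
```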
+import logging + +from tests.conftest import TrackedContainer + +LOGGER = logging.getLogger(__name__) + + +def check_r_mimetypes(container: TrackedContainer) -> None: + """Check if Rscript command can be executed""" + LOGGER.info("Test that R command can be executed ...") + R_MIMETYPES_CHECK_CMD = 'if (length(getOption("jupyter.plot_mimetypes")) != 5) {stop("missing jupyter.plot_mimetypes")}' + command = ["Rscript", "-e", R_MIMETYPES_CHECK_CMD] + logs = container.run_and_wait( + timeout=10, + tty=True, + command=command, + ) + LOGGER.debug(f"{logs=}") + # If there is any output after this it means there was an error + assert logs.splitlines()[-1] == "Executing the command: " + " ".join( + command + ), f"Command {R_MIMETYPES_CHECK_CMD=} failed" diff --git a/docker-stacks/tests/__init__.py b/docker-stacks/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docker-stacks/tests/all-spark-notebook/data/issue_1168.ipynb b/docker-stacks/tests/all-spark-notebook/data/issue_1168.ipynb new file mode 100644 index 0000000..4d34371 --- /dev/null +++ b/docker-stacks/tests/all-spark-notebook/data/issue_1168.ipynb @@ -0,0 +1,61 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is a test for the issue [#1168](https://github.com/jupyter/docker-stacks/issues/1168)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pyspark.sql import SparkSession\n", + "\n", + "# Spark session & context\n", + "spark = SparkSession.builder.master(\"local\").getOrCreate()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = spark.createDataFrame([(1, 21), (2, 30)], (\"id\", \"age\"))\n", + "\n", + "\n", + "def filter_func(iterator):\n", + " for pdf in iterator:\n", + " yield pdf[pdf.id == 1]\n", + "\n", + "\n", + "df.mapInPandas(filter_func, df.schema).show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docker-stacks/tests/all-spark-notebook/data/local_pyspark.ipynb b/docker-stacks/tests/all-spark-notebook/data/local_pyspark.ipynb new file mode 100644 index 0000000..fb379ae --- /dev/null +++ b/docker-stacks/tests/all-spark-notebook/data/local_pyspark.ipynb @@ -0,0 +1,43 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pyspark.sql import SparkSession\n", + "\n", + "# Spark session & context\n", + "spark = SparkSession.builder.master(\"local\").getOrCreate()\n", + "sc = spark.sparkContext\n", + "\n", + "# Sum of the first 100 whole numbers\n", + "rdd = sc.parallelize(range(100 + 1))\n", + "rdd.sum()\n", + "# 5050" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff 
--git a/docker-stacks/tests/all-spark-notebook/data/local_sparkR.ipynb b/docker-stacks/tests/all-spark-notebook/data/local_sparkR.ipynb new file mode 100644 index 0000000..aaa11ba --- /dev/null +++ b/docker-stacks/tests/all-spark-notebook/data/local_sparkR.ipynb @@ -0,0 +1,41 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "library(SparkR)\n", + "\n", + "# Spark session & context\n", + "sc <- sparkR.session(\"local\")\n", + "\n", + "# Sum of the first 100 whole numbers\n", + "sdf <- createDataFrame(list(1:100))\n", + "dapplyCollect(sdf,\n", + " function(x) \n", + " { x <- sum(x)}\n", + " )\n", + "# 5050" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.6.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docker-stacks/tests/all-spark-notebook/data/local_sparklyr.ipynb b/docker-stacks/tests/all-spark-notebook/data/local_sparklyr.ipynb new file mode 100644 index 0000000..be6651b --- /dev/null +++ b/docker-stacks/tests/all-spark-notebook/data/local_sparklyr.ipynb @@ -0,0 +1,43 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "library(sparklyr)\n", + "\n", + "# get the default config\n", + "conf <- spark_config()\n", + "# Set the catalog implementation in-memory\n", + "conf$spark.sql.catalogImplementation <- \"in-memory\"\n", + "\n", + "# Spark session & context\n", + "sc <- spark_connect(master = \"local\", config = conf)\n", + "\n", + "# Sum of the first 100 whole numbers\n", + "sdf_len(sc, 100, repartition = 1) %>% \n", + " spark_apply(function(e) sum(e))\n", + "# 5050" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.6.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docker-stacks/tests/all-spark-notebook/test_spark_notebooks.py b/docker-stacks/tests/all-spark-notebook/test_spark_notebooks.py new file mode 100644 index 0000000..7e54e5b --- /dev/null +++ b/docker-stacks/tests/all-spark-notebook/test_spark_notebooks.py @@ -0,0 +1,40 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
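Each of the local-Spark notebooks above computes the same checksum, the sum of the first 100 whole numbers, so a wrong result in an executed notebook is easy to spot; the expected value in their trailing comments checks out:

```python
# The notebooks parallelize range(100 + 1) (or 1:100 in R) and sum it.
assert sum(range(100 + 1)) == 5050
```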
+import logging +from pathlib import Path + +import pytest # type: ignore + +from tests.conftest import TrackedContainer + +LOGGER = logging.getLogger(__name__) +THIS_DIR = Path(__file__).parent.resolve() + + +@pytest.mark.flaky(retries=3, delay=1) +@pytest.mark.parametrize( + "test_file", + ["issue_1168", "local_pyspark", "local_sparklyr", "local_sparkR"], +) +def test_nbconvert(container: TrackedContainer, test_file: str) -> None: + """Check if Spark notebooks can be executed""" + host_data_dir = THIS_DIR / "data" + cont_data_dir = "/home/jovyan/data" + output_dir = "/tmp" + conversion_timeout_ms = 5000 + LOGGER.info(f"Test that {test_file} notebook can be executed ...") + command = ( + "jupyter nbconvert --to markdown " + + f"--ExecutePreprocessor.timeout={conversion_timeout_ms} " + + f"--output-dir {output_dir} " + + f"--execute {cont_data_dir}/{test_file}.ipynb" + ) + logs = container.run_and_wait( + timeout=60, + volumes={str(host_data_dir): {"bind": cont_data_dir, "mode": "ro"}}, + tty=True, + command=["bash", "-c", command], + ) + + expected_file = f"{output_dir}/{test_file}.md" + assert expected_file in logs, f"Expected file {expected_file} not generated" diff --git a/docker-stacks/tests/base-notebook/test_container_options.py b/docker-stacks/tests/base-notebook/test_container_options.py new file mode 100644 index 0000000..b330c1e --- /dev/null +++ b/docker-stacks/tests/base-notebook/test_container_options.py @@ -0,0 +1,115 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import logging +import time + +import pytest # type: ignore +import requests + +from tests.conftest import TrackedContainer, find_free_port + +LOGGER = logging.getLogger(__name__) + + +def test_cli_args(container: TrackedContainer, http_client: requests.Session) -> None: + """Image should respect command line args (e.g., disabling token security)""" + host_port = find_free_port() + running_container = container.run_detached( + command=["start-notebook.py", "--IdentityProvider.token=''"], + ports={"8888/tcp": host_port}, + ) + resp = http_client.get(f"http://localhost:{host_port}") + resp.raise_for_status() + logs = running_container.logs().decode("utf-8") + LOGGER.debug(logs) + assert "ERROR" not in logs + warnings = TrackedContainer.get_warnings(logs) + assert not warnings + assert "login_submit" not in resp.text + + +def test_nb_user_change(container: TrackedContainer) -> None: + """Container should change the username (`NB_USER`) of the default user.""" + nb_user = "nayvoj" + running_container = container.run_detached( + tty=True, + user="root", + environment=[f"NB_USER={nb_user}", "CHOWN_HOME=yes"], + command=["bash", "-c", "sleep infinity"], + ) + + # Give the chown time to complete. + # Use sleep, not wait, because the container sleeps forever. + time.sleep(1) + LOGGER.info( + f"Checking if a home folder of {nb_user} contains the hidden '.jupyter' folder with appropriate permissions ..." + ) + command = f'stat -c "%F %U %G" /home/{nb_user}/.jupyter' + expected_output = f"directory {nb_user} users" + cmd = running_container.exec_run(command, workdir=f"/home/{nb_user}") + output = cmd.output.decode("utf-8").strip("\n") + assert ( + output == expected_output + ), f"Hidden folder .jupyter was not copied properly to {nb_user} home folder. 
stat: {output}, expected {expected_output}" + + +@pytest.mark.filterwarnings("ignore:Unverified HTTPS request") +def test_unsigned_ssl( + container: TrackedContainer, http_client: requests.Session +) -> None: + """Container should generate a self-signed SSL certificate + and Jupyter Server should use it to enable HTTPS. + """ + host_port = find_free_port() + running_container = container.run_detached( + environment=["GEN_CERT=yes"], + ports={"8888/tcp": host_port}, + ) + # NOTE: The requests.Session backing the http_client fixture + # does not retry properly while the server is booting up. + # An SSL handshake error seems to abort the retry logic. + # Forcing a long sleep for the moment until I have time to dig more. + time.sleep(1) + resp = http_client.get(f"https://localhost:{host_port}", verify=False) + resp.raise_for_status() + assert "login_submit" in resp.text + logs = running_container.logs().decode("utf-8") + assert "ERROR" not in logs + warnings = TrackedContainer.get_warnings(logs) + assert not warnings + + +@pytest.mark.parametrize( + "env", + [ + {}, + {"JUPYTER_PORT": 1234, "DOCKER_STACKS_JUPYTER_CMD": "lab"}, + {"JUPYTER_PORT": 2345, "DOCKER_STACKS_JUPYTER_CMD": "notebook"}, + {"JUPYTER_PORT": 3456, "DOCKER_STACKS_JUPYTER_CMD": "server"}, + {"JUPYTER_PORT": 4567, "DOCKER_STACKS_JUPYTER_CMD": "nbclassic"}, + {"JUPYTER_PORT": 5678, "RESTARTABLE": "yes"}, + {"JUPYTER_PORT": 6789}, + {"JUPYTER_PORT": 7890, "DOCKER_STACKS_JUPYTER_CMD": "notebook"}, + ], +) +def test_custom_internal_port( + container: TrackedContainer, + http_client: requests.Session, + env: dict[str, str], +) -> None: + """Container should be accessible from the host + when using custom internal port""" + host_port = find_free_port() + internal_port = env.get("JUPYTER_PORT", 8888) + running_container = container.run_detached( + command=["start-notebook.py", "--IdentityProvider.token=''"], + environment=env, + ports={internal_port: host_port}, + ) + resp = http_client.get(f"http://localhost:{host_port}") + resp.raise_for_status() + logs = running_container.logs().decode("utf-8") + LOGGER.debug(logs) + assert "ERROR" not in logs + warnings = TrackedContainer.get_warnings(logs) + assert not warnings diff --git a/docker-stacks/tests/base-notebook/test_healthcheck.py b/docker-stacks/tests/base-notebook/test_healthcheck.py new file mode 100644 index 0000000..d5874c9 --- /dev/null +++ b/docker-stacks/tests/base-notebook/test_healthcheck.py @@ -0,0 +1,159 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
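The healthcheck tests below all poll the container's Docker health status in the same sleep loop. A hedged refactoring sketch of that shared pattern (`wait_for_health` is hypothetical and not part of this diff):

```python
# Hypothetical helper mirroring the poll loops in the tests below.
import time

from docker.models.containers import Container

from tests.conftest import get_health  # defined further below in this diff


def wait_for_health(container: Container, timeout: float = 10.0) -> str:
    """Poll the Docker health status until it turns healthy or timeout expires."""
    finish_time = time.time() + timeout
    while time.time() < finish_time:
        if get_health(container) == "healthy":
            break
        time.sleep(0.1)
    return get_health(container)
```

A positive test then asserts `wait_for_health(running_container) == "healthy"`, and the negative cases assert the opposite.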
+import logging +import time +from typing import Optional + +import pytest # type: ignore + +from tests.conftest import TrackedContainer, get_health + +LOGGER = logging.getLogger(__name__) + + +@pytest.mark.parametrize( + "env,cmd,user", + [ + (None, None, None), + (["DOCKER_STACKS_JUPYTER_CMD=lab"], None, None), + (["DOCKER_STACKS_JUPYTER_CMD=notebook"], None, None), + (["DOCKER_STACKS_JUPYTER_CMD=server"], None, None), + (["DOCKER_STACKS_JUPYTER_CMD=nbclassic"], None, None), + (["RESTARTABLE=yes"], None, None), + (["JUPYTER_PORT=8171"], None, None), + (["JUPYTER_PORT=8117", "DOCKER_STACKS_JUPYTER_CMD=notebook"], None, None), + (None, ["start-notebook.sh"], None), + (None, ["start-notebook.py", "--ServerApp.base_url=/test"], None), + (None, ["start-notebook.py", "--ServerApp.base_url=/test/"], None), + (["GEN_CERT=1"], ["start-notebook.py", "--ServerApp.base_url=/test"], None), + ( + ["GEN_CERT=1", "JUPYTER_PORT=7891"], + ["start-notebook.py", "--ServerApp.base_url=/test"], + None, + ), + (["NB_USER=testuser", "CHOWN_HOME=1"], None, "root"), + ( + ["NB_USER=testuser", "CHOWN_HOME=1"], + ["start-notebook.py", "--ServerApp.base_url=/test"], + "root", + ), + ( + ["NB_USER=testuser", "CHOWN_HOME=1", "JUPYTER_PORT=8123"], + ["start-notebook.py", "--ServerApp.base_url=/test"], + "root", + ), + (["JUPYTER_RUNTIME_DIR=/tmp/jupyter-runtime"], ["start-notebook.sh"], None), + ( + [ + "NB_USER=testuser", + "CHOWN_HOME=1", + "JUPYTER_RUNTIME_DIR=/tmp/jupyter-runtime", + ], + ["start-notebook.sh"], + "root", + ), + ], +) +def test_healthy( + container: TrackedContainer, + env: Optional[list[str]], + cmd: Optional[list[str]], + user: Optional[str], +) -> None: + running_container = container.run_detached( + tty=True, + environment=env, + command=cmd, + user=user, + ) + + # giving some time to let the server start + finish_time = time.time() + 10 + sleep_time = 0.1 + while time.time() < finish_time: + time.sleep(sleep_time) + if get_health(running_container) == "healthy": + return + + assert get_health(running_container) == "healthy" + + +@pytest.mark.parametrize( + "env,cmd,user", + [ + ( + ["HTTPS_PROXY=host.docker.internal", "HTTP_PROXY=host.docker.internal"], + None, + None, + ), + ( + [ + "NB_USER=testuser", + "CHOWN_HOME=1", + "JUPYTER_PORT=8123", + "HTTPS_PROXY=host.docker.internal", + "HTTP_PROXY=host.docker.internal", + ], + ["start-notebook.py", "--ServerApp.base_url=/test"], + "root", + ), + ], +) +def test_healthy_with_proxy( + container: TrackedContainer, + env: Optional[list[str]], + cmd: Optional[list[str]], + user: Optional[str], +) -> None: + running_container = container.run_detached( + tty=True, + environment=env, + command=cmd, + user=user, + ) + + # giving some time to let the server start + finish_time = time.time() + 10 + sleep_time = 0.1 + while time.time() < finish_time: + time.sleep(sleep_time) + if get_health(running_container) == "healthy": + return + + assert get_health(running_container) == "healthy" + + +@pytest.mark.parametrize( + "env,cmd", + [ + (["NB_USER=testuser", "CHOWN_HOME=1"], None), + ( + ["NB_USER=testuser", "CHOWN_HOME=1"], + ["start-notebook.py", "--ServerApp.base_url=/test"], + ), + ( + ["NB_USER=testuser", "CHOWN_HOME=1", "JUPYTER_PORT=8123"], + ["start-notebook.py", "--ServerApp.base_url=/test"], + ), + ], +) +def test_not_healthy( + container: TrackedContainer, + env: Optional[list[str]], + cmd: Optional[list[str]], +) -> None: + running_container = container.run_detached( + tty=True, + environment=env, + command=cmd, + ) + + # giving some time to let the 
server start + finish_time = time.time() + 5 + sleep_time = 0.1 + while time.time() < finish_time: + time.sleep(sleep_time) + if get_health(running_container) == "healthy": + raise RuntimeError("Container should not be healthy for this testcase") + + assert get_health(running_container) != "healthy" diff --git a/docker-stacks/tests/base-notebook/test_notebook.py b/docker-stacks/tests/base-notebook/test_notebook.py new file mode 100644 index 0000000..3985990 --- /dev/null +++ b/docker-stacks/tests/base-notebook/test_notebook.py @@ -0,0 +1,16 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import requests + +from tests.conftest import TrackedContainer, find_free_port + + +def test_secured_server( + container: TrackedContainer, http_client: requests.Session +) -> None: + """Jupyter Server should eventually request user login.""" + host_port = find_free_port() + container.run_detached(ports={"8888/tcp": host_port}) + resp = http_client.get(f"http://localhost:{host_port}") + resp.raise_for_status() + assert "login_submit" in resp.text, "User login not requested" diff --git a/docker-stacks/tests/base-notebook/test_npm_package_manager.py b/docker-stacks/tests/base-notebook/test_npm_package_manager.py new file mode 100644 index 0000000..14f5095 --- /dev/null +++ b/docker-stacks/tests/base-notebook/test_npm_package_manager.py @@ -0,0 +1,9 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +from tests.conftest import TrackedContainer +from tests.run_command import run_command + + +def test_npm_package_manager(container: TrackedContainer) -> None: + """Test that npm is installed and runs.""" + run_command(container, "npm --version") diff --git a/docker-stacks/tests/base-notebook/test_pandoc.py b/docker-stacks/tests/base-notebook/test_pandoc.py new file mode 100644 index 0000000..3c828a3 --- /dev/null +++ b/docker-stacks/tests/base-notebook/test_pandoc.py @@ -0,0 +1,17 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import logging + +from tests.conftest import TrackedContainer + +LOGGER = logging.getLogger(__name__) + + +def test_pandoc(container: TrackedContainer) -> None: + """Pandoc shall be able to convert MD to HTML.""" + logs = container.run_and_wait( + timeout=10, + tty=True, + command=["bash", "-c", 'echo "**BOLD**" | pandoc'], + ) + assert "
<p><strong>BOLD</strong></p>
" in logs diff --git a/docker-stacks/tests/base-notebook/test_start_container.py b/docker-stacks/tests/base-notebook/test_start_container.py new file mode 100644 index 0000000..729e7ca --- /dev/null +++ b/docker-stacks/tests/base-notebook/test_start_container.py @@ -0,0 +1,85 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import logging +import time +from typing import Optional + +import pytest # type: ignore +import requests + +from tests.conftest import TrackedContainer, find_free_port + +LOGGER = logging.getLogger(__name__) + + +@pytest.mark.parametrize( + "env,expected_command,expected_start,expected_warnings", + [ + (None, "jupyter lab", True, []), + (["DOCKER_STACKS_JUPYTER_CMD=lab"], "jupyter lab", True, []), + (["RESTARTABLE=yes"], "run-one-constantly jupyter lab", True, []), + (["DOCKER_STACKS_JUPYTER_CMD=notebook"], "jupyter notebook", True, []), + (["DOCKER_STACKS_JUPYTER_CMD=server"], "jupyter server", True, []), + (["DOCKER_STACKS_JUPYTER_CMD=nbclassic"], "jupyter nbclassic", True, []), + ( + ["JUPYTERHUB_API_TOKEN=my_token"], + "jupyterhub-singleuser", + False, + ["WARNING: using start-singleuser.py"], + ), + ], +) +def test_start_notebook( + container: TrackedContainer, + http_client: requests.Session, + env: Optional[list[str]], + expected_command: str, + expected_start: bool, + expected_warnings: list[str], +) -> None: + """Test the notebook start-notebook.py script""" + LOGGER.info( + f"Test that the start-notebook.py launches the {expected_command} server from the env {env} ..." + ) + host_port = find_free_port() + running_container = container.run_detached( + tty=True, + environment=env, + ports={"8888/tcp": host_port}, + ) + # sleeping some time to let the server start + time.sleep(2) + logs = running_container.logs().decode("utf-8") + LOGGER.debug(logs) + # checking that the expected command is launched + assert ( + f"Executing: {expected_command}" in logs + ), f"Not the expected command ({expected_command}) was launched" + # checking errors and warnings in logs + assert "ERROR" not in logs, "ERROR(s) found in logs" + for exp_warning in expected_warnings: + assert exp_warning in logs, f"Expected warning {exp_warning} not found in logs" + warnings = TrackedContainer.get_warnings(logs) + assert len(expected_warnings) == len(warnings) + # checking if the server is listening + if expected_start: + resp = http_client.get(f"http://localhost:{host_port}") + assert resp.status_code == 200, "Server is not listening" + + +def test_tini_entrypoint( + container: TrackedContainer, pid: int = 1, command: str = "tini" +) -> None: + """Check that tini is launched as PID 1 + + Credits to the following answer for the ps options used in the test: + https://superuser.com/questions/632979/if-i-know-the-pid-number-of-a-process-how-can-i-get-its-name + """ + LOGGER.info(f"Test that {command} is launched as PID {pid} ...") + running_container = container.run_detached(tty=True) + # Select the PID 1 and get the corresponding command + cmd = running_container.exec_run(f"ps -p {pid} -o comm=") + output = cmd.output.decode("utf-8").strip("\n") + assert "ERROR" not in output + assert "WARNING" not in output + assert output == command, f"{command} shall be launched as pid {pid}, got {output}" diff --git a/docker-stacks/tests/conftest.py b/docker-stacks/tests/conftest.py new file mode 100644 index 0000000..a151eb6 --- /dev/null +++ b/docker-stacks/tests/conftest.py @@ -0,0 +1,157 @@ +# Copyright (c) Jupyter Development Team. 
+# Distributed under the terms of the Modified BSD License. +import logging +import os +import socket +from contextlib import closing +from typing import Any, Optional + +import docker +import pytest # type: ignore +import requests +from docker.models.containers import Container +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +LOGGER = logging.getLogger(__name__) + + +def find_free_port() -> str: + """Returns the available host port. Can be called in multiple threads/processes.""" + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + s.bind(("", 0)) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + return s.getsockname()[1] # type: ignore + + +def get_health(container: Container) -> str: + api_client = docker.APIClient() + inspect_results = api_client.inspect_container(container.name) + return inspect_results["State"]["Health"]["Status"] # type: ignore + + +@pytest.fixture(scope="session") +def http_client() -> requests.Session: + """Requests session with retries and backoff.""" + s = requests.Session() + retries = Retry(total=5, backoff_factor=1) + s.mount("http://", HTTPAdapter(max_retries=retries)) + s.mount("https://", HTTPAdapter(max_retries=retries)) + return s + + +@pytest.fixture(scope="session") +def docker_client() -> docker.DockerClient: + """Docker client configured based on the host environment""" + return docker.from_env() + + +@pytest.fixture(scope="session") +def image_name() -> str: + """Image name to test""" + return os.environ["TEST_IMAGE"] + + +class TrackedContainer: + """Wrapper that collects docker container configuration and delays + container creation/execution. + + Parameters + ---------- + docker_client: docker.DockerClient + Docker client instance + image_name: str + Name of the docker image to launch + **kwargs: dict, optional + Default keyword arguments to pass to docker.DockerClient.containers.run + """ + + def __init__( + self, + docker_client: docker.DockerClient, + image_name: str, + **kwargs: Any, + ): + self.container: Optional[Container] = None + self.docker_client: docker.DockerClient = docker_client + self.image_name: str = image_name + self.kwargs: Any = kwargs + + def run_detached(self, **kwargs: Any) -> Container: + """Runs a docker container using the pre-configured image name + and a mix of the pre-configured container options and those passed + to this method. + + Keeps track of the docker.Container instance spawned to kill it + later. 
+ + Parameters + ---------- + **kwargs: dict, optional + Keyword arguments to pass to docker.DockerClient.containers.run + extending and/or overriding key/value pairs passed to the constructor + + Returns + ------- + docker.Container + """ + all_kwargs = self.kwargs | kwargs + LOGGER.info(f"Running {self.image_name} with args {all_kwargs} ...") + self.container = self.docker_client.containers.run( + self.image_name, + **all_kwargs, + ) + return self.container + + def run_and_wait( + self, + timeout: int, + no_warnings: bool = True, + no_errors: bool = True, + no_failure: bool = True, + **kwargs: Any, + ) -> str: + running_container = self.run_detached(**kwargs) + rv = running_container.wait(timeout=timeout) + logs = running_container.logs().decode("utf-8") + assert isinstance(logs, str) + LOGGER.debug(logs) + if no_warnings: + assert not self.get_warnings(logs) + if no_errors: + assert not self.get_errors(logs) + assert no_failure == (rv["StatusCode"] == 0) + return logs + + @staticmethod + def get_errors(logs: str) -> list[str]: + return TrackedContainer._lines_starting_with(logs, "ERROR") + + @staticmethod + def get_warnings(logs: str) -> list[str]: + return TrackedContainer._lines_starting_with(logs, "WARNING") + + @staticmethod + def _lines_starting_with(logs: str, pattern: str) -> list[str]: + return [line for line in logs.splitlines() if line.startswith(pattern)] + + def remove(self) -> None: + """Kills and removes the tracked docker container.""" + if self.container: + self.container.remove(force=True) + + +@pytest.fixture(scope="function") +def container(docker_client: docker.DockerClient, image_name: str) -> Container: + """Notebook container with initial configuration appropriate for testing + (e.g., HTTP port exposed to the host for HTTP calls). + + Yields the container instance and kills it when the caller is done with it. + """ + container = TrackedContainer( + docker_client, + image_name, + detach=True, + ) + yield container + container.remove() diff --git a/docker-stacks/tests/datascience-notebook/test_julia_datascience.py b/docker-stacks/tests/datascience-notebook/test_julia_datascience.py new file mode 100644 index 0000000..3b1a55b --- /dev/null +++ b/docker-stacks/tests/datascience-notebook/test_julia_datascience.py @@ -0,0 +1,8 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +from tests.conftest import TrackedContainer +from tests.run_command import run_command + + +def test_julia(container: TrackedContainer) -> None: + run_command(container, "julia --version") diff --git a/docker-stacks/tests/datascience-notebook/test_mimetypes.py b/docker-stacks/tests/datascience-notebook/test_mimetypes.py new file mode 100644 index 0000000..4fd8647 --- /dev/null +++ b/docker-stacks/tests/datascience-notebook/test_mimetypes.py @@ -0,0 +1,9 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +from tests.conftest import TrackedContainer +from tests.R_mimetype_check import check_r_mimetypes + + +def test_mimetypes(container: TrackedContainer) -> None: + """Check if Rscript command for mimetypes can be executed""" + check_r_mimetypes(container) diff --git a/docker-stacks/tests/datascience-notebook/test_pluto_datascience.py b/docker-stacks/tests/datascience-notebook/test_pluto_datascience.py new file mode 100644 index 0000000..27c4aaf --- /dev/null +++ b/docker-stacks/tests/datascience-notebook/test_pluto_datascience.py @@ -0,0 +1,13 @@ +# Copyright (c) Jupyter Development Team. 
+# Distributed under the terms of the Modified BSD License. +import requests + +from tests.conftest import TrackedContainer +from tests.pluto_check import check_pluto_proxy + + +def test_pluto_proxy( + container: TrackedContainer, http_client: requests.Session +) -> None: + """Pluto proxy starts Pluto correctly""" + check_pluto_proxy(container, http_client) diff --git a/docker-stacks/tests/docker-stacks-foundation/run-hooks-change/a.sh b/docker-stacks/tests/docker-stacks-foundation/run-hooks-change/a.sh new file mode 100644 index 0000000..61701e2 --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/run-hooks-change/a.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +export MY_VAR=123 +echo "Inside a.sh MY_VAR variable has ${MY_VAR} value" diff --git a/docker-stacks/tests/docker-stacks-foundation/run-hooks-change/b.sh b/docker-stacks/tests/docker-stacks-foundation/run-hooks-change/b.sh new file mode 100644 index 0000000..fdca974 --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/run-hooks-change/b.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +echo "Inside b.sh MY_VAR variable has ${MY_VAR} value" +echo "Changing value of MY_VAR" +export MY_VAR=456 +echo "After change inside b.sh MY_VAR variable has ${MY_VAR} value" diff --git a/docker-stacks/tests/docker-stacks-foundation/run-hooks-change/c.sh b/docker-stacks/tests/docker-stacks-foundation/run-hooks-change/c.sh new file mode 100644 index 0000000..ef69df3 --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/run-hooks-change/c.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +echo "Inside c.sh MY_VAR variable has ${MY_VAR} value" diff --git a/docker-stacks/tests/docker-stacks-foundation/run-hooks-executables/executable.py b/docker-stacks/tests/docker-stacks-foundation/run-hooks-executables/executable.py new file mode 100755 index 0000000..5fb2b9a --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/run-hooks-executables/executable.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +print("Executable python file was successfully run") diff --git a/docker-stacks/tests/docker-stacks-foundation/run-hooks-executables/non_executable.py b/docker-stacks/tests/docker-stacks-foundation/run-hooks-executables/non_executable.py new file mode 100644 index 0000000..19c8d0b --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/run-hooks-executables/non_executable.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +assert False diff --git a/docker-stacks/tests/docker-stacks-foundation/run-hooks-executables/run-me.sh b/docker-stacks/tests/docker-stacks-foundation/run-hooks-executables/run-me.sh new file mode 100644 index 0000000..f4dc08a --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/run-hooks-executables/run-me.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
+ +export SOME_VAR=123 diff --git a/docker-stacks/tests/docker-stacks-foundation/run-hooks-failures/a.sh b/docker-stacks/tests/docker-stacks-foundation/run-hooks-failures/a.sh new file mode 100644 index 0000000..7dabeeb --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/run-hooks-failures/a.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +echo "Started: a.sh" + +export OTHER_VAR=456 + +run-unknown-command + +echo "Finished: a.sh" diff --git a/docker-stacks/tests/docker-stacks-foundation/run-hooks-failures/b.py b/docker-stacks/tests/docker-stacks-foundation/run-hooks-failures/b.py new file mode 100755 index 0000000..cc5b0a7 --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/run-hooks-failures/b.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import os +import sys + +print("Started: b.py") +print(f"OTHER_VAR={os.environ['OTHER_VAR']}") + +sys.exit(1) + +print("Finished: b.py") diff --git a/docker-stacks/tests/docker-stacks-foundation/run-hooks-failures/c.sh b/docker-stacks/tests/docker-stacks-foundation/run-hooks-failures/c.sh new file mode 100644 index 0000000..a71e69f --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/run-hooks-failures/c.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +echo "Started: c.sh" + +run-unknown-command diff --git a/docker-stacks/tests/docker-stacks-foundation/run-hooks-failures/d.sh b/docker-stacks/tests/docker-stacks-foundation/run-hooks-failures/d.sh new file mode 100644 index 0000000..abc646a --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/run-hooks-failures/d.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +set -e + +echo "Started: d.sh" + +run-unknown-command + +echo "Finished: d.sh" diff --git a/docker-stacks/tests/docker-stacks-foundation/run-hooks-unset/a.sh b/docker-stacks/tests/docker-stacks-foundation/run-hooks-unset/a.sh new file mode 100644 index 0000000..61701e2 --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/run-hooks-unset/a.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +export MY_VAR=123 +echo "Inside a.sh MY_VAR variable has ${MY_VAR} value" diff --git a/docker-stacks/tests/docker-stacks-foundation/run-hooks-unset/b.sh b/docker-stacks/tests/docker-stacks-foundation/run-hooks-unset/b.sh new file mode 100644 index 0000000..ab64e93 --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/run-hooks-unset/b.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +echo "Inside b.sh MY_VAR variable has ${MY_VAR} value" +echo "Unsetting MY_VAR" +unset MY_VAR diff --git a/docker-stacks/tests/docker-stacks-foundation/run-hooks-unset/c.sh b/docker-stacks/tests/docker-stacks-foundation/run-hooks-unset/c.sh new file mode 100644 index 0000000..ef69df3 --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/run-hooks-unset/c.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
+ +echo "Inside c.sh MY_VAR variable has ${MY_VAR} value" diff --git a/docker-stacks/tests/docker-stacks-foundation/test_outdated.py b/docker-stacks/tests/docker-stacks-foundation/test_outdated.py new file mode 100644 index 0000000..6de18d3 --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/test_outdated.py @@ -0,0 +1,22 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import logging + +import pytest # type: ignore + +from tests.conftest import TrackedContainer +from tests.package_helper import CondaPackageHelper + +LOGGER = logging.getLogger(__name__) + + +@pytest.mark.info +def test_outdated_packages( + container: TrackedContainer, requested_only: bool = True +) -> None: + """Getting the list of updatable packages""" + LOGGER.info(f"Checking outdated packages in {container.image_name} ...") + pkg_helper = CondaPackageHelper(container) + pkg_helper.check_updatable_packages(requested_only) + LOGGER.info(pkg_helper.get_outdated_summary(requested_only)) + LOGGER.info(f"\n{pkg_helper.get_outdated_table()}\n") diff --git a/docker-stacks/tests/docker-stacks-foundation/test_package_managers.py b/docker-stacks/tests/docker-stacks-foundation/test_package_managers.py new file mode 100644 index 0000000..29e0b64 --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/test_package_managers.py @@ -0,0 +1,22 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import pytest # type: ignore + +from tests.conftest import TrackedContainer +from tests.run_command import run_command + + +@pytest.mark.parametrize( + "package_manager_command", + [ + "apt --version", + "conda --version", + "mamba --version", + "pip --version", + ], +) +def test_package_manager( + container: TrackedContainer, package_manager_command: str +) -> None: + """Test that package managers are installed and run.""" + run_command(container, package_manager_command) diff --git a/docker-stacks/tests/docker-stacks-foundation/test_packages.py b/docker-stacks/tests/docker-stacks-foundation/test_packages.py new file mode 100644 index 0000000..a1f7f75 --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/test_packages.py @@ -0,0 +1,193 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +""" +test_packages +~~~~~~~~~~~~~~~ +This test module tests if the R and Python packages installed can be imported. +It's a basic test aiming to prove that the package is working properly. + +The goal is to detect import errors that can be caused by incompatibilities between packages, for example: + +- #1012: issue importing `sympy` +- #966: issue importing `pyarrow` + +This module checks dynamically, through the `CondaPackageHelper`, +only the requested packages i.e. packages requested by `mamba install` in the `Dockerfile`s. +This means that it does not check dependencies. +This choice is a tradeoff to cover the main requirements while achieving a reasonable test duration. +However, it could be easily changed (or completed) to cover dependencies as well. +Use `package_helper.installed_packages()` instead of `package_helper.requested_packages()`. + +Example: + + $ make test/docker-stacks-foundation + + # [...] + # tests/docker-stacks-foundation/test_packages.py::test_python_packages + # -------------------------------- live log setup -------------------------------- + # 2024-01-21 17:46:43 [ INFO] Starting container quay.io/jupyter/docker-stacks-foundation ... 
(package_helper.py:55) + # 2024-01-21 17:46:43 [ INFO] Running quay.io/jupyter/docker-stacks-foundation with args {'detach': True, 'tty': True, 'command': ['bash', '-c', 'sleep infinity']} ... (conftest.py:99) + # 2024-01-21 17:46:44 [ INFO] Grabbing the list of manually requested packages ... (package_helper.py:83) + # -------------------------------- live log call --------------------------------- + # 2024-01-21 17:46:44 [ INFO] Testing the import of packages ... (test_packages.py:151) + # 2024-01-21 17:46:44 [ INFO] Trying to import mamba (test_packages.py:153) + # 2024-01-21 17:46:44 [ INFO] Trying to import jupyter_core (test_packages.py:153) + PASSED [ 17%] + # ------------------------------ live log teardown ------------------------------- + # [...] + +""" + +import logging +from collections.abc import Iterable +from typing import Callable + +import pytest # type: ignore + +from tests.conftest import TrackedContainer +from tests.package_helper import CondaPackageHelper + +LOGGER = logging.getLogger(__name__) + +# Mapping between package and module name +PACKAGE_MAPPING = { + # Python + "beautifulsoup4": "bs4", + "jupyter-pluto-proxy": "jupyter_pluto_proxy", + "matplotlib-base": "matplotlib", + "pytables": "tables", + "scikit-image": "skimage", + "scikit-learn": "sklearn", + # R + "randomforest": "randomForest", + "rcurl": "RCurl", + "rodbc": "RODBC", + "rsqlite": "DBI", +} + +# List of packages that cannot be tested in a standard way +EXCLUDED_PACKAGES = [ + "bzip2", + "ca-certificates", + "conda-forge::blas[build=openblas]", + "grpcio-status", + "grpcio", + "hdf5", + "jupyterlab-git", + "openssl", + "pandas[version='>", + "protobuf", + "python", + "r-irkernel", + "unixodbc", +] + + +@pytest.fixture(scope="function") +def package_helper(container: TrackedContainer) -> CondaPackageHelper: + """Return a package helper object that can be used to perform tests on installed packages""" + return CondaPackageHelper(container) + + +@pytest.fixture(scope="function") +def packages(package_helper: CondaPackageHelper) -> dict[str, set[str]]: + """Return the list of requested packages (i.e. packages explicitly installed excluding dependencies)""" + return package_helper.requested_packages() + + +def get_package_import_name(package: str) -> str: + """Perform a mapping between the python package name and the name used for the import""" + return PACKAGE_MAPPING.get(package, package) + + +def excluded_package_predicate(package: str) -> bool: + """Return whether a package is excluded from the list + (i.e. 
a package that cannot be tested with standard imports)""" + return package in EXCLUDED_PACKAGES + + +def python_package_predicate(package: str) -> bool: + """Predicate matching python packages""" + return not excluded_package_predicate(package) and not r_package_predicate(package) + + +def r_package_predicate(package: str) -> bool: + """Predicate matching R packages""" + return not excluded_package_predicate(package) and package.startswith("r-") + + +def _check_import_package( + package_helper: CondaPackageHelper, command: list[str] +) -> None: + """Generic function executing a command""" + LOGGER.debug(f"Trying to import a package with [{command}] ...") + exec_result = package_helper.running_container.exec_run(command) + assert ( + exec_result.exit_code == 0 + ), f"Import package failed, output: {exec_result.output}" + + +def check_import_python_package( + package_helper: CondaPackageHelper, package: str +) -> None: + """Try to import a Python package from the command line""" + _check_import_package(package_helper, ["python", "-c", f"import {package}"]) + + +def check_import_r_package(package_helper: CondaPackageHelper, package: str) -> None: + """Try to import an R package from the command line""" + _check_import_package(package_helper, ["R", "--slave", "-e", f"library({package})"]) + + +def _check_import_packages( + package_helper: CondaPackageHelper, + packages_to_check: Iterable[str], + check_function: Callable[[CondaPackageHelper, str], None], +) -> None: + """Test if packages can be imported + + Note: using a list of packages instead of a fixture for the list of packages + since pytest prevents the use of multiple yields + """ + failures = {} + LOGGER.info("Testing the import of packages ...") + for package in packages_to_check: + LOGGER.info(f"Trying to import {package}") + try: + check_function(package_helper, package) + except AssertionError as err: + failures[package] = err + if len(failures) > 0: + raise AssertionError(failures) + + +@pytest.fixture(scope="function") +def r_packages(packages: dict[str, set[str]]) -> Iterable[str]: + """Return an iterable of R packages""" + # package[2:] is to remove the leading "r-" appended on R packages + return map( + lambda package: get_package_import_name(package[2:]), + filter(r_package_predicate, packages), + ) + + +def test_r_packages( + package_helper: CondaPackageHelper, r_packages: Iterable[str] +) -> None: + """Test the import of specified R packages""" + _check_import_packages(package_helper, r_packages, check_import_r_package) + + +@pytest.fixture(scope="function") +def python_packages(packages: dict[str, set[str]]) -> Iterable[str]: + """Return an iterable of Python packages""" + return map(get_package_import_name, filter(python_package_predicate, packages)) + + +def test_python_packages( + package_helper: CondaPackageHelper, + python_packages: Iterable[str], +) -> None: + """Test the import of specified python packages""" + _check_import_packages(package_helper, python_packages, check_import_python_package) diff --git a/docker-stacks/tests/docker-stacks-foundation/test_python_version.py b/docker-stacks/tests/docker-stacks-foundation/test_python_version.py new file mode 100644 index 0000000..559853a --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/test_python_version.py @@ -0,0 +1,34 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
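+
+# NOTE: the expected major.minor version below has to match the python_version
+# the image under test was built with (cf. the build matrix); 3.11 is assumed
+# here, one python_version per test run.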
+import logging
+
+from tests.conftest import TrackedContainer
+
+LOGGER = logging.getLogger(__name__)
+EXPECTED_PYTHON_VERSION = "3.11"
+
+
+def test_python_version(container: TrackedContainer) -> None:
+    LOGGER.info(
+        f"Checking that python major.minor version is {EXPECTED_PYTHON_VERSION}"
+    )
+    logs = container.run_and_wait(
+        timeout=5,
+        tty=True,
+        command=["python", "--version"],
+    )
+    python = next(line for line in logs.splitlines() if line.startswith("Python "))
+    full_version = python.split()[1]
+    major_minor_version = full_version[: full_version.rfind(".")]
+
+    assert major_minor_version == EXPECTED_PYTHON_VERSION
+
+
+def test_python_pinned_version(container: TrackedContainer) -> None:
+    LOGGER.info(f"Checking that pinned python version is {EXPECTED_PYTHON_VERSION}.*")
+    logs = container.run_and_wait(
+        timeout=5,
+        tty=True,
+        command=["cat", "/opt/conda/conda-meta/pinned"],
+    )
+    assert f"python {EXPECTED_PYTHON_VERSION}.*" in logs
diff --git a/docker-stacks/tests/docker-stacks-foundation/test_run_hooks.py b/docker-stacks/tests/docker-stacks-foundation/test_run_hooks.py
new file mode 100644
index 0000000..87467f9
--- /dev/null
+++ b/docker-stacks/tests/docker-stacks-foundation/test_run_hooks.py
@@ -0,0 +1,147 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+import logging
+from pathlib import Path
+
+from tests.conftest import TrackedContainer
+
+LOGGER = logging.getLogger(__name__)
+THIS_DIR = Path(__file__).parent.resolve()
+
+
+def test_run_hooks_zero_args(container: TrackedContainer) -> None:
+    logs = container.run_and_wait(
+        timeout=5,
+        tty=True,
+        no_failure=False,
+        command=["bash", "-c", "source /usr/local/bin/run-hooks.sh"],
+    )
+    assert "Should pass exactly one directory" in logs
+
+
+def test_run_hooks_two_args(container: TrackedContainer) -> None:
+    logs = container.run_and_wait(
+        timeout=5,
+        tty=True,
+        no_failure=False,
+        command=[
+            "bash",
+            "-c",
+            "source /usr/local/bin/run-hooks.sh first-arg second-arg",
+        ],
+    )
+    assert "Should pass exactly one directory" in logs
+
+
+def test_run_hooks_missing_dir(container: TrackedContainer) -> None:
+    logs = container.run_and_wait(
+        timeout=5,
+        tty=True,
+        no_failure=False,
+        command=[
+            "bash",
+            "-c",
+            "source /usr/local/bin/run-hooks.sh /tmp/missing-dir/",
+        ],
+    )
+    assert "Directory /tmp/missing-dir/ doesn't exist or is not a directory" in logs
+
+
+def test_run_hooks_dir_is_file(container: TrackedContainer) -> None:
+    logs = container.run_and_wait(
+        timeout=5,
+        tty=True,
+        no_failure=False,
+        command=[
+            "bash",
+            "-c",
+            "touch /tmp/some-file && source /usr/local/bin/run-hooks.sh /tmp/some-file",
+        ],
+    )
+    assert "Directory /tmp/some-file doesn't exist or is not a directory" in logs
+
+
+def test_run_hooks_empty_dir(container: TrackedContainer) -> None:
+    container.run_and_wait(
+        timeout=5,
+        tty=True,
+        command=[
+            "bash",
+            "-c",
+            "mkdir /tmp/empty-dir && source /usr/local/bin/run-hooks.sh /tmp/empty-dir/",
+        ],
+    )
+
+
+def run_source_in_dir(
+    container: TrackedContainer,
+    subdir: str,
+    command_suffix: str = "",
+    no_failure: bool = True,
+) -> str:
+    host_data_dir = THIS_DIR / subdir
+    cont_data_dir = "/home/jovyan/data"
+    # https://forums.docker.com/t/all-files-appear-as-executable-in-file-paths-using-bind-mount/99921
+    # Unfortunately, Docker treats all files in a mounted dir as executable files,
+    # so we make a copy of the mounted dir inside the container
+    command = (
+        "cp -r /home/jovyan/data/ /home/jovyan/data-copy/ &&"
+        "source 
/usr/local/bin/run-hooks.sh /home/jovyan/data-copy/" + command_suffix + ) + return container.run_and_wait( + timeout=5, + volumes={str(host_data_dir): {"bind": cont_data_dir, "mode": "ro"}}, + tty=True, + no_failure=no_failure, + command=["bash", "-c", command], + ) + + +def test_run_hooks_executables(container: TrackedContainer) -> None: + logs = run_source_in_dir( + container, + subdir="run-hooks-executables", + command_suffix="&& echo SOME_VAR is ${SOME_VAR}", + ) + + assert "Executable python file was successfully run" in logs + assert "Ignoring non-executable: /home/jovyan/data-copy//non_executable.py" in logs + assert "SOME_VAR is 123" in logs + + +def test_run_hooks_with_failures(container: TrackedContainer) -> None: + logs = run_source_in_dir(container, subdir="run-hooks-failures", no_failure=False) + + for file in ["a.sh", "b.py", "c.sh", "d.sh"]: + assert f"Started: {file}" in logs + + for file in ["a.sh"]: + assert f"Finished: {file}" in logs + for file in ["b.py", "c.sh", "d.sh"]: + assert f"Finished: {file}" not in logs + + for file in ["b.py", "c.sh"]: + assert ( + f"/home/jovyan/data-copy//{file} has failed, continuing execution" in logs + ) + + assert "OTHER_VAR=456" in logs + + +def test_run_hooks_unset(container: TrackedContainer) -> None: + logs = run_source_in_dir(container, subdir="run-hooks-unset") + + assert "Inside a.sh MY_VAR variable has 123 value" in logs + assert "Inside b.sh MY_VAR variable has 123 value" in logs + assert "Unsetting MY_VAR" in logs + assert "Inside c.sh MY_VAR variable has value" in logs + + +def test_run_hooks_change(container: TrackedContainer) -> None: + logs = run_source_in_dir(container, subdir="run-hooks-change") + + assert "Inside a.sh MY_VAR variable has 123 value" in logs + assert "Inside b.sh MY_VAR variable has 123 value" in logs + assert "Changing value of MY_VAR" in logs + assert "After change inside b.sh MY_VAR variable has 456 value" in logs + assert "Inside c.sh MY_VAR variable has 456 value" in logs diff --git a/docker-stacks/tests/docker-stacks-foundation/test_units.py b/docker-stacks/tests/docker-stacks-foundation/test_units.py new file mode 100644 index 0000000..cfdbc83 --- /dev/null +++ b/docker-stacks/tests/docker-stacks-foundation/test_units.py @@ -0,0 +1,38 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
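+
+# Discovery convention (illustrative, paths taken from this repository):
+#   tests/scipy-notebook/units/unit_pandas.py
+#   tests/pyspark-notebook/units/unit_spark.py
+# Every .py file found under a `units` dir is mounted into the container and
+# executed with `python`; a non-zero exit code fails the test.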
+import logging
+
+from tests.conftest import TrackedContainer
+from tests.images_hierarchy import get_test_dirs
+
+LOGGER = logging.getLogger(__name__)
+
+
+def test_units(container: TrackedContainer) -> None:
+    """Various unit tests
+    Add a py file in the `tests/<image>/units` dir, and it will be automatically tested
+    """
+    short_image_name = container.image_name[container.image_name.rfind("/") + 1 :]
+    LOGGER.info(f"Running unit tests for: {short_image_name}")
+
+    test_dirs = get_test_dirs(short_image_name)
+
+    for test_dir in test_dirs:
+        host_data_dir = test_dir / "units"
+        LOGGER.info(f"Searching for unit tests in {host_data_dir}")
+        cont_data_dir = "/home/jovyan/data"
+
+        if not host_data_dir.exists():
+            LOGGER.info(f"No unit tests found for image: {container.image_name}")
+            continue
+
+        for test_file in host_data_dir.iterdir():
+            test_file_name = test_file.name
+            LOGGER.info(f"Running unit test: {test_file_name}")
+
+            container.run_and_wait(
+                timeout=30,
+                volumes={str(host_data_dir): {"bind": cont_data_dir, "mode": "ro"}},
+                tty=True,
+                command=["python", f"{cont_data_dir}/{test_file_name}"],
+            )
diff --git a/docker-stacks/tests/docker-stacks-foundation/test_user_options.py b/docker-stacks/tests/docker-stacks-foundation/test_user_options.py
new file mode 100644
index 0000000..fb2b462
--- /dev/null
+++ b/docker-stacks/tests/docker-stacks-foundation/test_user_options.py
@@ -0,0 +1,307 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+import logging
+import pathlib
+import time
+
+import pytest  # type: ignore
+
+from tests.conftest import TrackedContainer
+
+LOGGER = logging.getLogger(__name__)
+
+
+def test_uid_change(container: TrackedContainer) -> None:
+    """Container should change the UID of the default user."""
+    logs = container.run_and_wait(
+        timeout=120,  # usermod is slow so give it some time
+        tty=True,
+        user="root",
+        environment=["NB_UID=1010"],
+        command=["bash", "-c", "id && touch /opt/conda/test-file"],
+    )
+    assert "uid=1010(jovyan)" in logs
+
+
+def test_gid_change(container: TrackedContainer) -> None:
+    """Container should change the GID of the default user."""
+    logs = container.run_and_wait(
+        timeout=10,
+        tty=True,
+        user="root",
+        environment=["NB_GID=110"],
+        command=["id"],
+    )
+    assert "gid=110(jovyan)" in logs
+    assert "groups=110(jovyan),100(users)" in logs
+
+
+def test_nb_user_change(container: TrackedContainer) -> None:
+    """Container should change the username (`NB_USER`) of the default user."""
+    nb_user = "nayvoj"
+    running_container = container.run_detached(
+        tty=True,
+        user="root",
+        environment=[f"NB_USER={nb_user}", "CHOWN_HOME=yes"],
+        command=["bash", "-c", "sleep infinity"],
+    )
+
+    # Give the chown time to complete.
+    # Use sleep, not wait, because the container sleeps forever.
+ time.sleep(1) + LOGGER.info(f"Checking if the user is changed to {nb_user} by the start script ...") + output = running_container.logs().decode("utf-8") + assert "ERROR" not in output + assert "WARNING" not in output + assert ( + f"username: jovyan -> {nb_user}" in output + ), f"User is not changed to {nb_user}" + + LOGGER.info(f"Checking {nb_user} id ...") + command = "id" + expected_output = f"uid=1000({nb_user}) gid=100(users) groups=100(users)" + cmd = running_container.exec_run(command, user=nb_user, workdir=f"/home/{nb_user}") + output = cmd.output.decode("utf-8").strip("\n") + assert output == expected_output, f"Bad user {output}, expected {expected_output}" + + LOGGER.info(f"Checking if {nb_user} owns his home folder ...") + command = f'stat -c "%U %G" /home/{nb_user}/' + expected_output = f"{nb_user} users" + cmd = running_container.exec_run(command, workdir=f"/home/{nb_user}") + output = cmd.output.decode("utf-8").strip("\n") + assert ( + output == expected_output + ), f"Bad owner for the {nb_user} home folder {output}, expected {expected_output}" + + LOGGER.info( + f"Checking if a home folder of {nb_user} contains the 'work' folder with appropriate permissions ..." + ) + command = f'stat -c "%F %U %G" /home/{nb_user}/work' + expected_output = f"directory {nb_user} users" + cmd = running_container.exec_run(command, workdir=f"/home/{nb_user}") + output = cmd.output.decode("utf-8").strip("\n") + assert ( + output == expected_output + ), f"Folder work was not copied properly to {nb_user} home folder. stat: {output}, expected {expected_output}" + + +def test_chown_extra(container: TrackedContainer) -> None: + """Container should change the UID/GID of a comma-separated + CHOWN_EXTRA list of folders.""" + logs = container.run_and_wait( + timeout=120, # chown is slow so give it some time + tty=True, + user="root", + environment=[ + "NB_UID=1010", + "NB_GID=101", + "CHOWN_EXTRA=/home/jovyan,/opt/conda/bin", + "CHOWN_EXTRA_OPTS=-R", + ], + command=[ + "bash", + "-c", + "stat -c '%n:%u:%g' /home/jovyan/.bashrc /opt/conda/bin/jupyter", + ], + ) + assert "/home/jovyan/.bashrc:1010:101" in logs + assert "/opt/conda/bin/jupyter:1010:101" in logs + + +def test_chown_home(container: TrackedContainer) -> None: + """Container should change the NB_USER home directory owner and + group to the current value of NB_UID and NB_GID.""" + logs = container.run_and_wait( + timeout=120, # chown is slow so give it some time + tty=True, + user="root", + environment=[ + "CHOWN_HOME=yes", + "CHOWN_HOME_OPTS=-R", + "NB_USER=kitten", + "NB_UID=1010", + "NB_GID=101", + ], + command=["bash", "-c", "stat -c '%n:%u:%g' /home/kitten/.bashrc"], + ) + assert "/home/kitten/.bashrc:1010:101" in logs + + +def test_sudo(container: TrackedContainer) -> None: + """Container should grant passwordless sudo to the default user.""" + logs = container.run_and_wait( + timeout=10, + tty=True, + user="root", + environment=["GRANT_SUDO=yes"], + command=["sudo", "id"], + ) + assert "uid=0(root)" in logs + + +def test_sudo_path(container: TrackedContainer) -> None: + """Container should include /opt/conda/bin in the sudo secure_path.""" + logs = container.run_and_wait( + timeout=10, + tty=True, + user="root", + environment=["GRANT_SUDO=yes"], + command=["sudo", "which", "jupyter"], + ) + assert logs.rstrip().endswith("/opt/conda/bin/jupyter") + + +def test_sudo_path_without_grant(container: TrackedContainer) -> None: + """Container should include /opt/conda/bin in the sudo secure_path.""" + logs = container.run_and_wait( + timeout=10, 
+ tty=True, + user="root", + command=["which", "jupyter"], + ) + assert logs.rstrip().endswith("/opt/conda/bin/jupyter") + + +def test_group_add(container: TrackedContainer) -> None: + """Container should run with the specified uid, gid, and secondary + group. It won't be possible to modify /etc/passwd since gid is nonzero, so + additionally verify that setting gid=0 is suggested in a warning. + """ + logs = container.run_and_wait( + timeout=5, + no_warnings=False, + user="1010:1010", + group_add=["users"], # Ensures write access to /home/jovyan + command=["id"], + ) + warnings = TrackedContainer.get_warnings(logs) + assert len(warnings) == 1 + assert "Try setting gid=0" in warnings[0] + assert "uid=1010 gid=1010 groups=1010,100(users)" in logs + + +def test_set_uid(container: TrackedContainer) -> None: + """Container should run with the specified uid and NB_USER. + The /home/jovyan directory will not be writable since it's owned by 1000:users. + Additionally, verify that "--group-add=users" is suggested in a warning to restore + write access. + """ + logs = container.run_and_wait( + timeout=5, + no_warnings=False, + user="1010", + command=["id"], + ) + assert "uid=1010(jovyan) gid=0(root)" in logs + warnings = TrackedContainer.get_warnings(logs) + assert len(warnings) == 1 + assert "--group-add=users" in warnings[0] + + +def test_set_uid_and_nb_user(container: TrackedContainer) -> None: + """Container should run with the specified uid and NB_USER.""" + logs = container.run_and_wait( + timeout=5, + no_warnings=False, + user="1010", + environment=["NB_USER=kitten"], + group_add=["users"], # Ensures write access to /home/jovyan + command=["id"], + ) + assert "uid=1010(kitten) gid=0(root)" in logs + warnings = TrackedContainer.get_warnings(logs) + assert len(warnings) == 1 + assert "user is kitten but home is /home/jovyan" in warnings[0] + + +def test_container_not_delete_bind_mount( + container: TrackedContainer, tmp_path: pathlib.Path +) -> None: + """Container should not delete host system files when using the (docker) + -v bind mount flag and mapping to /home/jovyan. + """ + d = tmp_path / "data" + d.mkdir() + p = d / "foo.txt" + p.write_text("some-content") + + container.run_and_wait( + timeout=5, + tty=True, + user="root", + working_dir="/home/", + environment=[ + "NB_USER=user", + "CHOWN_HOME=yes", + ], + volumes={d: {"bind": "/home/jovyan/data", "mode": "rw"}}, + command=["ls"], + ) + assert p.read_text() == "some-content" + assert len(list(tmp_path.iterdir())) == 1 + + +@pytest.mark.parametrize("enable_root", [False, True]) +def test_jupyter_env_vars_to_unset( + container: TrackedContainer, enable_root: bool +) -> None: + """Environment variables names listed in JUPYTER_ENV_VARS_TO_UNSET + should be unset in the final environment.""" + root_args = {"user": "root"} if enable_root else {} + logs = container.run_and_wait( + timeout=10, + tty=True, + environment=[ + "JUPYTER_ENV_VARS_TO_UNSET=SECRET_ANIMAL,UNUSED_ENV,SECRET_FRUIT", + "FRUIT=bananas", + "SECRET_ANIMAL=cats", + "SECRET_FRUIT=mango", + ], + command=[ + "bash", + "-c", + "echo I like ${FRUIT} and ${SECRET_FRUIT:-stuff}, and love ${SECRET_ANIMAL:-to keep secrets}!", + ], + **root_args, # type: ignore + ) + assert "I like bananas and stuff, and love to keep secrets!" in logs + + +def test_secure_path(container: TrackedContainer, tmp_path: pathlib.Path) -> None: + """Make sure that the sudo command has conda's python (not system's) on PATH. + See . 
+ """ + d = tmp_path / "data" + d.mkdir() + p = d / "wrong_python.sh" + p.write_text('#!/bin/bash\necho "Wrong python executable invoked!"') + p.chmod(0o755) + + logs = container.run_and_wait( + timeout=5, + tty=True, + user="root", + volumes={p: {"bind": "/usr/bin/python", "mode": "ro"}}, + command=["python", "--version"], + ) + assert "Wrong python" not in logs + assert "Python" in logs + + +def test_startsh_multiple_exec(container: TrackedContainer) -> None: + """If start.sh is executed multiple times check that configuration only occurs once.""" + logs = container.run_and_wait( + timeout=10, + no_warnings=False, + tty=True, + user="root", + environment=["GRANT_SUDO=yes"], + command=["start.sh", "sudo", "id"], + ) + assert "uid=0(root)" in logs + warnings = TrackedContainer.get_warnings(logs) + assert len(warnings) == 1 + assert ( + "WARNING: start.sh is the default ENTRYPOINT, do not include it in CMD" + in warnings[0] + ) diff --git a/docker-stacks/tests/images_hierarchy.py b/docker-stacks/tests/images_hierarchy.py new file mode 100644 index 0000000..193ec27 --- /dev/null +++ b/docker-stacks/tests/images_hierarchy.py @@ -0,0 +1,34 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +from pathlib import Path +from typing import Optional + +THIS_DIR = Path(__file__).parent.resolve() + +# Please, take a look at the hierarchy of the images here: +# https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#image-relationships +ALL_IMAGES = { + "docker-stacks-foundation": None, + "base-notebook": "docker-stacks-foundation", + "minimal-notebook": "base-notebook", + "scipy-notebook": "minimal-notebook", + "r-notebook": "minimal-notebook", + "julia-notebook": "minimal-notebook", + "tensorflow-notebook": "scipy-notebook", + "pytorch-notebook": "scipy-notebook", + "datascience-notebook": "scipy-notebook", + "pyspark-notebook": "scipy-notebook", + "all-spark-notebook": "pyspark-notebook", +} + + +def get_test_dirs( + short_image_name: Optional[str], +) -> list[Path]: + if short_image_name is None: + return [] + + test_dirs = get_test_dirs(ALL_IMAGES[short_image_name]) + if (current_image_tests_dir := THIS_DIR / short_image_name).exists(): + test_dirs.append(current_image_tests_dir) + return test_dirs diff --git a/docker-stacks/tests/julia-notebook/test_julia.py b/docker-stacks/tests/julia-notebook/test_julia.py new file mode 100644 index 0000000..3b1a55b --- /dev/null +++ b/docker-stacks/tests/julia-notebook/test_julia.py @@ -0,0 +1,8 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +from tests.conftest import TrackedContainer +from tests.run_command import run_command + + +def test_julia(container: TrackedContainer) -> None: + run_command(container, "julia --version") diff --git a/docker-stacks/tests/julia-notebook/test_pluto.py b/docker-stacks/tests/julia-notebook/test_pluto.py new file mode 100644 index 0000000..27c4aaf --- /dev/null +++ b/docker-stacks/tests/julia-notebook/test_pluto.py @@ -0,0 +1,13 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
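+
+# The actual proxy assertions live in tests/pluto_check.py, a shared module,
+# so other notebook images can presumably reuse the same check; this file only
+# provides the julia-notebook entry point.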
+import requests
+
+from tests.conftest import TrackedContainer
+from tests.pluto_check import check_pluto_proxy
+
+
+def test_pluto_proxy(
+    container: TrackedContainer, http_client: requests.Session
+) -> None:
+    """Pluto proxy starts Pluto correctly"""
+    check_pluto_proxy(container, http_client)
diff --git a/docker-stacks/tests/minimal-notebook/data/Jupyter_logo.svg b/docker-stacks/tests/minimal-notebook/data/Jupyter_logo.svg
new file mode 100644
index 0000000..ab25508
--- /dev/null
+++ b/docker-stacks/tests/minimal-notebook/data/Jupyter_logo.svg
@@ -0,0 +1,90 @@
+
+Group.svg
+Created using Figma 0.90
diff --git a/docker-stacks/tests/minimal-notebook/data/notebook_math.ipynb b/docker-stacks/tests/minimal-notebook/data/notebook_math.ipynb
new file mode 100644
index 0000000..5b028b1
--- /dev/null
+++ b/docker-stacks/tests/minimal-notebook/data/notebook_math.ipynb
@@ -0,0 +1,138 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "a69ceb22",
+   "metadata": {},
+   "source": [
+    "# A simple SymPy example"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3c43c88e",
+   "metadata": {},
+   "source": [
+    "First we import SymPy and initialize printing:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7b561917",
+   "metadata": {
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from sympy import diff, init_printing, integrate, sin, symbols"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d6454356-d5a6-481f-aaac-9abcc101026a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "init_printing()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fbe0a2f3",
+   "metadata": {},
+   "source": [
+    "Create a few symbols:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c99d7f17",
+   "metadata": {
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "x, y, z = symbols(\"x y z\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f61dddac",
+   "metadata": {},
+   "source": [
+    "Here is a basic expression:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0cfde73c",
+   "metadata": {
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "e = x**2 + 2.0 * y + sin(z)\n",
+    "e"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cb7eb1ad",
+   "metadata": {
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "diff(e, x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "07441ea9",
+   "metadata": {
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "outputs": [],
+   "source": [
+    "integrate(e, z)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docker-stacks/tests/minimal-notebook/data/notebook_svg.ipynb b/docker-stacks/tests/minimal-notebook/data/notebook_svg.ipynb
new file mode 100644
index 0000000..08cdec1
--- /dev/null
+++ b/docker-stacks/tests/minimal-notebook/data/notebook_svg.ipynb
@@ -0,0 +1,43 
@@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import SVG, display" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(SVG(filename=\"Jupyter_logo.svg\"))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docker-stacks/tests/minimal-notebook/test_nbconvert.py b/docker-stacks/tests/minimal-notebook/test_nbconvert.py new file mode 100644 index 0000000..9c1c017 --- /dev/null +++ b/docker-stacks/tests/minimal-notebook/test_nbconvert.py @@ -0,0 +1,34 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import logging +from pathlib import Path + +import pytest # type: ignore + +from tests.conftest import TrackedContainer + +LOGGER = logging.getLogger(__name__) +THIS_DIR = Path(__file__).parent.resolve() + + +@pytest.mark.parametrize("test_file", ["notebook_math", "notebook_svg"]) +@pytest.mark.parametrize("output_format", ["pdf", "html"]) +def test_nbconvert( + container: TrackedContainer, test_file: str, output_format: str +) -> None: + """Check if nbconvert is able to convert a notebook file""" + host_data_dir = THIS_DIR / "data" + cont_data_dir = "/home/jovyan/data" + output_dir = "/tmp" + LOGGER.info( + f"Test that the example notebook {test_file} can be converted to {output_format} ..." + ) + command = f"jupyter nbconvert {cont_data_dir}/{test_file}.ipynb --output-dir {output_dir} --to {output_format}" + logs = container.run_and_wait( + timeout=30, + volumes={str(host_data_dir): {"bind": cont_data_dir, "mode": "ro"}}, + tty=True, + command=["bash", "-c", command], + ) + expected_file = f"{output_dir}/{test_file}.{output_format}" + assert expected_file in logs, f"Expected file {expected_file} not generated" diff --git a/docker-stacks/tests/package_helper.py b/docker-stacks/tests/package_helper.py new file mode 100644 index 0000000..2fe85f5 --- /dev/null +++ b/docker-stacks/tests/package_helper.py @@ -0,0 +1,209 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. + +# CondaPackageHelper is partially based on the work https://oerpli.github.io/post/2019/06/conda-outdated/. +# See copyright below. +# +# MIT License +# Copyright (c) 2019 Abraham Hinteregger +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import json +import logging +import re +from collections import defaultdict +from itertools import chain +from typing import Any, Optional + +from docker.models.containers import Container +from tabulate import tabulate + +from tests.conftest import TrackedContainer + +LOGGER = logging.getLogger(__name__) + + +class CondaPackageHelper: + """Conda package helper permitting to get information about packages""" + + def __init__(self, container: TrackedContainer): + self.running_container: Container = CondaPackageHelper.start_container( + container + ) + self.requested: Optional[dict[str, set[str]]] = None + self.installed: Optional[dict[str, set[str]]] = None + self.available: Optional[dict[str, set[str]]] = None + self.comparison: list[dict[str, str]] = [] + + @staticmethod + def start_container(container: TrackedContainer) -> Container: + """Start the TrackedContainer and return an instance of a running container""" + LOGGER.info(f"Starting container {container.image_name} ...") + return container.run_detached( + tty=True, + command=["bash", "-c", "sleep infinity"], + ) + + @staticmethod + def _conda_export_command(from_history: bool) -> list[str]: + """Return the mamba export command with or without history""" + cmd = ["mamba", "env", "export", "-n", "base", "--json", "--no-builds"] + if from_history: + cmd.append("--from-history") + return cmd + + def installed_packages(self) -> dict[str, set[str]]: + """Return the installed packages""" + if self.installed is None: + LOGGER.info("Grabbing the list of installed packages ...") + self.installed = CondaPackageHelper._packages_from_json( + self._execute_command( + CondaPackageHelper._conda_export_command(from_history=False) + ) + ) + return self.installed + + def requested_packages(self) -> dict[str, set[str]]: + """Return the requested package (i.e. 
`mamba install <package>`)"""
+        if self.requested is None:
+            LOGGER.info("Grabbing the list of manually requested packages ...")
+            self.requested = CondaPackageHelper._packages_from_json(
+                self._execute_command(
+                    CondaPackageHelper._conda_export_command(from_history=True)
+                )
+            )
+        return self.requested
+
+    def _execute_command(self, command: list[str]) -> str:
+        """Execute a command on a running container"""
+        rc = self.running_container.exec_run(command)
+        return rc.output.decode("utf-8")  # type: ignore
+
+    @staticmethod
+    def _packages_from_json(env_export: str) -> dict[str, set[str]]:
+        """Extract packages and versions from the lines returned by the list of specifications"""
+        # dependencies = filter(lambda x: isinstance(x, str), json.loads(env_export).get("dependencies"))
+        dependencies = json.loads(env_export).get("dependencies")
+        # Filter out packages installed through pip: they appear as a dict,
+        # e.g. {'pip': ['toree==0.3.0']}, and only packages installed through mamba are managed here
+        dependencies = filter(lambda x: isinstance(x, str), dependencies)
+        packages_dict: dict[str, set[str]] = dict()
+        for split in map(lambda x: re.split("=?=", x), dependencies):
+            # default values
+            package = split[0]
+            version = set()
+            # This normally means we have package=version notation
+            if len(split) > 1:
+                # checking if it's a proper version by testing if the first char is a digit
+                if split[1][0].isdigit():
+                    # package + version case
+                    version = set(split[1:])
+                # The split was incorrect and the package shall not be split
+                else:
+                    package = f"{split[0]}={split[1]}"
+            packages_dict[package] = version
+        return packages_dict
+
+    def available_packages(self) -> dict[str, set[str]]:
+        """Return the available packages"""
+        if self.available is None:
+            LOGGER.info(
+                "Grabbing the list of available packages (can take a while) ..."
+            )
+            # Keeping command line output since `mamba search --outdated --json` is way too long ...
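+            # The parsed table looks roughly like this (illustrative):
+            #   numpy    1.26.3    py311h...    conda-forge
+            # _extract_available() below skips the first two header lines and
+            # maps each package name to the set of versions listed.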
+ self.available = CondaPackageHelper._extract_available( + self._execute_command(["mamba", "search", "--outdated", "--quiet"]) + ) + return self.available + + @staticmethod + def _extract_available(lines: str) -> dict[str, set[str]]: + """Extract packages and versions from the lines returned by the list of packages""" + ddict = defaultdict(set) + for line in lines.splitlines()[2:]: + match = re.match(r"^(\S+)\s+(\S+)", line, re.MULTILINE) + assert match is not None + pkg, version = match.groups() + ddict[pkg].add(version) + return ddict + + def check_updatable_packages( + self, requested_only: bool = True + ) -> list[dict[str, str]]: + """Check the updatable packages including or not dependencies""" + requested = self.requested_packages() + installed = self.installed_packages() + available = self.available_packages() + self.comparison = [] + for pkg, inst_vs in installed.items(): + if not requested_only or pkg in requested: + avail_vs = sorted( + list(available[pkg]), key=CondaPackageHelper.semantic_cmp + ) + if not avail_vs: + continue + current = min(inst_vs, key=CondaPackageHelper.semantic_cmp) + newest = avail_vs[-1] + if ( + avail_vs + and current != newest + and CondaPackageHelper.semantic_cmp(current) + < CondaPackageHelper.semantic_cmp(newest) + ): + self.comparison.append( + {"Package": pkg, "Current": current, "Newest": newest} + ) + return self.comparison + + @staticmethod + def semantic_cmp(version_string: str) -> Any: + """Manage semantic versioning for comparison""" + + def my_split(string: str) -> list[Any]: + def version_substrs(x: str) -> list[str]: + return re.findall(r"([A-z]+|\d+)", x) + + return list(chain(map(version_substrs, string.split(".")))) + + def str_ord(string: str) -> int: + num = 0 + for char in string: + num *= 255 + num += ord(char) + return num + + def try_int(version_str: str) -> int: + try: + return int(version_str) + except ValueError: + return str_ord(version_str) + + mss = list(chain(*my_split(version_string))) + return tuple(map(try_int, mss)) + + def get_outdated_summary(self, requested_only: bool = True) -> str: + """Return a summary of outdated packages""" + packages = self.requested if requested_only else self.installed + assert packages is not None + nb_packages = len(packages) + nb_updatable = len(self.comparison) + updatable_ratio = nb_updatable / nb_packages + return f"{nb_updatable}/{nb_packages} ({updatable_ratio:.0%}) packages could be updated" + + def get_outdated_table(self) -> str: + """Return a table of outdated packages""" + return tabulate(self.comparison, headers="keys") diff --git a/docker-stacks/tests/pluto_check.py b/docker-stacks/tests/pluto_check.py new file mode 100644 index 0000000..48116db --- /dev/null +++ b/docker-stacks/tests/pluto_check.py @@ -0,0 +1,30 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
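+
+# Shared helper: starts a notebook server with a known token, waits briefly,
+# then requests /pluto through the proxy and checks for the Pluto landing page.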
+import logging +import secrets +import time + +import requests + +from tests.conftest import TrackedContainer, find_free_port + +LOGGER = logging.getLogger(__name__) + + +def check_pluto_proxy( + container: TrackedContainer, http_client: requests.Session +) -> None: + host_port = find_free_port() + token = secrets.token_hex() + container.run_detached( + command=[ + "start-notebook.py", + f"--IdentityProvider.token={token}", + ], + ports={"8888/tcp": host_port}, + ) + # Give the server a bit of time to start + time.sleep(2) + resp = http_client.get(f"http://localhost:{host_port}/pluto?token={token}") + resp.raise_for_status() + assert "Pluto.jl notebooks" in resp.text, "Pluto.jl text not found in /pluto page" diff --git a/docker-stacks/tests/pyspark-notebook/test_spark.py b/docker-stacks/tests/pyspark-notebook/test_spark.py new file mode 100644 index 0000000..211432f --- /dev/null +++ b/docker-stacks/tests/pyspark-notebook/test_spark.py @@ -0,0 +1,14 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import logging + +from tests.conftest import TrackedContainer +from tests.run_command import run_command + +LOGGER = logging.getLogger(__name__) + + +def test_spark_shell(container: TrackedContainer) -> None: + """Checking if Spark (spark-shell) is running properly""" + logs = run_command(container, 'spark-shell <<< "1+1"', timeout=60) + assert "res0: Int = 2" in logs, "spark-shell does not work" diff --git a/docker-stacks/tests/pyspark-notebook/units/unit_pandas_version.py b/docker-stacks/tests/pyspark-notebook/units/unit_pandas_version.py new file mode 100644 index 0000000..03920db --- /dev/null +++ b/docker-stacks/tests/pyspark-notebook/units/unit_pandas_version.py @@ -0,0 +1,5 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import pandas + +assert pandas.__version__ == "2.0.3" diff --git a/docker-stacks/tests/pyspark-notebook/units/unit_spark.py b/docker-stacks/tests/pyspark-notebook/units/unit_spark.py new file mode 100644 index 0000000..b6413fa --- /dev/null +++ b/docker-stacks/tests/pyspark-notebook/units/unit_spark.py @@ -0,0 +1,3 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import pyspark # noqa: F401 diff --git a/docker-stacks/tests/pytest.ini b/docker-stacks/tests/pytest.ini new file mode 100644 index 0000000..49f609a --- /dev/null +++ b/docker-stacks/tests/pytest.ini @@ -0,0 +1,8 @@ +[pytest] +addopts = -ra --color=yes +log_cli = 1 +log_cli_level = DEBUG +log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s) +log_cli_date_format=%Y-%m-%d %H:%M:%S +markers = + info: marks tests as info (deselect with '-m "not info"') diff --git a/docker-stacks/tests/pytorch-notebook/units/unit_pytorch.py b/docker-stacks/tests/pytorch-notebook/units/unit_pytorch.py new file mode 100644 index 0000000..1b739a5 --- /dev/null +++ b/docker-stacks/tests/pytorch-notebook/units/unit_pytorch.py @@ -0,0 +1,5 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import torch + +print(torch.tensor([[1.0, 4.0, 7.0], [4.0, 9.0, 11.0]])) diff --git a/docker-stacks/tests/r-notebook/test_R_mimetypes.py b/docker-stacks/tests/r-notebook/test_R_mimetypes.py new file mode 100644 index 0000000..4fd8647 --- /dev/null +++ b/docker-stacks/tests/r-notebook/test_R_mimetypes.py @@ -0,0 +1,9 @@ +# Copyright (c) Jupyter Development Team. 
+# Distributed under the terms of the Modified BSD License.
+from tests.conftest import TrackedContainer
+from tests.R_mimetype_check import check_r_mimetypes
+
+
+def test_mimetypes(container: TrackedContainer) -> None:
+    """Check if Rscript command for mimetypes can be executed"""
+    check_r_mimetypes(container)
diff --git a/docker-stacks/tests/run_command.py b/docker-stacks/tests/run_command.py
new file mode 100644
index 0000000..48e3cc0
--- /dev/null
+++ b/docker-stacks/tests/run_command.py
@@ -0,0 +1,22 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+import logging
+
+from tests.conftest import TrackedContainer
+
+LOGGER = logging.getLogger(__name__)
+
+
+def run_command(
+    container: TrackedContainer,
+    command: str,
+    timeout: int = 5,
+) -> str:
+    """Run an arbitrary bash command in the container and return its logs."""
+
+    LOGGER.info(f"Test that the command '{command}' is working properly ...")
+    return container.run_and_wait(
+        timeout=timeout,
+        tty=True,
+        command=["bash", "-c", command],
+    )
diff --git a/docker-stacks/tests/run_tests.py b/docker-stacks/tests/run_tests.py
new file mode 100755
index 0000000..90529e7
--- /dev/null
+++ b/docker-stacks/tests/run_tests.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+import argparse
+import logging
+
+import plumbum
+
+from tests.images_hierarchy import get_test_dirs
+
+python3 = plumbum.local["python3"]
+
+LOGGER = logging.getLogger(__name__)
+
+
+def test_image(short_image_name: str, registry: str, owner: str) -> None:
+    LOGGER.info(f"Testing image: {short_image_name}")
+    test_dirs = get_test_dirs(short_image_name)
+    LOGGER.info(f"Test dirs to be run: {test_dirs}")
+    with plumbum.local.env(TEST_IMAGE=f"{registry}/{owner}/{short_image_name}"):
+        (
+            python3[
+                "-m",
+                "pytest",
+                "--numprocesses",
+                "auto",
+                "-m",
+                "not info",
+                test_dirs,
+            ]
+            & plumbum.FG
+        )
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument(
+        "--short-image-name",
+        required=True,
+        help="Short image name",
+    )
+    arg_parser.add_argument(
+        "--registry",
+        required=True,
+        type=str,
+        choices=["docker.io", "quay.io"],
+        help="Image registry",
+    )
+    arg_parser.add_argument(
+        "--owner",
+        required=True,
+        help="Owner of the image",
+    )
+
+    args = arg_parser.parse_args()
+
+    test_image(args.short_image_name, args.registry, args.owner)
diff --git a/docker-stacks/tests/scipy-notebook/data/cython/helloworld.pyx b/docker-stacks/tests/scipy-notebook/data/cython/helloworld.pyx
new file mode 100644
index 0000000..ad35e5a
--- /dev/null
+++ b/docker-stacks/tests/scipy-notebook/data/cython/helloworld.pyx
@@ -0,0 +1 @@
+print("Hello World")
diff --git a/docker-stacks/tests/scipy-notebook/data/cython/setup.py b/docker-stacks/tests/scipy-notebook/data/cython/setup.py
new file mode 100644
index 0000000..2ad2140
--- /dev/null
+++ b/docker-stacks/tests/scipy-notebook/data/cython/setup.py
@@ -0,0 +1,6 @@
+# These lines are not sorted by isort on purpose
+# see: https://stackoverflow.com/a/53356077/4881441
+from setuptools import setup  # isort:skip
+from Cython.Build import cythonize  # isort:skip
+
+setup(ext_modules=cythonize("helloworld.pyx"))
diff --git a/docker-stacks/tests/scipy-notebook/data/matplotlib/matplotlib_1.py b/docker-stacks/tests/scipy-notebook/data/matplotlib/matplotlib_1.py
new file mode 100644
index 0000000..8ccf369
--- 
/dev/null +++ b/docker-stacks/tests/scipy-notebook/data/matplotlib/matplotlib_1.py @@ -0,0 +1,26 @@ +# Matplotlib: Create a simple plot example. +# Refs: https://matplotlib.org/stable/gallery/lines_bars_and_markers/simple_plot.html + +# Optional test with [Matplotlib Jupyter Integration](https://github.com/matplotlib/ipympl) +# %matplotlib widget +import matplotlib.pyplot as plt +import numpy as np + +# Data for plotting +t = np.arange(0.0, 2.0, 0.01) +s = 1 + np.sin(2 * np.pi * t) + +fig, ax = plt.subplots() +ax.plot(t, s) + +ax.set( + xlabel="time (s)", + ylabel="voltage (mV)", + title="About as simple as it gets, folks", +) +ax.grid() + +# Note that the test can be run headless by checking if an image is produced +file_path = "/tmp/test.png" +fig.savefig(file_path) +print(f"File {file_path} saved") diff --git a/docker-stacks/tests/scipy-notebook/data/matplotlib/matplotlib_fonts_1.py b/docker-stacks/tests/scipy-notebook/data/matplotlib/matplotlib_fonts_1.py new file mode 100644 index 0000000..8944f2b --- /dev/null +++ b/docker-stacks/tests/scipy-notebook/data/matplotlib/matplotlib_fonts_1.py @@ -0,0 +1,25 @@ +# Matplotlib: Test tex fonts +import matplotlib +import matplotlib.pyplot as plt + +matplotlib.rcParams["pgf.texsystem"] = "pdflatex" +matplotlib.rcParams.update( + { + "font.family": "serif", + "font.size": 18, + "axes.labelsize": 20, + "axes.titlesize": 24, + "figure.titlesize": 28, + } +) +matplotlib.rcParams["text.usetex"] = True + +fig, ax = plt.subplots(1, 1) +x = [1, 2] +y = [1, 2] +ax.plot(x, y, label="a label") +ax.legend(fontsize=15) + +file_path = "/tmp/test_fonts.png" +fig.savefig(file_path) +print(f"File {file_path} saved") diff --git a/docker-stacks/tests/scipy-notebook/test_cython.py b/docker-stacks/tests/scipy-notebook/test_cython.py new file mode 100644 index 0000000..092271b --- /dev/null +++ b/docker-stacks/tests/scipy-notebook/test_cython.py @@ -0,0 +1,25 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +from pathlib import Path + +from tests.conftest import TrackedContainer + +THIS_DIR = Path(__file__).parent.resolve() + + +def test_cython(container: TrackedContainer) -> None: + host_data_dir = THIS_DIR / "data/cython" + cont_data_dir = "/home/jovyan/data" + + logs = container.run_and_wait( + timeout=10, + volumes={str(host_data_dir): {"bind": cont_data_dir, "mode": "ro"}}, + tty=True, + command=[ + "bash", + "-c", + # We copy our data to a temporary folder to be able to modify the directory + f"cp -r {cont_data_dir}/ /tmp/test/ && cd /tmp/test && python3 setup.py build_ext", + ], + ) + assert "building 'helloworld' extension" in logs diff --git a/docker-stacks/tests/scipy-notebook/test_extensions.py b/docker-stacks/tests/scipy-notebook/test_extensions.py new file mode 100644 index 0000000..d90cc62 --- /dev/null +++ b/docker-stacks/tests/scipy-notebook/test_extensions.py @@ -0,0 +1,34 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
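+
+# A rough manual equivalent of this check, assuming a locally built image:
+#   docker run --rm <image> jupyter labextension check <extension>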
+import logging + +import pytest # type: ignore + +from tests.conftest import TrackedContainer + +LOGGER = logging.getLogger(__name__) + + +@pytest.mark.skip(reason="Not yet compliant with JupyterLab 4") +@pytest.mark.parametrize( + "extension", + [ + "@bokeh/jupyter_bokeh", + "@jupyter-widgets/jupyterlab-manager", + "jupyter-matplotlib", + ], +) +def test_check_extension(container: TrackedContainer, extension: str) -> None: + """Basic check of each extension + + The list of installed extensions can be obtained through this command: + + $ jupyter labextension list + + """ + LOGGER.info(f"Checking the extension: {extension} ...") + container.run_and_wait( + timeout=10, + tty=True, + command=["jupyter", "labextension", "check", extension], + ) diff --git a/docker-stacks/tests/scipy-notebook/test_matplotlib.py b/docker-stacks/tests/scipy-notebook/test_matplotlib.py new file mode 100644 index 0000000..e96bc8c --- /dev/null +++ b/docker-stacks/tests/scipy-notebook/test_matplotlib.py @@ -0,0 +1,55 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import logging +from pathlib import Path + +import pytest # type: ignore + +from tests.conftest import TrackedContainer + +LOGGER = logging.getLogger(__name__) +THIS_DIR = Path(__file__).parent.resolve() + + +@pytest.mark.parametrize( + "test_file,expected_file,description", + [ + ( + "matplotlib_1.py", + "test.png", + "Test that matplotlib can plot a graph and write it as an image ...", + ), + ( + "matplotlib_fonts_1.py", + "test_fonts.png", + "Test cm-super latex labels in matplotlib ...", + ), + ], +) +def test_matplotlib( + container: TrackedContainer, test_file: str, expected_file: str, description: str +) -> None: + """Various tests performed on matplotlib + + - Test that matplotlib is able to plot a graph and write it as an image + - Test matplotlib latex fonts, which depend on the cm-super package + """ + host_data_dir = THIS_DIR / "data/matplotlib" + cont_data_dir = "/home/jovyan/data" + output_dir = "/tmp" + LOGGER.info(description) + running_container = container.run_detached( + volumes={str(host_data_dir): {"bind": cont_data_dir, "mode": "ro"}}, + tty=True, + command=["bash", "-c", "sleep infinity"], + ) + command = f"python {cont_data_dir}/{test_file}" + cmd = running_container.exec_run(command) + LOGGER.debug(cmd.output.decode("utf-8")) + assert cmd.exit_code == 0, f"Command {command} failed" + # Checking if the file is generated + # https://stackoverflow.com/a/15895594/4413446 + command = f"test -s {output_dir}/{expected_file}" + cmd = running_container.exec_run(command) + LOGGER.debug(cmd.output.decode("utf-8")) + assert cmd.exit_code == 0, f"Command {command} failed" diff --git a/docker-stacks/tests/scipy-notebook/units/unit_pandas.py b/docker-stacks/tests/scipy-notebook/units/unit_pandas.py new file mode 100644 index 0000000..2190a0b --- /dev/null +++ b/docker-stacks/tests/scipy-notebook/units/unit_pandas.py @@ -0,0 +1,7 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +import numpy as np +import pandas as pd + +np.random.seed(0) +print(pd.Series(np.random.randint(0, 7, size=10)).sum()) diff --git a/docker-stacks/tests/tensorflow-notebook/units/unit_tensorflow.py b/docker-stacks/tests/tensorflow-notebook/units/unit_tensorflow.py new file mode 100644 index 0000000..96446a5 --- /dev/null +++ b/docker-stacks/tests/tensorflow-notebook/units/unit_tensorflow.py @@ -0,0 +1,6 @@ +# Copyright (c) Jupyter Development Team. 
+# Distributed under the terms of the Modified BSD License.
+import tensorflow as tf
+
+print(tf.constant("Hello, TensorFlow"))
+print(tf.reduce_sum(tf.random.normal([1000, 1000])))
diff --git a/images/README.md b/images/README.md
new file mode 100644
index 0000000..f8674f0
--- /dev/null
+++ b/images/README.md
@@ -0,0 +1,13 @@
+# Dockerfile patches
+
+This directory contains patches for the original Dockerfiles:
+
+## List of patches
+
+[PySpark 3.2.x Java 11 support](patch/pyspark-notebook/Dockerfile.spark3.2.x#L6): Adds the `--add-opens` options required for compatibility with Java 11 (Spark <= 3.2.x)
+
+The options are taken from the Java launcher module: [JavaModuleOptions.java](https://github.com/apache/spark/blob/8706ccdf461c3b7f82b94b9e953ca4547f551ab1/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java)
+
+Please check the following guide for more information: [migrating-jdk-8-later-jdk-releases](https://docs.oracle.com/en/java/javase/16/migrate/migrating-jdk-8-later-jdk-releases.html#GUID-2F61F3A9-0979-46A4-8B49-325BA0EE8B66)
+
+
diff --git a/images/patch/pyspark-notebook/Dockerfile.spark3.2.x b/images/patch/pyspark-notebook/Dockerfile.spark3.2.x
new file mode 100644
index 0000000..9b8931b
--- /dev/null
+++ b/images/patch/pyspark-notebook/Dockerfile.spark3.2.x
@@ -0,0 +1,21 @@
+
+#### Add the "--add-opens" options to be compatible with Java 11/17 (Spark <= 3.3.2)
+#### The options are taken from the Java module declaration:
+###### https://github.com/apache/spark/blob/8706ccdf461c3b7f82b94b9e953ca4547f551ab1/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java
+###### Doc: https://docs.oracle.com/en/java/javase/16/migrate/migrating-jdk-8-later-jdk-releases.html#GUID-2F61F3A9-0979-46A4-8B49-325BA0EE8B66
+ENV JDK_JAVA_OPTIONS $JDK_JAVA_OPTIONS \
+    --add-opens=java.base/java.lang=ALL-UNNAMED \
+    --add-opens=java.base/java.lang.invoke=ALL-UNNAMED \
+    --add-opens=java.base/java.lang.reflect=ALL-UNNAMED \
+    --add-opens=java.base/java.io=ALL-UNNAMED \
+    --add-opens=java.base/java.net=ALL-UNNAMED \
+    --add-opens=java.base/java.nio=ALL-UNNAMED \
+    --add-opens=java.base/java.util=ALL-UNNAMED \
+    --add-opens=java.base/java.util.concurrent=ALL-UNNAMED \
+    --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \
+    --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \
+    --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
+    --add-opens=java.base/sun.nio.cs=ALL-UNNAMED \
+    --add-opens=java.base/sun.security.action=ALL-UNNAMED \
+    --add-opens=java.base/sun.util.calendar=ALL-UNNAMED \
+    --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED
diff --git a/python/.flake8 b/python/.flake8
new file mode 100644
index 0000000..87afe54
--- /dev/null
+++ b/python/.flake8
@@ -0,0 +1,3 @@
+[flake8]
+max-line-length = 100
+extend-ignore = E203, W503
diff --git a/python/okdp/__init__.py b/python/okdp/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/python/okdp/extension/__init__.py b/python/okdp/extension/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/python/okdp/extension/matrix/__init__.py b/python/okdp/extension/matrix/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/python/okdp/extension/matrix/constants.py b/python/okdp/extension/matrix/constants.py
new file mode 100644
index 0000000..9c65e73
--- /dev/null
+++ b/python/okdp/extension/matrix/constants.py
@@ -0,0 +1,10 @@
+PYTHON_VERSION = "python_version"
+SPARK_VERSION = "spark_version"
+JAVA_VERSION = "java_version"
+SCALA_VERSION = "scala_version"
+HADOOP_VERSION = "hadoop_version"
+SPARK_DOWNLOAD_URL = "spark_download_url"
+
+SPARK_DEV_TAG = "spark_dev_tag"
+PYTHON_DEV_TAG = "python_dev_tag"
+
diff --git a/python/okdp/extension/matrix/utils/__init__.py b/python/okdp/extension/matrix/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/python/okdp/extension/matrix/utils/matrix_utils.py b/python/okdp/extension/matrix/utils/matrix_utils.py
new file mode 100644
index 0000000..d7f9081
--- /dev/null
+++ b/python/okdp/extension/matrix/utils/matrix_utils.py
@@ -0,0 +1,98 @@
+from itertools import groupby
+import itertools
+from okdp.extension.matrix.constants import *
+
+def group_on(elem) -> str:
+    """Build the grouping key from the python/java/hadoop versions of an entry"""
+    return str(elem[PYTHON_VERSION]) + "_".join(str(elem[JAVA_VERSION])) + str(elem[HADOOP_VERSION])
+
+def intersect_dicts(dict1: dict, dict2: dict) -> dict:
+    """ Intersection between the values of two dicts;
+    if dict2 is empty, return dict1
+    """
+    dict_res = {**dict1, **dict2}
+    ### technical key for tag to build spark images without rebuilding base images
+    #for key in dict1.keys():
+    #    if key == PYTHON_VERSION:
+    #        dict_res[f"_{PYTHON_VERSION}"] = dict1.get(key)
+    ### Do the intersection
+    for key, value in dict_res.items():
+        if key in dict1 and key in dict2:
+            dict_res[key] = list(set(value) & set(dict1[key]))
+    return dict_res
+
+def merge_dicts(dict1: dict, *args: dict) -> dict:
+    """ Merge multiple dicts by keeping all the values for the keys """
+    if not args:
+        return dict1
+    dict2 = args[0]
+    dict_res = {**dict1, **dict2}
+    for key, value in dict_res.items():
+        if key in dict1 and key in dict2:
+            dict_res[key] = list(set(sum([value, dict1[key]], [])))
+    return dict_res if len(args) == 1 else merge_dicts(dict_res, *args[1:])
+
+def join_versions(groups: list[dict], on_dict: dict) -> list[dict]:
+    """ Intersect the values of each group of dicts with the provided on_dict """
+    ### Intersect the groups with on_dict
+    result = []
+    for group in groups:
+        result.append(intersect_dicts(group, on_dict))
+
+    return result
+
+def group_versions_by(dicts: list[dict], group_on) -> list[dict]:
+    """ Group the spark versions by PYTHON_VERSION/JAVA_VERSION/HADOOP_VERSION
+    """
+    ### Group the elements by python_version
+    python_groups = []
+    data = sorted(dicts, key=group_on)
+    for k, g in groupby(data, group_on):
+        python_groups.append(list(g))
+
+    ### Merge the groups
+    result = []
+    for group in python_groups:
+        result.extend(group)
+    return result
+
+def ignore_invalid_versions(dicts: list[dict]) -> list[dict]:
+    """ Drop entries missing any of the required version fields """
+    return list(filter(lambda elem:
+        elem.get(SPARK_VERSION) and
+        elem.get(JAVA_VERSION) and
+        elem.get(SCALA_VERSION) and
+        elem.get(HADOOP_VERSION) and elem.get(SPARK_DOWNLOAD_URL),
+        dicts))
+
+def normalize_matrix(versions: list[dict]) -> list[dict]:
+    """ Convert to an array matrix
+    https://github.com/orgs/community/discussions/24981
+    """
+
+    combinations = []
+    for version in versions:
+        keys, values = zip(*version.items())
+        combinations.extend([dict(zip(keys, v)) for v in itertools.product(*values)])
+
+    return combinations
+
+def normalize_scala_version(matrix: list[dict]) -> list[dict]:
+    """ The dist is suffixed with -scala2.13 for scala version 2.13
+    Ex.: https://archive.apache.org/dist/spark/spark-3.4.0/
+    """
+    n = lambda key, value: (key, value) if key != SCALA_VERSION else (key, value if value == "2.13" else "")
+    return [dict(map(lambda kv: n(kv[0], kv[1]), e.items())) for e in matrix]
+
+def normalize_value(value) -> list[str]:
+    """ Cast values to strings and convert scalar values to lists for the github strategy matrix input """
+    if not isinstance(value, list):
+def normalize_value(value) -> list[str]:
+    """ Cast values to string and wrap scalar values in a list for the github strategy matrix input """
+    if not isinstance(value, list):
+        return [str(value)]
+    return [str(v) for v in value]
+
+def remove_duplicates(dicts: list[dict]) -> list[dict]:
+    """ Remove duplicated dicts while preserving the original order """
+    result = []
+    for d in dicts:
+        if d not in result:
+            result.append(d)
+    return result
+
diff --git a/python/okdp/extension/matrix/version_compatibility_matrix.py b/python/okdp/extension/matrix/version_compatibility_matrix.py
new file mode 100644
index 0000000..fb3ac05
--- /dev/null
+++ b/python/okdp/extension/matrix/version_compatibility_matrix.py
@@ -0,0 +1,91 @@
+import json
+import argparse
+import logging
+
+import yaml
+
+from okdp.extension.matrix.constants import *
+from okdp.extension.matrix.utils.matrix_utils import (
+    group_on,
+    group_versions_by,
+    ignore_invalid_versions,
+    join_versions,
+    normalize_matrix,
+    normalize_scala_version,
+    normalize_value,
+    remove_duplicates,
+)
+
+LOGGER = logging.getLogger(__name__)
+
+class VersionCompatibilityMatrix:
+
+    def __init__(self, path: str, git_branch: str):
+
+        LOGGER.info(f"Building version compatibility matrix - Matrix path: {path}, Current git branch: {git_branch}")
+
+        with open(path, 'r') as file:
+            doc = yaml.safe_load(file)
+            self.compatibility_matrix = doc.get("compatibility-matrix")
+            self.build_matrix = doc.get("build-matrix") or {}
+        # Handle branches like: feature/my-feature
+        self.git_branch = git_branch.replace("/", "-")
+
+        self.__validate__()
+        self._normalize_values_()
+
+    def _normalize_values_(self):
+        """ Convert simple values to arrays.
+        Ex.: python_version: 3.11 => python_version: ['3.11']
+        """
+        self.compatibility_matrix = [dict(map(lambda kv: (kv[0], normalize_value(kv[1])), e.items())) for e in self.compatibility_matrix]
+        self.build_matrix = dict(map(lambda kv: (kv[0], normalize_value(kv[1])), self.build_matrix.items()))
+
+    def __validate__(self):
+        if not self.compatibility_matrix:
+            raise ValueError("The compatibility-matrix section is mandatory")
+
+    def generate_matrix(self) -> tuple[list[dict], list[dict]]:
+        compatibility_versions_matrix = [dict(map(lambda kv: (kv[0], normalize_value(kv[1])), e.items())) for e in self.compatibility_matrix]
+        grouped = group_versions_by(compatibility_versions_matrix, group_on=group_on)
+        joined = ignore_invalid_versions(join_versions(grouped, self.build_matrix))
+        spark_version_matrix = normalize_scala_version(self.add_latest_dev_tags(normalize_matrix(joined)))
+        python_version_matrix = remove_duplicates([{PYTHON_VERSION: e.get(PYTHON_VERSION), PYTHON_DEV_TAG: e.get(PYTHON_DEV_TAG)} for e in spark_version_matrix])
+        return (spark_version_matrix, python_version_matrix)
+
+    def add_latest_dev_tags(self, matrix: list[dict]) -> list[dict]:
+        """ The intermediate images are pushed with a unique 'latest' dev tag """
+        for e in matrix:
+            e |= {SPARK_DEV_TAG: self.spark_dev_tag(e)}
+            e |= {PYTHON_DEV_TAG: self.python_dev_tag(e.get(PYTHON_VERSION))}
+        return matrix
+
+    def python_dev_tag(self, python_version: str) -> str:
+        return f"python{python_version}-{self.git_branch}-latest"
+
+    def spark_dev_tag(self, e: dict) -> str:
+        python_version = e.get(PYTHON_VERSION)
+        spark_version = e.get(SPARK_VERSION)
+        java_version = e.get(JAVA_VERSION)
+        scala_version = e.get(SCALA_VERSION)
+        scala_version = "2.12" if not scala_version else scala_version
+        return f"spark{spark_version}-python{python_version}-java{java_version}-scala{scala_version}-{self.git_branch}-latest"
+
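+# Illustrative dev-tag shapes produced above (assumed versions, branch 'main'):
+#   spark_dev_tag:  'spark3.5.0-python3.11-java17-scala2.12-main-latest'
+#   python_dev_tag: 'python3.11-main-latest'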
+if __name__ == "__main__":
+
+    logging.basicConfig(level=logging.INFO)
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument(
+        "--versions-matrix-path",
+        required=True,
+        help="The matrix path location containing the versions to build",
+    )
+
+    arg_parser.add_argument(
+        "--git-branch",
+        required=True,
+        help="The current git branch",
+    )
+
+    args = arg_parser.parse_args()
+    vcm = VersionCompatibilityMatrix(args.versions_matrix_path, args.git_branch)
+    # Local debugging example:
+    # vcm = VersionCompatibilityMatrix(".build/.versions.yml", "main")
+    # with open(os.environ['GITHUB_OUTPUT'], 'a') as fh:
+    #     print(f"spark_matrix={json.dumps(vcm.generate_matrix())}", file=fh)
+    (spark_matrix, python_version) = vcm.generate_matrix()
+    assert spark_matrix, "The resulting build matrix was empty. Please review your configuration '.build/.versions.yml'"
+    print(f"spark={json.dumps(spark_matrix)}")
+    print(f"python={json.dumps(python_version)}")
diff --git a/python/okdp/extension/tagging/__init__.py b/python/okdp/extension/tagging/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/python/okdp/extension/tagging/apply_tags.py b/python/okdp/extension/tagging/apply_tags.py
new file mode 100755
index 0000000..524c238
--- /dev/null
+++ b/python/okdp/extension/tagging/apply_tags.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+"""
+Modification of the original file:
+* Generate and apply the tags on the fly instead of writing them to an intermediate file
+"""
+import argparse
+import logging
+
+import plumbum
+
+from tagging.docker_runner import DockerRunner
+from okdp.extension.tagging.get_taggers_and_manifests import get_taggers_and_manifests
+
+docker = plumbum.local["docker"]
+
+LOGGER = logging.getLogger(__name__)
+
+class Tagging:
+
+    def __init__(self, short_image_name: str, registry: str, owner: str):
+        self.short_image_name, self.tag = short_image_name.split(":")
+        self.registry = registry
+        self.owner = owner
+
+    def apply_tags(self) -> None:
+        """
+        Tags <registry>/<owner>/<short_image_name>:<tag> with the tags reported by all taggers for this image
+        """
+        LOGGER.info(f"Tagging image: {self.short_image_name}")
+
+        image = f"{self.registry}/{self.owner}/{self.short_image_name}:{self.tag}"
+
+        tags = self.generate_tags()
+
+        for tag in tags:
+            LOGGER.info(f"Applying tag: {tag}")
+            docker["tag", image, tag] & plumbum.FG
+
+    def generate_tags(self) -> list[str]:
+        """
+        Generate the tags for the image <registry>/<owner>/<short_image_name>:<tag>
+        """
+        LOGGER.info(f"Generating tags for image: {self.short_image_name}")
+        taggers, _ = get_taggers_and_manifests(self.short_image_name)
+
+        image = f"{self.registry}/{self.owner}/{self.short_image_name}:{self.tag}"
+        tags = [image]
+        with DockerRunner(image) as container:
+            for tagger in taggers:
+                tagger_name = tagger.__class__.__name__
+                tag_value = tagger.tag_value(container)
+                LOGGER.info(
+                    f"Calculated tag, tagger_name: {tagger_name} tag_value: {tag_value}"
+                )
+                tags.append(
+                    f"{self.registry}/{self.owner}/{self.short_image_name}:{tag_value}"
+                )
+
+        return tags
+
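+# Hypothetical invocation (image name and owner are examples only):
+#   python3 -m okdp.extension.tagging.apply_tags \
+#     --short-image-name base-notebook:python3.11-main-latest \
+#     --registry ghcr.io --owner okdp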
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument(
+        "--short-image-name",
+        required=True,
+        help="Short image name",
+    )
+    arg_parser.add_argument(
+        "--registry",
+        required=True,
+        type=str,
+        choices=["ghcr.io"],
+        help="Image registry",
+    )
+    arg_parser.add_argument(
+        "--owner",
+        required=True,
+        help="Owner of the image",
+    )
+    args = arg_parser.parse_args()
+
+    tagging = Tagging(args.short_image_name, args.registry, args.owner)
+
+    tagging.apply_tags()
diff --git a/python/okdp/extension/tagging/get_taggers_and_manifests.py b/python/okdp/extension/tagging/get_taggers_and_manifests.py
new file mode 100644
index 0000000..509427c
--- /dev/null
+++ b/python/okdp/extension/tagging/get_taggers_and_manifests.py
@@ -0,0 +1,29 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+"""
+Modification of the original file:
+* Reference the new custom extended ALL_IMAGES map
+"""
+
+from typing import Optional
+
+from okdp.extension.tagging.images_hierarchy import ALL_IMAGES
+from tagging.manifests import ManifestInterface
+from tagging.taggers import TaggerInterface
+
+
+def get_taggers_and_manifests(
+    short_image_name: Optional[str],
+) -> tuple[list[TaggerInterface], list[ManifestInterface]]:
+    if short_image_name is None:
+        return [[], []]  # type: ignore
+
+    image_description = ALL_IMAGES[short_image_name]
+    parent_taggers, parent_manifests = get_taggers_and_manifests(
+        image_description.parent_image
+    )
+    return (
+        parent_taggers + image_description.taggers,
+        parent_manifests + image_description.manifests,
+    )
diff --git a/python/okdp/extension/tagging/images_hierarchy.py b/python/okdp/extension/tagging/images_hierarchy.py
new file mode 100644
index 0000000..a92f21d
--- /dev/null
+++ b/python/okdp/extension/tagging/images_hierarchy.py
@@ -0,0 +1,141 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+"""
+Extension of the original file:
+* Remove the simple form taggers (DateTagger/SHATagger, etc.) which may conflict when multiple python versions are built on the same date
+* Add long form taggers to uniquely identify an image
+* Remove pyspark-notebook's dependency on its parent images' tags
+"""
+
+from dataclasses import dataclass, field
+from typing import Optional
+from okdp.extension.tagging.taggers import (
+    JavaMajorVersionTagger,
+    JavaVersionTagger,
+    LongTagger,
+    SparkVersionTagger,
+    ScalaVersionTagger,
+    ScalaMajorMinorVersionTagger,
+)
+
+from tagging.manifests import (
+    AptPackagesManifest,
+    CondaEnvironmentManifest,
+    JuliaPackagesManifest,
+    ManifestInterface,
+    RPackagesManifest,
+    SparkInfoManifest,
+)
+from tagging.taggers import (
+    DateTagger,
+    JuliaVersionTagger,
+    JupyterHubVersionTagger,
+    JupyterLabVersionTagger,
+    PythonMajorMinorVersionTagger,
+    PythonVersionTagger,
+    PytorchVersionTagger,
+    RVersionTagger,
+    SHATagger,
+    TaggerInterface,
+    TensorflowVersionTagger,
+    UbuntuVersionTagger,
+)
+
+
+@dataclass
+class ImageDescription:
+    parent_image: Optional[str]
+    taggers: list[TaggerInterface] = field(default_factory=list)
+    manifests: list[ManifestInterface] = field(default_factory=list)
+
+
+ALL_IMAGES = {
+    "docker-stacks-foundation": ImageDescription(
+        parent_image=None,
+        taggers=[
+            LongTagger(UbuntuVersionTagger(), PythonVersionTagger()),
+            LongTagger(PythonVersionTagger(), SHATagger()),
+            LongTagger(PythonVersionTagger(), DateTagger()),
+            LongTagger(PythonMajorMinorVersionTagger(), DateTagger()),
+        ],
+        manifests=[CondaEnvironmentManifest(), AptPackagesManifest()],
+    ),
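+    # Illustrative tag values (assumed build): LongTagger(PythonVersionTagger(),
+    # DateTagger()) above would yield something like 'python-3.11.6-2024-01-15',
+    # keeping tags unique across python versions built on the same day.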
+    "base-notebook": ImageDescription(
+        parent_image="docker-stacks-foundation",
+        taggers=[
+            LongTagger(PythonVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger()),
+            LongTagger(PythonVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger(), DateTagger()),
+        ],
+    ),
+    "minimal-notebook": ImageDescription(parent_image="base-notebook"),
+    "scipy-notebook": ImageDescription(parent_image="minimal-notebook"),
+    "r-notebook": ImageDescription(
+        parent_image="minimal-notebook",
+        taggers=[
+            LongTagger(PythonVersionTagger(), RVersionTagger()),
+            LongTagger(PythonVersionTagger(), RVersionTagger(), DateTagger()),
+            LongTagger(PythonVersionTagger(), RVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger()),
+            LongTagger(PythonVersionTagger(), RVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger(), DateTagger()),
+        ],
+        manifests=[RPackagesManifest()],
+    ),
+    "julia-notebook": ImageDescription(
+        parent_image="minimal-notebook",
+        taggers=[
+            LongTagger(PythonVersionTagger(), JuliaVersionTagger()),
+            LongTagger(PythonVersionTagger(), JuliaVersionTagger(), DateTagger()),
+            LongTagger(PythonVersionTagger(), JuliaVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger()),
+            LongTagger(PythonVersionTagger(), JuliaVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger(), DateTagger()),
+        ],
+        manifests=[JuliaPackagesManifest()],
+    ),
+    "tensorflow-notebook": ImageDescription(
+        parent_image="scipy-notebook",
+        taggers=[
+            LongTagger(PythonVersionTagger(), TensorflowVersionTagger()),
+            LongTagger(PythonVersionTagger(), TensorflowVersionTagger(), DateTagger()),
+            LongTagger(PythonVersionTagger(), TensorflowVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger()),
+            LongTagger(PythonVersionTagger(), TensorflowVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger(), DateTagger()),
+        ]
+    ),
+    "pytorch-notebook": ImageDescription(
+        parent_image="scipy-notebook",
+        taggers=[
+            LongTagger(PythonVersionTagger(), PytorchVersionTagger()),
+            LongTagger(PythonVersionTagger(), PytorchVersionTagger(), DateTagger()),
+            LongTagger(PythonVersionTagger(), PytorchVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger()),
+            LongTagger(PythonVersionTagger(), PytorchVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger(), DateTagger()),
+        ]
+    ),
+    "datascience-notebook": ImageDescription(
+        parent_image="scipy-notebook",
+        taggers=[
+            LongTagger(PythonVersionTagger(), RVersionTagger(), JuliaVersionTagger()),
+            LongTagger(PythonVersionTagger(), RVersionTagger(), JuliaVersionTagger(), DateTagger()),
+            LongTagger(PythonVersionTagger(), RVersionTagger(), JuliaVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger()),
+            LongTagger(PythonVersionTagger(), RVersionTagger(), JuliaVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger(), DateTagger()),
+        ],
+        manifests=[RPackagesManifest(), JuliaPackagesManifest()],
+    ),
+    "pyspark-notebook": ImageDescription(
+        parent_image=None,
+        taggers=[
+            LongTagger(SparkVersionTagger(), PythonMajorMinorVersionTagger(), JavaMajorVersionTagger(), ScalaMajorMinorVersionTagger()),
+            LongTagger(SparkVersionTagger(), PythonMajorMinorVersionTagger(), JavaMajorVersionTagger(), ScalaMajorMinorVersionTagger(), SHATagger()),
+            LongTagger(SparkVersionTagger(), PythonMajorMinorVersionTagger(), JavaMajorVersionTagger(), ScalaMajorMinorVersionTagger(), DateTagger()),
+            LongTagger(SparkVersionTagger(), PythonVersionTagger(), JavaVersionTagger(), ScalaVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger()),
+            LongTagger(SparkVersionTagger(), PythonVersionTagger(), JavaVersionTagger(), ScalaVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger(), DateTagger()),
+        ],
+        manifests=[SparkInfoManifest()],
+    ),
+    "all-spark-notebook": ImageDescription(
+        parent_image="pyspark-notebook",
+        taggers=[
+            LongTagger(SparkVersionTagger(), PythonVersionTagger(), RVersionTagger(), JavaVersionTagger(), ScalaVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger()),
+            LongTagger(SparkVersionTagger(), PythonVersionTagger(), RVersionTagger(), JavaVersionTagger(), ScalaVersionTagger(), JupyterHubVersionTagger(), JupyterLabVersionTagger(), DateTagger()),
+        ],
+        manifests=[RPackagesManifest()],
+    ),
+}
diff --git a/python/okdp/extension/tagging/taggers.py b/python/okdp/extension/tagging/taggers.py
new file mode 100644
index 0000000..3c2632e
--- /dev/null
+++ b/python/okdp/extension/tagging/taggers.py
@@ -0,0 +1,75 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+"""
+* Add custom taggers (long form, scala, etc. taggers)
+* Fix the existing SparkVersionTagger when the jdk '--add-opens' options are enabled for spark 3.2.x (java 11 compatibility)
+* Unset JDK_JAVA_OPTIONS when asking for a java program version
+"""
+
+from functools import cache
+
+from docker.models.containers import Container
+
+from tagging.docker_runner import DockerRunner
+
+from tagging.taggers import *
+
+@cache
+def _get_program_version(container: Container, program: str) -> str:
+    """ Get the program version. Handles compatibility with spark 3.2.x/Java 11 """
+    return DockerRunner.run_simple_command(container, cmd=f"/bin/sh -c 'unset JDK_JAVA_OPTIONS && {program} --version'")
+
+def spark_version_prefix_line(cmd_output: str, search: str) -> int:
+    """ Return the index of the first line containing 'search', or -1 if not found """
+    for idx, line in enumerate(cmd_output.split("\n")):
+        if line.find(search) != -1:
+            return idx
+    return -1
+
+class SparkVersionTagger(TaggerInterface):
+    @staticmethod
+    def tag_value(container: Container) -> str:
+        SPARK_VERSION_LINE_PREFIX = r" /___/ .__/\_,_/_/ /_/\_\ version"
+
+        spark_version = _get_program_version(container, "spark-submit")
+        line = spark_version_prefix_line(spark_version, SPARK_VERSION_LINE_PREFIX)
+        assert line > -1, f"Spark version line starting with '{SPARK_VERSION_LINE_PREFIX}' not found"
+        version_line = spark_version.split("\n")[line]
+        assert version_line.startswith(SPARK_VERSION_LINE_PREFIX), f"Spark version line '{version_line}' does not start with '{SPARK_VERSION_LINE_PREFIX}'"
+        return "spark-" + version_line.split(" ")[-1]
+
+class ScalaMajorMinorVersionTagger(TaggerInterface):
+    @staticmethod
+    def tag_value(container: Container) -> str:
+        full_version = ScalaVersionTagger.tag_value(container)
+        return full_version[: full_version.rfind(".")]
+
+class ScalaVersionTagger(TaggerInterface):
+    @staticmethod
+    def tag_value(container: Container) -> str:
+        SCALA_VERSION_LINE_PREFIX = "Using Scala version"
+
+        spark_version = _get_program_version(container, "spark-submit")
+        line = spark_version_prefix_line(spark_version, SCALA_VERSION_LINE_PREFIX)
+        assert line > -1, f"Scala version line starting with '{SCALA_VERSION_LINE_PREFIX}' not found"
+        scala_version_line = spark_version.split("\n")[line]
+        assert scala_version_line.startswith(SCALA_VERSION_LINE_PREFIX), f"Scala version line '{scala_version_line}' does not start with '{SCALA_VERSION_LINE_PREFIX}'"
+        return "scala-" + scala_version_line.split(" ")[3].split(",")[0]
+
+class JavaVersionTagger(TaggerInterface):
+    @staticmethod
+    def tag_value(container: Container) -> str:
+        return "java-" + _get_program_version(container, "java").split()[1]
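+# Illustrative outputs (assumed 'spark-submit --version' banner for Spark 3.4.1
+# built with Scala 2.12.17 on Java 17.0.8): SparkVersionTagger -> 'spark-3.4.1',
+# ScalaVersionTagger -> 'scala-2.12.17', JavaVersionTagger -> 'java-17.0.8';
+# JavaMajorVersionTagger below trims the latter to 'java-17'.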
+class JavaMajorVersionTagger(TaggerInterface):
+    @staticmethod
+    def tag_value(container: Container) -> str:
+        full_version = JavaVersionTagger.tag_value(container)
+        return full_version[: full_version.find(".")]
+
+class LongTagger(TaggerInterface):
+    """ Long form tagger which combines all the versions in a single tag """
+    def __init__(self, *taggers: TaggerInterface):
+        self.taggers = taggers
+
+    def tag_value(self, container: Container) -> str:
+        return "-".join(t.tag_value(container) for t in self.taggers)
diff --git a/python/okdp/patch/README.md b/python/okdp/patch/README.md
new file mode 100644
index 0000000..e3b4663
--- /dev/null
+++ b/python/okdp/patch/README.md
@@ -0,0 +1,10 @@
+# Patches
+
+This directory contains patched versions of the original python source files required to run the tests:
+
+## Add ghcr.io container registry
+* [run_tests.py](tests/run_tests.py#L53)
+
+## Skip python version check
+* [Skip python version](tests/docker-stacks-foundation/test_python_version.py#L17): Allows running with any python version provided by the build arg: PYTHON_VERSION
+
diff --git a/python/okdp/patch/tests/docker-stacks-foundation/test_python_version.py b/python/okdp/patch/tests/docker-stacks-foundation/test_python_version.py
new file mode 100644
index 0000000..7a84e45
--- /dev/null
+++ b/python/okdp/patch/tests/docker-stacks-foundation/test_python_version.py
@@ -0,0 +1,41 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+"""
+Skip the hard check of the python version to allow overriding it at build time:
+--build-arg PYTHON_VERSION=3.x.x
+"""
+
+import logging
+
+import pytest  # type: ignore
+
+from tests.conftest import TrackedContainer
+
+LOGGER = logging.getLogger(__name__)
+EXPECTED_PYTHON_VERSION = "3.11"
+
+@pytest.mark.skip(reason="Allow overriding the python version")
+def test_python_version(container: TrackedContainer) -> None:
+    LOGGER.info(
+        f"Checking that python major.minor version is {EXPECTED_PYTHON_VERSION}"
+    )
+    logs = container.run_and_wait(
+        timeout=5,
+        tty=True,
+        command=["python", "--version"],
+    )
+    python = next(line for line in logs.splitlines() if line.startswith("Python "))
+    full_version = python.split()[1]
+    major_minor_version = full_version[: full_version.rfind(".")]
+
+    assert major_minor_version == EXPECTED_PYTHON_VERSION
+
+@pytest.mark.skip(reason="Allow overriding the python version")
+def test_python_pinned_version(container: TrackedContainer) -> None:
+    LOGGER.info(f"Checking that pinned python version is {EXPECTED_PYTHON_VERSION}.*")
+    logs = container.run_and_wait(
+        timeout=5,
+        tty=True,
+        command=["cat", "/opt/conda/conda-meta/pinned"],
+    )
+    assert f"python {EXPECTED_PYTHON_VERSION}.*" in logs
diff --git a/python/okdp/patch/tests/run_tests.py b/python/okdp/patch/tests/run_tests.py
new file mode 100755
index 0000000..47f3d0d
--- /dev/null
+++ b/python/okdp/patch/tests/run_tests.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+"""
+Add ghcr.io to the allowed registries to prevent test failures
+"""
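+# Example invocation (owner and image name are hypothetical):
+#   python3 -m tests.run_tests --short-image-name base-notebook:latest \
+#     --registry ghcr.io --owner okdp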
+ +""" +Add ghcr.io as part of allowed registries to prevent test failure +""" + +import argparse +import logging + +import plumbum + +from tests.images_hierarchy import get_test_dirs + +python3 = plumbum.local["python3"] + +LOGGER = logging.getLogger(__name__) + + +def test_image(short_image_name: str, registry: str, owner: str) -> None: + LOGGER.info(f"Testing image: {short_image_name}") + test_dirs = get_test_dirs(short_image_name) + LOGGER.info(f"Test dirs to be run: {test_dirs}") + with plumbum.local.env(TEST_IMAGE=f"{registry}/{owner}/{short_image_name}"): + ( + python3[ + "-m", + "pytest", + "--numprocesses", + "auto", + "-m", + "not info", + test_dirs, + ] + & plumbum.FG + ) + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + arg_parser = argparse.ArgumentParser() + arg_parser.add_argument( + "--short-image-name", + required=True, + help="Short image name", + ) + arg_parser.add_argument( + "--registry", + required=True, + type=str, + choices=["docker.io", "quay.io", "ghcr.io"], + help="Image registry", + ) + arg_parser.add_argument( + "--owner", + required=True, + help="Owner of the image", + ) + + args = arg_parser.parse_args() + + test_image(args.short_image_name, args.registry, args.owner) diff --git a/python/requirements-extended.txt b/python/requirements-extended.txt new file mode 100644 index 0000000..c3726e8 --- /dev/null +++ b/python/requirements-extended.txt @@ -0,0 +1 @@ +pyyaml diff --git a/python/tests/__init__.py b/python/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/extension/__init__.py b/python/tests/extension/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/extension/matrix/__init__.py b/python/tests/extension/matrix/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/extension/matrix/conftest.py b/python/tests/extension/matrix/conftest.py new file mode 100644 index 0000000..7f9da4e --- /dev/null +++ b/python/tests/extension/matrix/conftest.py @@ -0,0 +1,49 @@ +import json +import pytest +from okdp.extension.matrix.version_compatibility_matrix import VersionCompatibilityMatrix # type: ignore + +class MockedVersionCompatibilityMatrix(VersionCompatibilityMatrix): + def __init__(self, compatibility_matrix: str, build_matrix: str, git_branch: str): + self.compatibility_matrix = compatibility_matrix + self.build_matrix = build_matrix + self.git_branch = git_branch + +def to_dict(str_as_json: str) -> list[dict]: + return json.loads(str_as_json) + +@pytest.fixture(scope="module") +def version_compatibility_matrix_data(): + return [ + {'python_version': ['3.9'], + 'spark_version': ['3.2.1', '3.2.2', '3.2.3', '3.2.4'], + 'java_version': ['11'], + 'scala_version': ['2.12', '2.13'], + 'hadoop_version': ['3.2'], + 'spark_download_url': ['https://archive.apache.org/dist/spark/'] + }, + {'python_version': ['3.10'], + 'spark_version': ['3.3.1', '3.3.2', '3.3.3', '3.3.4'], + 'java_version': ['17'], + 'scala_version': ['2.12', '2.13'], + 'hadoop_version': ['3'], + 'spark_download_url': ['https://archive.apache.org/dist/spark/'] + }, + {'python_version': ['3.11'], + 'spark_version': ['3.4.1', '3.4.2'], + 'java_version': ['17'], + 'scala_version': ['2.12', '2.13'], + 'hadoop_version': ['3'], + 'spark_download_url': ['https://archive.apache.org/dist/spark/'] + }, + {'python_version': ['3.11'], + 'spark_version': ['3.5.0'], + 'java_version': ['17', '21'], + 'scala_version': ['2.12', '2.13'], + 'hadoop_version': ['3'], + 'spark_download_url': 
+@pytest.fixture(scope="module")
+def version_compatibility_matrix_data():
+    return [
+        {'python_version': ['3.9'],
+         'spark_version': ['3.2.1', '3.2.2', '3.2.3', '3.2.4'],
+         'java_version': ['11'],
+         'scala_version': ['2.12', '2.13'],
+         'hadoop_version': ['3.2'],
+         'spark_download_url': ['https://archive.apache.org/dist/spark/']
+        },
+        {'python_version': ['3.10'],
+         'spark_version': ['3.3.1', '3.3.2', '3.3.3', '3.3.4'],
+         'java_version': ['17'],
+         'scala_version': ['2.12', '2.13'],
+         'hadoop_version': ['3'],
+         'spark_download_url': ['https://archive.apache.org/dist/spark/']
+        },
+        {'python_version': ['3.11'],
+         'spark_version': ['3.4.1', '3.4.2'],
+         'java_version': ['17'],
+         'scala_version': ['2.12', '2.13'],
+         'hadoop_version': ['3'],
+         'spark_download_url': ['https://archive.apache.org/dist/spark/']
+        },
+        {'python_version': ['3.11'],
+         'spark_version': ['3.5.0'],
+         'java_version': ['17', '21'],
+         'scala_version': ['2.12', '2.13'],
+         'hadoop_version': ['3'],
+         'spark_download_url': ['https://archive.apache.org/dist/spark/']
+        }
+    ]
+
diff --git a/python/tests/extension/matrix/resources/expected_build_matrix_empty.json b/python/tests/extension/matrix/resources/expected_build_matrix_empty.json
new file mode 100644
index 0000000..084ecb8
--- /dev/null
+++ b/python/tests/extension/matrix/resources/expected_build_matrix_empty.json
@@ -0,0 +1,242 @@
+[
+  {
+    "python_version": "3.10",
+    "spark_version": "3.3.1",
+    "java_version": "17",
+    "scala_version": "",
+    "hadoop_version": "3",
+    "spark_download_url": "https://archive.apache.org/dist/spark/",
+    "spark_dev_tag": "spark3.3.1-python3.10-java17-scala2.12-main-latest",
+    "python_dev_tag": "python3.10-main-latest"
+  },
+  {
+    "python_version": "3.10",
+    "spark_version": "3.3.1",
+    "java_version": "17",
+    "scala_version": "2.13",
+    "hadoop_version": "3",
+    "spark_download_url": "https://archive.apache.org/dist/spark/",
+    "spark_dev_tag": "spark3.3.1-python3.10-java17-scala2.13-main-latest",
+    "python_dev_tag": "python3.10-main-latest"
+  },
+  {
+    "python_version": "3.10",
+    "spark_version": "3.3.2",
+    "java_version": "17",
+    "scala_version": "",
+    "hadoop_version": "3",
+    "spark_download_url": "https://archive.apache.org/dist/spark/",
+    "spark_dev_tag": "spark3.3.2-python3.10-java17-scala2.12-main-latest",
+    "python_dev_tag": "python3.10-main-latest"
+  },
+  {
+    "python_version": "3.10",
+    "spark_version": "3.3.2",
+    "java_version": "17",
+    "scala_version": "2.13",
+    "hadoop_version": "3",
+    "spark_download_url": "https://archive.apache.org/dist/spark/",
+    "spark_dev_tag": "spark3.3.2-python3.10-java17-scala2.13-main-latest",
+    "python_dev_tag": "python3.10-main-latest"
+  },
+  {
+    "python_version": "3.10",
+    "spark_version": "3.3.3",
+    "java_version": "17",
+    "scala_version": "",
+    "hadoop_version": "3",
+    "spark_download_url": "https://archive.apache.org/dist/spark/",
+    "spark_dev_tag": "spark3.3.3-python3.10-java17-scala2.12-main-latest",
+    "python_dev_tag": "python3.10-main-latest"
+  },
+  {
+    "python_version": "3.10",
+    "spark_version": "3.3.3",
+    "java_version": "17",
+    "scala_version": "2.13",
+    "hadoop_version": "3",
+    "spark_download_url": "https://archive.apache.org/dist/spark/",
+    "spark_dev_tag": "spark3.3.3-python3.10-java17-scala2.13-main-latest",
+    "python_dev_tag": "python3.10-main-latest"
+  },
+  {
+    "python_version": "3.10",
+    "spark_version": "3.3.4",
+    "java_version": "17",
+    "scala_version": "",
+    "hadoop_version": "3",
+    "spark_download_url": "https://archive.apache.org/dist/spark/",
+    "spark_dev_tag": "spark3.3.4-python3.10-java17-scala2.12-main-latest",
+    "python_dev_tag": "python3.10-main-latest"
+  },
+  {
+    "python_version": "3.10",
+    "spark_version": "3.3.4",
+    "java_version": "17",
+    "scala_version": "2.13",
+    "hadoop_version": "3",
+    "spark_download_url": "https://archive.apache.org/dist/spark/",
+    "spark_dev_tag": "spark3.3.4-python3.10-java17-scala2.13-main-latest",
+    "python_dev_tag": "python3.10-main-latest"
+  },
+  {
+    "python_version": "3.11",
+    "spark_version": "3.5.0",
+    "java_version": "17",
+    "scala_version": "",
+    "hadoop_version": "3",
+    "spark_download_url": "https://archive.apache.org/dist/spark/",
+    "spark_dev_tag": "spark3.5.0-python3.11-java17-scala2.12-main-latest",
+    "python_dev_tag": "python3.11-main-latest"
+  },
+  {
+    "python_version": "3.11",
+    "spark_version": "3.5.0",
+    "java_version": "17",
+    "scala_version": "2.13",
+    "hadoop_version": "3",
+    "spark_download_url": "https://archive.apache.org/dist/spark/",
+    "spark_dev_tag": "spark3.5.0-python3.11-java17-scala2.13-main-latest",
+    "python_dev_tag": "python3.11-main-latest"
+  },
"spark3.5.0-python3.11-java17-scala2.13-main-latest", + "python_dev_tag": "python3.11-main-latest" + }, + { + "python_version": "3.11", + "spark_version": "3.5.0", + "java_version": "21", + "scala_version": "", + "hadoop_version": "3", + "spark_download_url": "https://archive.apache.org/dist/spark/", + "spark_dev_tag": "spark3.5.0-python3.11-java21-scala2.12-main-latest", + "python_dev_tag": "python3.11-main-latest" + }, + { + "python_version": "3.11", + "spark_version": "3.5.0", + "java_version": "21", + "scala_version": "2.13", + "hadoop_version": "3", + "spark_download_url": "https://archive.apache.org/dist/spark/", + "spark_dev_tag": "spark3.5.0-python3.11-java21-scala2.13-main-latest", + "python_dev_tag": "python3.11-main-latest" + }, + { + "python_version": "3.11", + "spark_version": "3.4.1", + "java_version": "17", + "scala_version": "", + "hadoop_version": "3", + "spark_download_url": "https://archive.apache.org/dist/spark/", + "spark_dev_tag": "spark3.4.1-python3.11-java17-scala2.12-main-latest", + "python_dev_tag": "python3.11-main-latest" + }, + { + "python_version": "3.11", + "spark_version": "3.4.1", + "java_version": "17", + "scala_version": "2.13", + "hadoop_version": "3", + "spark_download_url": "https://archive.apache.org/dist/spark/", + "spark_dev_tag": "spark3.4.1-python3.11-java17-scala2.13-main-latest", + "python_dev_tag": "python3.11-main-latest" + }, + { + "python_version": "3.11", + "spark_version": "3.4.2", + "java_version": "17", + "scala_version": "", + "hadoop_version": "3", + "spark_download_url": "https://archive.apache.org/dist/spark/", + "spark_dev_tag": "spark3.4.2-python3.11-java17-scala2.12-main-latest", + "python_dev_tag": "python3.11-main-latest" + }, + { + "python_version": "3.11", + "spark_version": "3.4.2", + "java_version": "17", + "scala_version": "2.13", + "hadoop_version": "3", + "spark_download_url": "https://archive.apache.org/dist/spark/", + "spark_dev_tag": "spark3.4.2-python3.11-java17-scala2.13-main-latest", + "python_dev_tag": "python3.11-main-latest" + }, + { + "python_version": "3.9", + "spark_version": "3.2.1", + "java_version": "11", + "scala_version": "", + "hadoop_version": "3.2", + "spark_download_url": "https://archive.apache.org/dist/spark/", + "spark_dev_tag": "spark3.2.1-python3.9-java11-scala2.12-main-latest", + "python_dev_tag": "python3.9-main-latest" + }, + { + "python_version": "3.9", + "spark_version": "3.2.1", + "java_version": "11", + "scala_version": "2.13", + "hadoop_version": "3.2", + "spark_download_url": "https://archive.apache.org/dist/spark/", + "spark_dev_tag": "spark3.2.1-python3.9-java11-scala2.13-main-latest", + "python_dev_tag": "python3.9-main-latest" + }, + { + "python_version": "3.9", + "spark_version": "3.2.2", + "java_version": "11", + "scala_version": "", + "hadoop_version": "3.2", + "spark_download_url": "https://archive.apache.org/dist/spark/", + "spark_dev_tag": "spark3.2.2-python3.9-java11-scala2.12-main-latest", + "python_dev_tag": "python3.9-main-latest" + }, + { + "python_version": "3.9", + "spark_version": "3.2.2", + "java_version": "11", + "scala_version": "2.13", + "hadoop_version": "3.2", + "spark_download_url": "https://archive.apache.org/dist/spark/", + "spark_dev_tag": "spark3.2.2-python3.9-java11-scala2.13-main-latest", + "python_dev_tag": "python3.9-main-latest" + }, + { + "python_version": "3.9", + "spark_version": "3.2.3", + "java_version": "11", + "scala_version": "", + "hadoop_version": "3.2", + "spark_download_url": "https://archive.apache.org/dist/spark/", + "spark_dev_tag": 
"spark3.2.3-python3.9-java11-scala2.12-main-latest", + "python_dev_tag": "python3.9-main-latest" + }, + { + "python_version": "3.9", + "spark_version": "3.2.3", + "java_version": "11", + "scala_version": "2.13", + "hadoop_version": "3.2", + "spark_download_url": "https://archive.apache.org/dist/spark/", + "spark_dev_tag": "spark3.2.3-python3.9-java11-scala2.13-main-latest", + "python_dev_tag": "python3.9-main-latest" + }, + { + "python_version": "3.9", + "spark_version": "3.2.4", + "java_version": "11", + "scala_version": "", + "hadoop_version": "3.2", + "spark_download_url": "https://archive.apache.org/dist/spark/", + "spark_dev_tag": "spark3.2.4-python3.9-java11-scala2.12-main-latest", + "python_dev_tag": "python3.9-main-latest" + }, + { + "python_version": "3.9", + "spark_version": "3.2.4", + "java_version": "11", + "scala_version": "2.13", + "hadoop_version": "3.2", + "spark_download_url": "https://archive.apache.org/dist/spark/", + "spark_dev_tag": "spark3.2.4-python3.9-java11-scala2.13-main-latest", + "python_dev_tag": "python3.9-main-latest" + } +] \ No newline at end of file diff --git a/python/tests/extension/matrix/test_version_compatibility_matrix.py b/python/tests/extension/matrix/test_version_compatibility_matrix.py new file mode 100644 index 0000000..3625917 --- /dev/null +++ b/python/tests/extension/matrix/test_version_compatibility_matrix.py @@ -0,0 +1,240 @@ +from tests.extension.matrix.conftest import MockedVersionCompatibilityMatrix, to_dict +from okdp.extension.matrix.utils.matrix_utils import group_versions_by +from okdp.extension.matrix.utils.matrix_utils import group_on +from okdp.extension.matrix.version_compatibility_matrix import VersionCompatibilityMatrix + +def test_group_versions_by( + version_compatibility_matrix_data: list[dict], +) -> None: + # Given: version_compatibility_matrix_data + # Expected: + expected = """[ + { + "python_version": ["3.10"], + "spark_version": ["3.3.1","3.3.2","3.3.3","3.3.4"], + "java_version": ["17"], + "scala_version": ["2.12","2.13"], + "hadoop_version": ["3"], + "spark_download_url": ["https://archive.apache.org/dist/spark/"] + },{ + "python_version": ["3.11"], + "spark_version": ["3.5.0"], + "java_version": ["17","21"], + "scala_version": ["2.12","2.13"], + "hadoop_version": ["3"], + "spark_download_url": ["https://archive.apache.org/dist/spark/"] + },{ + "python_version": ["3.11"], + "spark_version": ["3.4.1","3.4.2"], + "java_version": ["17"], + "scala_version": ["2.12","2.13"], + "hadoop_version": ["3"], + "spark_download_url": ["https://archive.apache.org/dist/spark/"] + },{ + "python_version": ["3.9"], + "spark_version": [ "3.2.1", "3.2.2", "3.2.3", "3.2.4"], + "java_version": ["11"], + "scala_version": ["2.12", "2.13"], + "hadoop_version": ["3.2"], + "spark_download_url": ["https://archive.apache.org/dist/spark/"] + } + ] + """ + assert group_versions_by(version_compatibility_matrix_data, group_on=group_on) == to_dict(expected) + +def test_filter_by_empty_versions( + version_compatibility_matrix_data: list[dict], +) -> None: + # Given: version_compatibility_matrix_data + version_compatibility_matrix = version_compatibility_matrix_data + build_matrix = {} + + # When: + vcm = MockedVersionCompatibilityMatrix(compatibility_matrix = version_compatibility_matrix, + build_matrix = build_matrix, + git_branch="main") + vcm._normalize_values_() + (spark_matrix, python_version) = vcm.generate_matrix() + + # Then: check the number of combinations when the build_matrix is empty + expected_nb_combinations = 24 + actual_nb_combinations 
+def test_filter_by_empty_versions(
+    version_compatibility_matrix_data: list[dict],
+) -> None:
+    # Given: version_compatibility_matrix_data
+    version_compatibility_matrix = version_compatibility_matrix_data
+    build_matrix = {}
+
+    # When:
+    vcm = MockedVersionCompatibilityMatrix(compatibility_matrix=version_compatibility_matrix,
+                                           build_matrix=build_matrix,
+                                           git_branch="main")
+    vcm._normalize_values_()
+    (spark_matrix, python_version) = vcm.generate_matrix()
+
+    # Then: check the number of combinations when the build_matrix is empty
+    expected_nb_combinations = 24
+    actual_nb_combinations = len(spark_matrix)
+    assert actual_nb_combinations == expected_nb_combinations, f"The number of elements should be {expected_nb_combinations}, got {actual_nb_combinations}"
+
+    # Then: check the expected combinations when the build_matrix is empty
+    with open("python/tests/extension/matrix/resources/expected_build_matrix_empty.json", 'r') as file:
+        expected_build_matrix_empty = file.read()
+
+    assert to_dict(expected_build_matrix_empty) == spark_matrix
+
+def test_filter_by_spark_version(
+    version_compatibility_matrix_data: list[dict],
+) -> None:
+    # Given: version_compatibility_matrix_data
+    version_compatibility_matrix = version_compatibility_matrix_data
+    build_matrix = {"spark_version": "3.2.4"}
+
+    # When:
+    vcm = MockedVersionCompatibilityMatrix(compatibility_matrix=version_compatibility_matrix,
+                                           build_matrix=build_matrix,
+                                           git_branch="main")
+    vcm._normalize_values_()
+    (spark_matrix, python_version) = vcm.generate_matrix()
+
+    # Then: check the number of combinations when filtering by spark_version
+    expected_nb_combinations = 2
+    actual_nb_combinations = len(spark_matrix)
+    expected_test_filter_spark_version = """[
+    {
+     "python_version": "3.9",
+     "spark_version": "3.2.4",
+     "java_version": "11",
+     "scala_version": "",
+     "hadoop_version": "3.2",
+     "spark_download_url": "https://archive.apache.org/dist/spark/",
+     "spark_dev_tag": "spark3.2.4-python3.9-java11-scala2.12-main-latest",
+     "python_dev_tag": "python3.9-main-latest"
+    },
+    {
+     "python_version": "3.9",
+     "spark_version": "3.2.4",
+     "java_version": "11",
+     "scala_version": "2.13",
+     "hadoop_version": "3.2",
+     "spark_download_url": "https://archive.apache.org/dist/spark/",
+     "spark_dev_tag": "spark3.2.4-python3.9-java11-scala2.13-main-latest",
+     "python_dev_tag": "python3.9-main-latest"
+    }
+    ]"""
+    assert actual_nb_combinations == expected_nb_combinations, f"The number of elements should be {expected_nb_combinations}, got {actual_nb_combinations}"
+
+    assert spark_matrix == to_dict(expected_test_filter_spark_version)
+    assert python_version == to_dict("""[{"python_version": "3.9", "python_dev_tag": "python3.9-main-latest"}]""")
+
+def test_filter_by_spark_version_and_scala_version(
+    version_compatibility_matrix_data: list[dict],
+) -> None:
+    # Given: version_compatibility_matrix_data
+    version_compatibility_matrix = version_compatibility_matrix_data
+    build_matrix = {"spark_version": "3.2.4", "scala_version": "2.13"}
+
+    # When:
+    vcm = MockedVersionCompatibilityMatrix(compatibility_matrix=version_compatibility_matrix,
+                                           build_matrix=build_matrix,
+                                           git_branch="main")
+    vcm._normalize_values_()
+    (spark_matrix, python_version) = vcm.generate_matrix()
+
+    # Then: check the number of combinations when filtering by spark_version and scala_version
+    expected_nb_combinations = 1
+    actual_nb_combinations = len(spark_matrix)
+    expected_test_filter_spark_version = """[
+    {
+     "python_version": "3.9",
+     "spark_version": "3.2.4",
+     "java_version": "11",
+     "scala_version": "2.13",
+     "hadoop_version": "3.2",
+     "spark_download_url": "https://archive.apache.org/dist/spark/",
+     "spark_dev_tag": "spark3.2.4-python3.9-java11-scala2.13-main-latest",
+     "python_dev_tag": "python3.9-main-latest"
+    }
+    ]"""
+    assert actual_nb_combinations == expected_nb_combinations, f"The number of elements should be {expected_nb_combinations}, got {actual_nb_combinations}"
+
+    assert spark_matrix == to_dict(expected_test_filter_spark_version)
+    assert python_version == to_dict("""[{"python_version": "3.9", "python_dev_tag": "python3.9-main-latest"}]""")
+
+def test_filter_by_multiple_versions(
+    version_compatibility_matrix_data: list[dict],
+) -> None:
+    # Given: version_compatibility_matrix_data
+    version_compatibility_matrix = version_compatibility_matrix_data
+    build_matrix = {
+        "python_version": ["3.9", "3.10", "3.11"],
+        "spark_version": ["3.2.4", "3.3.4", "3.4.2", "3.5.0"],
+        "java_version": [11, 17],
+        "scala_version": [2.12]
+    }
+
+    # When:
+    vcm = MockedVersionCompatibilityMatrix(compatibility_matrix=version_compatibility_matrix,
+                                           build_matrix=build_matrix,
+                                           git_branch="main")
+    vcm._normalize_values_()
+    (spark_matrix, python_version) = vcm.generate_matrix()
+
+    # Then: check the number of combinations when filtering by multiple versions
+    expected_nb_combinations = 4
+    actual_nb_combinations = len(spark_matrix)
+    assert actual_nb_combinations == expected_nb_combinations, f"spark_matrix: The number of elements should be {expected_nb_combinations}, got {actual_nb_combinations}"
+
+    assert spark_matrix == to_dict("""[
+    {
+     "python_version": "3.10",
+     "spark_version": "3.3.4",
+     "java_version": "17",
+     "scala_version": "",
+     "hadoop_version": "3",
+     "spark_download_url": "https://archive.apache.org/dist/spark/",
+     "spark_dev_tag": "spark3.3.4-python3.10-java17-scala2.12-main-latest",
+     "python_dev_tag": "python3.10-main-latest"
+    },
+    {
+     "python_version": "3.11",
+     "spark_version": "3.5.0",
+     "java_version": "17",
+     "scala_version": "",
+     "hadoop_version": "3",
+     "spark_download_url": "https://archive.apache.org/dist/spark/",
+     "spark_dev_tag": "spark3.5.0-python3.11-java17-scala2.12-main-latest",
+     "python_dev_tag": "python3.11-main-latest"
+    },
+    {
+     "python_version": "3.11",
+     "spark_version": "3.4.2",
+     "java_version": "17",
+     "scala_version": "",
+     "hadoop_version": "3",
+     "spark_download_url": "https://archive.apache.org/dist/spark/",
+     "spark_dev_tag": "spark3.4.2-python3.11-java17-scala2.12-main-latest",
+     "python_dev_tag": "python3.11-main-latest"
+    },
+    {
+     "python_version": "3.9",
+     "spark_version": "3.2.4",
+     "java_version": "11",
+     "scala_version": "",
+     "hadoop_version": "3.2",
+     "spark_download_url": "https://archive.apache.org/dist/spark/",
+     "spark_dev_tag": "spark3.2.4-python3.9-java11-scala2.12-main-latest",
+     "python_dev_tag": "python3.9-main-latest"
+    }
+    ]""")
+    assert python_version == to_dict("""[
+    {"python_version": "3.10", "python_dev_tag": "python3.10-main-latest"},
+    {"python_version": "3.11", "python_dev_tag": "python3.11-main-latest"},
+    {"python_version": "3.9", "python_dev_tag": "python3.9-main-latest"}
+    ]""")
+
f"python_version: The number of elements should be {expected_nb_combinations}, got {actual_nb_combinations}" diff --git a/python/tests/pytest.ini b/python/tests/pytest.ini new file mode 100644 index 0000000..d61d029 --- /dev/null +++ b/python/tests/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +xfail_strict=true