From 1beb2a94b1ded4f33c3a26996f55af08080cf084 Mon Sep 17 00:00:00 2001 From: Raniere Gaia Costa da Silva Date: Wed, 1 Oct 2025 14:55:29 +0200 Subject: [PATCH 01/13] Add support to DockerEngine --- repo2docker/docker.py | 46 +++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/repo2docker/docker.py b/repo2docker/docker.py index 984c074d..f1742130 100644 --- a/repo2docker/docker.py +++ b/repo2docker/docker.py @@ -13,7 +13,7 @@ from pathlib import Path from iso8601 import parse_date -from traitlets import Dict, List, Unicode +from traitlets import Dict, List, Unicode, default import docker @@ -66,6 +66,35 @@ class DockerEngine(ContainerEngine): string_output = True + cli = Unicode( + "", + help=""" + The commandline for Docker. + """, + config=True, + ) + + @default("cli") + def _default_cli(self): + for cli in ["docker", "podman"]: + docker_version = subprocess.run([cli, "version"]) + if docker_version.returncode == 0: + docker_cli = cli + break + else: + raise RuntimeError("The docker or podman commandline client must be installed") + + # docker buildx is based in a plugin that might not be installed + # https://github.com/docker/buildx + # + # podman buildx command is an alias of podman build. + # Not all buildx build features are available in Podman. 
+ docker_buildx_version = subprocess.run([docker_cli, "buildx", "version"]) + if docker_buildx_version.returncode: + raise RuntimeError("The docker buildx plugin must be installed") + + return docker_cli + extra_init_args = Dict( {}, help=""" @@ -105,16 +134,7 @@ def build( platform=None, **kwargs, ): - if not shutil.which("docker"): - raise RuntimeError("The docker commandline client must be installed") - - # docker buildx is based in a plugin that might not be installed - # https://github.com/docker/buildx - docker_buildx_version = subprocess.run(["docker", "buildx", "version"]) - if docker_buildx_version.returncode: - raise RuntimeError("The docker buildx plugin must be installed") - - args = ["docker", "buildx", "build", "--progress", "plain"] + args = [self.cli, "buildx", "build", "--progress", "plain"] if load: if push: raise ValueError( @@ -171,7 +191,7 @@ def inspect_image(self, image): Return image configuration if it exists, otherwise None """ proc = subprocess.run( - ["docker", "image", "inspect", image], capture_output=True + [self.cli, "image", "inspect", image], capture_output=True ) if proc.returncode != 0: @@ -200,7 +220,7 @@ def docker_login(self, username, password, registry): try: subprocess.run( [ - "docker", + self.cli, "login", "--username", username, From 5b37d9af9adee9d488df06341e1e56f28b0decc2 Mon Sep 17 00:00:00 2001 From: Raniere Gaia Costa da Silva Date: Wed, 1 Oct 2025 12:43:30 +0200 Subject: [PATCH 02/13] Use "container" instead of Docker when possible in index.md --- docs/source/index.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/source/index.md b/docs/source/index.md index 831b8877..fae2308a 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -1,15 +1,19 @@ # Welcome to `repo2docker`'s documentation +```{important} +Despite the name, `repo2docker` can be used by container technology other than [Docker](https://docs.docker.com/engine/), for example [Podman](https://podman.io/). 
+``` + `repo2docker` lets you **reproducibly build and run user environment container images for interactive computing and data workflows from source code repositories**. Optionally, the container image can be pushed to a Docker registry. Also, `repo2docker` is the tool used to build container images for [JupyterHub](https://jupyterhub.readthedocs.io/en/stable/) and the tool used by [BinderHub](https://binderhub.readthedocs.io) to build images on demand. ::::{grid} :::{grid-item-card} 🔧 Build reproducible data science environments from repositories -Build a reproducible data science environment as a Docker image and execute code interactively. Use many [configuration files](#config-files) to control language, tools, and setup instructions. +Build a reproducible data science environment as a container image and execute code interactively. Use many [configuration files](#config-files) to control language, tools, and setup instructions. ::: :::{grid-item-card} 🚀 Deploy environments in JupyterHub or Binder -Push environment images to a Docker registry for re-use in data science environment services like [JupyterHub](https://jupyterhub.readthedocs.io) or [a Binder instance](https://mybinder.org), or for other communities to build upon your base environment. +Push environment images to a container registry for re-use in data science environment services like [JupyterHub](https://jupyterhub.readthedocs.io) or [a Binder instance](https://mybinder.org), or for other communities to build upon your base environment. 
::: :::{grid-item-card} ☁️ Host repositories in many providers Host repositories in: a Git server like [GitHub](https://github.com/) or [GitLab](https://gitlab.com/), an open science repository like [Zenodo](https://zenodo.org) or [Figshare](https://figshare.com), a hosted data platform like a [Dataverse installation](https://dataverse.org/), an archive like the @@ -19,7 +23,7 @@ Host repositories in: a Git server like [GitHub](https://github.com/) or [GitLab ## What is a user environment container image and why would I build one with `repo2docker`? -A **user environment container image** contains the entire software environment that a user may access from an interactive data science session. For example, it might contain many **programming languages**, **software for data analysis**, or even **content files and datasets** available to anybody that accesses that environment. Container images are built with [Docker](https://www.docker.com/), a standard open source tool for defining, building, and deploying images. +A **user environment container image** contains the entire software environment that a user may access from an interactive data science session. For example, it might contain many **programming languages**, **software for data analysis**, or even **content files and datasets** available to anybody that accesses that environment. Container images are built in accordance with the specifications published by the [Open Container Initiative](https://opencontainers.org/). Many data science platforms and services like [JupyterHub](https://jupyterhub.readthedocs.io) and [Binder](https://mybinder.org) launch interactive data science sessions **with a user environment container image attached**, meaning that the user gains access to whatever is in the container image. In short, this allows somebody to define and build the user image one time, in a way that users can reproducibly re-use many times. @@ -43,9 +47,9 @@ repo2docker It performs these steps: 1. 
Inspects the repository for [configuration files](#config-files). These will be used to build the environment needed to run the repository. -2. Builds a Docker image with an environment specified in these [configuration files](#config-files). +2. Builds a container image with an environment specified in these [configuration files](#config-files). 3. Runs the image to let you explore the repository interactively via Jupyter notebooks, RStudio, or many other interfaces (this is optional). -4. Pushes the images to a Docker registry so that it may be accessed remotely (this is optional). +4. Pushes the images to a container registry so that it may be accessed remotely (this is optional). [swhid]: https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html @@ -55,7 +59,7 @@ Please report [bugs](https://github.com/jupyterhub/repo2docker/issues), ## Get started with `repo2docker` -This tutorial walks you through setting up `repo2docker`, building your first environment image, and running it locally with Docker. +This tutorial walks you through setting up `repo2docker`, building your first environment image, and running it locally with a container engine. ```{toctree} :maxdepth: 2 From a0e43fc0c98daa48d2b7765023cc50ec5dd6ab45 Mon Sep 17 00:00:00 2001 From: Raniere Gaia Costa da Silva Date: Wed, 1 Oct 2025 13:46:20 +0200 Subject: [PATCH 03/13] Use "container" instead of Docker when possible in start.md and add note about DOCKER_HOST environment variable. --- docs/source/start.md | 63 ++++++++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/docs/source/start.md b/docs/source/start.md index ef5fe7d2..de1d5bad 100644 --- a/docs/source/start.md +++ b/docs/source/start.md @@ -2,34 +2,39 @@ This tutorial guides you through installing `repo2docker` and building your first environment image. 
-(install)= +## Prerequisite -## Install `repo2docker` +### Python -`repo2docker` requires Python 3.6 or above on Linux and macOS. +`repo2docker` requires Python 3.6 or above. -:::{admonition} Windows support is experimental +### Container Engine -This [article about using Windows and the WSL](https://nickjanetakis.com/blog/setting-up-docker-for-windows-and-wsl-to-work-flawlessly) (Windows Subsystem for Linux or -Bash on Windows) provides additional information about Windows and Docker. -::: +`repo2docker` requires a container engine compatible with the specification published by the [Open Container Initiative](https://opencontainers.org/). -### Prerequisite: Install Docker +#### Docker -Install [Docker](https://www.docker.com), as it is required to build Docker images. -The [Community Edition](https://docs.docker.com/install/) is available for free. +```{important} +Only the [Docker Engine](https://docs.docker.com/engine/) is open source. [Docker Desktop](https://docs.docker.com/get-started/get-docker/) requires a license. +``` -Recent versions of Docker are recommended. +Follow [Docker's official installation steps](https://docs.docker.com/get-started/get-docker/). -### Install `repo2docker` with `pip` +#### Podman -```{warning} -The name of the package on [PyPI](https://pypi.org/) is [`jupyter-repo2docker`](https://pypi.org/project/jupyter-repo2docker/) instead of `repo2docker`. -``` +Follow [Podman's official installation steps](https://podman.io/docs/installation). -We recommend installing `repo2docker` with the `pip` tool: +And configure the `DOCKER_HOST` environment variable following [Podman's official procedure](https://podman-desktop.io/docs/migrating-from-docker/using-the-docker_host-environment-variable#procedure). 
-``` +(install)= + +## Install `repo2docker` + +### Install `repo2docker` with `pip` + +It is recommended to install `repo2docker` with the `pip` tool: + +```bash python3 -m pip install jupyter-repo2docker ``` @@ -37,12 +42,24 @@ python3 -m pip install jupyter-repo2docker ## Build a repository with `repo2docker` -Now that you've installed Docker and `repo2docker`, we can build a repository. -To do so, follow these steps. +Now that you've installed a container engine and `repo2docker`, you can build a repository. +To do so, continue following this guide. + +### Start the container engine + +Ensure that the container engine is running. -### Start Docker +#### Docker -Follow the [instructions for starting Docker](https://docs.docker.com/engine/daemon/start/) to start a Docker process. +Follow the [official instructions for starting Docker](https://docs.docker.com/engine/daemon/start/). + +#### Podman + +Run + +```bash +podman info +``` ### Build an image from a URL @@ -55,7 +72,7 @@ jupyter-repo2docker https://github.com/binder-examples/requirements You'll see `repo2docker` take the following actions: 1. Inspect the repository for [configuration files](#config-files). It will detect the `requirements.txt` file in the repository. -2. Build a Docker image using the configuration files. In this case, the `requirements.txt` file will correspond to a Python environment. +2. Build a container image using the configuration files. In this case, the `requirements.txt` file will correspond to a Python environment. 3. Run the image to let you explore the repository interactively. Click the link provided and you'll be taken to an interactive Jupyter Notebook interface where you can run commands interactively inside the environment. @@ -63,4 +80,4 @@ Click the link provided and you'll be taken to an interactive Jupyter Notebook i ## Learn more This is a simple example building an environment image for your repository. 
-To learn more about the kinds of source repositories, environments, and use-cases that repo2docker supports, see [the `repo2docker` user guide](./use/index.md). +To learn more about the kinds of source repositories, environments, and use-cases that `repo2docker` supports, see [the `repo2docker` user guide](./use/index.md). From bf325784320e68c20bf4103527ea50c1fe1ea382 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 Oct 2025 13:06:28 +0000 Subject: [PATCH 04/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- repo2docker/docker.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/repo2docker/docker.py b/repo2docker/docker.py index f1742130..3b08c269 100644 --- a/repo2docker/docker.py +++ b/repo2docker/docker.py @@ -82,17 +82,19 @@ def _default_cli(self): docker_cli = cli break else: - raise RuntimeError("The docker or podman commandline client must be installed") + raise RuntimeError( + "The docker or podman commandline client must be installed" + ) # docker buildx is based in a plugin that might not be installed # https://github.com/docker/buildx - # + # # podman buildx command is an alias of podman build. # Not all buildx build features are available in Podman. 
docker_buildx_version = subprocess.run([docker_cli, "buildx", "version"]) if docker_buildx_version.returncode: raise RuntimeError("The docker buildx plugin must be installed") - + return docker_cli extra_init_args = Dict( From 3e3ade934145ee01be6918763c3d865267469582 Mon Sep 17 00:00:00 2001 From: Raniere Gaia Costa da Silva Date: Mon, 6 Oct 2025 13:47:24 +0200 Subject: [PATCH 05/13] Auto configure container_cli property based on DOCKER_HOST --- repo2docker/docker.py | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/repo2docker/docker.py b/repo2docker/docker.py index 3b08c269..8a46c3f3 100644 --- a/repo2docker/docker.py +++ b/repo2docker/docker.py @@ -66,36 +66,35 @@ class DockerEngine(ContainerEngine): string_output = True - cli = Unicode( - "", - help=""" - The commandline for Docker. - """, - config=True, - ) + _container_cli = None - @default("cli") - def _default_cli(self): - for cli in ["docker", "podman"]: - docker_version = subprocess.run([cli, "version"]) - if docker_version.returncode == 0: - docker_cli = cli - break + @property + def container_cli(self): + if self._container_cli is not None: + return self._container_cli + + docker_host = os.getenv("DOCKER_HOST") + if docker_host is not None and docker_host.find("podman") != -1: + cli = "podman" else: - raise RuntimeError( - "The docker or podman commandline client must be installed" - ) + cli = "docker" + + docker_version = subprocess.run([cli, "version"]) + if docker_version.returncode: + raise RuntimeError(f"The {cli} commandline client must be installed") # docker buildx is based in a plugin that might not be installed # https://github.com/docker/buildx # # podman buildx command is an alias of podman build. # Not all buildx build features are available in Podman. 
- docker_buildx_version = subprocess.run([docker_cli, "buildx", "version"]) + docker_buildx_version = subprocess.run([cli, "buildx", "version"]) if docker_buildx_version.returncode: raise RuntimeError("The docker buildx plugin must be installed") - return docker_cli + self._container_cli = cli + + return self._container_cli extra_init_args = Dict( {}, @@ -136,7 +135,7 @@ def build( platform=None, **kwargs, ): - args = [self.cli, "buildx", "build", "--progress", "plain"] + args = [self.container_cli, "buildx", "build", "--progress", "plain"] if load: if push: raise ValueError( @@ -193,7 +192,7 @@ def inspect_image(self, image): Return image configuration if it exists, otherwise None """ proc = subprocess.run( - [self.cli, "image", "inspect", image], capture_output=True + [self.container_cli, "image", "inspect", image], capture_output=True ) if proc.returncode != 0: @@ -222,7 +221,7 @@ def docker_login(self, username, password, registry): try: subprocess.run( [ - self.cli, + self.container_cli, "login", "--username", username, From ced04a536f353c3f725d675a415b9a6f98d3f389 Mon Sep 17 00:00:00 2001 From: Raniere Gaia Costa da Silva Date: Mon, 6 Oct 2025 13:51:07 +0200 Subject: [PATCH 06/13] Add podman as container cli for GitHub Actions --- .github/workflows/test.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4645d8c4..ba94f824 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -53,6 +53,9 @@ jobs: matrix: ubuntu_version: ["24.04"] python_version: ["3.13"] + container_cli: + - docker + - podman repo_type: - base - conda @@ -104,6 +107,10 @@ jobs: - name: Run pytest run: | + if [ "${{ matrix.container_cli }}" = "podman" ] + then + export DOCKER_HOST=unix://$(podman info --format '{{.Host.RemoteSocket.Path}}') + fi pytest --verbose --color=yes --durations=10 --cov=repo2docker tests/${{ matrix.repo_type }} - uses: codecov/codecov-action@v5 From 
48e92430a85bd1dd22330979a92229ae8481a28a Mon Sep 17 00:00:00 2001 From: Raniere Gaia Costa da Silva Date: Mon, 6 Oct 2025 14:23:20 +0200 Subject: [PATCH 07/13] Select correct hostname for Podman --- repo2docker/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repo2docker/app.py b/repo2docker/app.py index 7a70a470..dfb66f94 100755 --- a/repo2docker/app.py +++ b/repo2docker/app.py @@ -588,7 +588,7 @@ def start_container(self): client = self.get_engine() docker_host = os.environ.get("DOCKER_HOST") - if docker_host: + if docker_host and docker_host.find("podman") != -1: host_name = urlparse(docker_host).hostname else: host_name = "127.0.0.1" From ef36a9ccd656245b43c53a5e5bab34f456d5452a Mon Sep 17 00:00:00 2001 From: Raniere Gaia Costa da Silva Date: Fri, 17 Oct 2025 09:04:31 +0200 Subject: [PATCH 08/13] Add note about Podman service to documentation --- docs/source/start.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/source/start.md b/docs/source/start.md index de1d5bad..8e136c5e 100644 --- a/docs/source/start.md +++ b/docs/source/start.md @@ -24,7 +24,14 @@ Follow [Docker's official installation steps](https://docs.docker.com/get-starte Follow [Podman's official installation steps](https://podman.io/docs/installation). -And configure the `DOCKER_HOST` environment variable following [Podman's official procedure](https://podman-desktop.io/docs/migrating-from-docker/using-the-docker_host-environment-variable#procedure). +After completing the installation of Podman, + +1. create a [listening service for Podman](https://docs.podman.io/en/latest/markdown/podman-system-service.1.html) by running + + ```bash + systemctl --user start podman.socket + ``` +1. configure the `DOCKER_HOST` environment variable following [Podman's official procedure](https://podman-desktop.io/docs/migrating-from-docker/using-the-docker_host-environment-variable#procedure). 
You might want to configure the `DOCKER_HOST` environment variable to persist in your `~/.bashrc`. (install)= From 4e7ed9e4eeb7ee922cce3aa314ad25055f9328ac Mon Sep 17 00:00:00 2001 From: Raniere Gaia Costa da Silva Date: Fri, 17 Oct 2025 09:05:22 +0200 Subject: [PATCH 09/13] Enable Podman socket during test --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ba94f824..b4fb0fa9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -109,6 +109,7 @@ jobs: run: | if [ "${{ matrix.container_cli }}" = "podman" ] then + systemctl --user start podman.socket export DOCKER_HOST=unix://$(podman info --format '{{.Host.RemoteSocket.Path}}') fi pytest --verbose --color=yes --durations=10 --cov=repo2docker tests/${{ matrix.repo_type }} From 2b463f215657964f78b02c3736bac086aa5a2208 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 17 Oct 2025 07:05:47 +0000 Subject: [PATCH 10/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/source/start.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/start.md b/docs/source/start.md index 8e136c5e..0c07a932 100644 --- a/docs/source/start.md +++ b/docs/source/start.md @@ -31,6 +31,7 @@ After complete the installation of Podman, ```bash systemctl --user start podman.socket ``` + 1. configure the `DOCKER_HOST` environment variable following [Podman's official procedure](https://podman-desktop.io/docs/migrating-from-docker/using-the-docker_host-environment-variable#procedure). You might want to configure the `DOCKER_HOST` environment variable to persist in your `~/.bashrc`. 
(install)= From 76ffdfb65b5e6607e944adcebf7fc161008a8c83 Mon Sep 17 00:00:00 2001 From: Raniere Gaia Costa da Silva Date: Fri, 17 Oct 2025 13:50:32 +0200 Subject: [PATCH 11/13] Avoid infinite loop in test --- tests/unit/test_editable.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/unit/test_editable.py b/tests/unit/test_editable.py index de4d6261..5105e6d2 100644 --- a/tests/unit/test_editable.py +++ b/tests/unit/test_editable.py @@ -38,7 +38,11 @@ def test_editable_by_host(): container = app.start_container() # give the container a chance to start + waiting_container_counter = 0 while container.status != "running": + if waiting_container_counter >= 60: + assert container.status == "running" + waiting_container_counter = waiting_container_counter + 1 time.sleep(1) try: From b3dea2d9b3decd0c328281b13d38102c3823a431 Mon Sep 17 00:00:00 2001 From: Raniere Gaia Costa da Silva Date: Fri, 17 Oct 2025 14:43:20 +0200 Subject: [PATCH 12/13] Hide output of "docker version" test --- repo2docker/docker.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/repo2docker/docker.py b/repo2docker/docker.py index 8a46c3f3..ff8cc9ef 100644 --- a/repo2docker/docker.py +++ b/repo2docker/docker.py @@ -79,7 +79,7 @@ def container_cli(self): else: cli = "docker" - docker_version = subprocess.run([cli, "version"]) + docker_version = subprocess.run([cli, "version"], stdout=subprocess.DEVNULL) if docker_version.returncode: raise RuntimeError(f"The {cli} commandline client must be installed") @@ -88,7 +88,9 @@ def container_cli(self): # # podman buildx command is an alias of podman build. # Not all buildx build features are available in Podman. 
- docker_buildx_version = subprocess.run([cli, "buildx", "version"]) + docker_buildx_version = subprocess.run( + [cli, "buildx", "version"], stdout=subprocess.DEVNULL + ) if docker_buildx_version.returncode: raise RuntimeError("The docker buildx plugin must be installed") From 1e51be61e320b9e4480663622fc058444e28ea3d Mon Sep 17 00:00:00 2001 From: Raniere Gaia Costa da Silva Date: Fri, 17 Oct 2025 15:49:10 +0200 Subject: [PATCH 13/13] Add WorkingDir to bridge the gap with Podman --- repo2docker/docker.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/repo2docker/docker.py b/repo2docker/docker.py index ff8cc9ef..a35e605a 100644 --- a/repo2docker/docker.py +++ b/repo2docker/docker.py @@ -201,7 +201,15 @@ def inspect_image(self, image): return None config = json.loads(proc.stdout.decode())[0] - return Image(tags=config["RepoTags"], config=config["Config"]) + tags = config["RepoTags"] + oci_image_configuration = config["Config"] + + # WorkingDir is optional but docker always include it. + # https://github.com/containers/podman/discussions/27313 + if "WorkingDir" not in oci_image_configuration: + oci_image_configuration["WorkingDir"] = "" + + return Image(tags=config["RepoTags"], config=oci_image_configuration) @contextmanager def docker_login(self, username, password, registry):