Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 21 additions & 6 deletions server/opensandbox_server/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ class EgressConfig(BaseModel):
class RuntimeConfig(BaseModel):
"""Runtime selection (docker, kubernetes, etc.)."""

type: Literal["docker", "kubernetes"] = Field(
type: Literal["docker", "kubernetes", "podman"] = Field(
...,
description="Active sandbox runtime implementation.",
)
Expand Down Expand Up @@ -555,6 +555,18 @@ class DockerConfig(BaseModel):
)


class PodmanConfig(BaseModel):
    """Podman-specific settings."""

    # Optional override for the Podman API socket location. The default of
    # ``None`` means no explicit path was configured.
    socket_path: Optional[str] = Field(
        default=None,
        description=(
            "Explicit Podman API socket path. Auto-detected from "
            "standard platform locations if omitted."
        ),
    )


class AppConfig(BaseModel):
"""Root application configuration model."""

Expand All @@ -568,6 +580,7 @@ class AppConfig(BaseModel):
agent_sandbox: Optional["AgentSandboxRuntimeConfig"] = None
ingress: Optional[IngressConfig] = None
docker: DockerConfig = Field(default_factory=DockerConfig)
podman: PodmanConfig = Field(default_factory=PodmanConfig)
storage: StorageConfig = Field(default_factory=StorageConfig)
egress: Optional[EgressConfig] = None
secure_runtime: Optional[SecureRuntimeConfig] = Field(
Expand All @@ -577,15 +590,16 @@ class AppConfig(BaseModel):

@model_validator(mode="after")
def validate_runtime_blocks(self) -> "AppConfig":
if self.runtime.type == "docker":
if self.runtime.type in ("docker", "podman"):
rt = self.runtime.type
if self.kubernetes is not None:
raise ValueError("Kubernetes block must be omitted when runtime.type = 'docker'.")
raise ValueError(f"Kubernetes block must be omitted when runtime.type = '{rt}'.")
if self.agent_sandbox is not None:
raise ValueError("agent_sandbox block must be omitted when runtime.type = 'docker'.")
raise ValueError(f"agent_sandbox block must be omitted when runtime.type = '{rt}'.")
if self.ingress is not None and self.ingress.mode != INGRESS_MODE_DIRECT:
raise ValueError("ingress.mode must be 'direct' when runtime.type = 'docker'.")
raise ValueError(f"ingress.mode must be 'direct' when runtime.type = '{rt}'.")
if self.secure_runtime is not None and self.secure_runtime.type == "firecracker":
raise ValueError( "secure_runtime.type 'firecracker' is only compatible with runtime.type='kubernetes'.")
raise ValueError("secure_runtime.type 'firecracker' is only compatible with runtime.type='kubernetes'.")
elif self.runtime.type == "kubernetes":
if self.kubernetes is None:
self.kubernetes = KubernetesRuntimeConfig()
Expand Down Expand Up @@ -695,6 +709,7 @@ def get_config_path() -> Path:
"INGRESS_MODE_DIRECT",
"INGRESS_MODE_GATEWAY",
"DockerConfig",
"PodmanConfig",
"StorageConfig",
"KubernetesRuntimeConfig",
"EgressConfig",
Expand Down
4 changes: 2 additions & 2 deletions server/opensandbox_server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,11 @@ async def lifespan(app: FastAPI):
k8s_client = None
runtime_type = app_config.runtime.type

if runtime_type == "docker":
if runtime_type in ("docker", "podman"):
import docker

docker_client = docker.from_env()
logger.info("Validating secure runtime for Docker backend")
logger.info("Validating secure runtime for %s backend", runtime_type)
elif runtime_type == "kubernetes":
from opensandbox_server.services.k8s.client import K8sClient

Expand Down
2 changes: 2 additions & 0 deletions server/opensandbox_server/services/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from opensandbox_server.services.docker import DockerSandboxService
from opensandbox_server.services.extension_service import ExtensionService, require_extension_service
from opensandbox_server.services.k8s.kubernetes_service import KubernetesSandboxService
from opensandbox_server.services.podman import PodmanSandboxService
from opensandbox_server.services.factory import create_sandbox_service
from opensandbox_server.services.sandbox_service import SandboxService

Expand All @@ -26,5 +27,6 @@
"require_extension_service",
"DockerSandboxService",
"KubernetesSandboxService",
"PodmanSandboxService",
"create_sandbox_service",
]
78 changes: 46 additions & 32 deletions server/opensandbox_server/services/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ class DockerSandboxService(DockerDiagnosticsMixin, OSSFSMixin, SandboxService, E
This class implements sandbox lifecycle operations using Docker containers.
"""

@classmethod
def _supported_runtime_types(cls) -> tuple[str, ...]:
"""Runtime types accepted by this service class (overridable by subclasses)."""
return ("docker",)

def __init__(self, config: Optional[AppConfig] = None):
"""
Initialize Docker sandbox service.
Expand All @@ -151,49 +156,25 @@ def __init__(self, config: Optional[AppConfig] = None):
"""
self.app_config = config or get_config()
runtime_config = self.app_config.runtime
if runtime_config.type != "docker":
raise ValueError("DockerSandboxService requires runtime.type = 'docker'.")
if runtime_config.type not in self._supported_runtime_types():
raise ValueError(
f"{type(self).__name__} requires runtime.type in {self._supported_runtime_types()}."
)

self.execd_image = runtime_config.execd_image
self.network_mode = (self.app_config.docker.network_mode or HOST_NETWORK_MODE).lower()
self._execd_archive_cache: Optional[bytes] = None
self._api_timeout = self._resolve_api_timeout()
try:
# Initialize Docker service from environment variables
client_kwargs = {}
try:
signature = inspect.signature(docker.from_env)
if "timeout" in signature.parameters:
client_kwargs["timeout"] = self._api_timeout
except (ValueError, TypeError):
logger.debug(
"Unable to introspect docker.from_env signature; using default parameters."
)
self.docker_client = docker.from_env(**client_kwargs)
if not client_kwargs:
try:
self.docker_client.api.timeout = self._api_timeout
except AttributeError:
logger.debug("Docker client API does not expose timeout attribute.")
logger.info("Docker service initialized from environment")
self.docker_client = self._create_docker_client()
logger.info("%s initialized from environment", type(self).__name__)
except Exception as e: # noqa: BLE001
# Common failure mode on macOS/dev machines: Docker daemon not running or socket path wrong.
hint = ""
msg = str(e)
if isinstance(e, FileNotFoundError) or "No such file or directory" in msg:
docker_host = os.environ.get("DOCKER_HOST", "")
hint = (
" Docker daemon seems unavailable (unix socket not found). "
"Make sure Docker Desktop (or Colima/Rancher Desktop) is running. "
"If you use Colima on macOS, you may need to set "
"DOCKER_HOST=unix://${HOME}/.colima/default/docker.sock before starting the server. "
f"(current DOCKER_HOST='{docker_host}')"
)
hint = self._connection_error_hint(e)
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail={
"code": SandboxErrorCodes.DOCKER_INITIALIZATION_ERROR,
"message": f"Failed to initialize Docker service: {str(e)}.{hint}",
"message": f"Failed to initialize {type(self).__name__}: {str(e)}.{hint}",
},
)
self._expiration_lock = Lock()
Expand All @@ -218,6 +199,39 @@ def _resolve_api_timeout(self) -> int:
return cfg
return 180

def _connection_error_hint(self, error: Exception) -> str:
"""Return a user-friendly hint when the container daemon is unreachable."""
msg = str(error)
if isinstance(error, FileNotFoundError) or "No such file or directory" in msg:
docker_host = os.environ.get("DOCKER_HOST", "")
return (
" Docker daemon seems unavailable (unix socket not found). "
"Make sure Docker Desktop (or Colima/Rancher Desktop) is running. "
"If you use Colima on macOS, you may need to set "
"DOCKER_HOST=unix://${HOME}/.colima/default/docker.sock before starting the server. "
f"(current DOCKER_HOST='{docker_host}')"
)
return ""

def _create_docker_client(self):
    """Build a Docker SDK client from the environment with the API timeout applied.

    The timeout is passed to ``docker.from_env`` when its signature accepts a
    ``timeout`` parameter; otherwise it is assigned on ``client.api`` after
    construction. Subclasses may override this method.

    Returns:
        The client object returned by ``docker.from_env``.
    """
    from_env_kwargs: dict = {}
    try:
        accepts_timeout = "timeout" in inspect.signature(docker.from_env).parameters
    except (ValueError, TypeError):
        accepts_timeout = False
        logger.debug(
            "Unable to introspect docker.from_env signature; using default parameters."
        )
    if accepts_timeout:
        from_env_kwargs["timeout"] = self._api_timeout

    sdk_client = docker.from_env(**from_env_kwargs)
    if from_env_kwargs:
        # Timeout was already handed to the constructor.
        return sdk_client

    # Fallback: set the timeout directly on the low-level API object.
    try:
        sdk_client.api.timeout = self._api_timeout
    except AttributeError:
        logger.debug("Docker client API does not expose timeout attribute.")
    return sdk_client

@contextmanager
def _docker_operation(self, action: str, sandbox_id: Optional[str] = None):
"""Context manager to log duration for Docker API calls."""
Expand Down
2 changes: 2 additions & 0 deletions server/opensandbox_server/services/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from opensandbox_server.config import AppConfig, get_config
from opensandbox_server.services.docker import DockerSandboxService
from opensandbox_server.services.k8s import KubernetesSandboxService
from opensandbox_server.services.podman import PodmanSandboxService
from opensandbox_server.services.sandbox_service import SandboxService

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -57,6 +58,7 @@ def create_sandbox_service(
implementations: dict[str, type[SandboxService]] = {
"docker": DockerSandboxService,
"kubernetes": KubernetesSandboxService,
"podman": PodmanSandboxService,
# Future implementations can be added here:
# "containerd": ContainerdSandboxService,
}
Expand Down
165 changes: 165 additions & 0 deletions server/opensandbox_server/services/podman.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# Copyright 2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Podman-based implementation of SandboxService.

Inherits from DockerSandboxService and communicates with Podman through its
Docker-compatible API socket. Only the handful of operations where Podman's
compat layer diverges from Docker are overridden here; everything else
(container lifecycle, image management, volume mounts, port mapping, egress
sidecar, bootstrap injection, …) is reused as-is from the parent class.
"""

import logging
import os
import sys
from typing import Dict, Optional

import docker as docker_mod
from urllib3.util.retry import Retry

from opensandbox_server.config import AppConfig, get_config
from opensandbox_server.services.docker import DockerSandboxService

logger = logging.getLogger(__name__)


class PodmanSandboxService(DockerSandboxService):
    """Sandbox service backed by Podman via the Docker-compatible API.

    The connection target is resolved once in ``__init__`` and stored in
    ``_podman_base_url``; the overridden hooks below use it to create the
    client and to build connection-error hints.
    """

    @classmethod
    def _supported_runtime_types(cls) -> tuple[str, ...]:
        """Return the runtime types accepted by this service (only ``"podman"``)."""
        return ("podman",)

    def __init__(self, config: Optional[AppConfig] = None):
        """Resolve the Podman endpoint, then run the parent initialization.

        Args:
            config: Application configuration; ``get_config()`` is used when
                omitted.
        """
        app_config = config or get_config()
        # Resolved before the parent constructor runs, because the
        # ``_create_docker_client`` / ``_connection_error_hint`` overrides
        # read this attribute.
        self._podman_base_url = self._resolve_podman_url(app_config)
        # Pass the already-resolved config so the parent uses the very same
        # object the URL was derived from instead of resolving it again.
        super().__init__(config=app_config)
        self._configure_retry_adapter()

    def _create_docker_client(self):
        """Create a Docker SDK client connected to the Podman compat socket.

        Returns:
            A ``DockerClient`` bound to the resolved Podman URL, or the
            parent's environment-based client when no URL was resolved.
        """
        base_url = self._podman_base_url
        if not base_url:
            # Fall back to environment / default detection.
            return super()._create_docker_client()
        logger.info("Connecting to Podman at %s", base_url)
        return docker_mod.DockerClient(base_url=base_url, timeout=self._api_timeout)

    def _configure_retry_adapter(self) -> None:
        """Patch the existing transport adapter to retry on idle disconnects.

        Podman (especially on Windows named pipes) may close idle connections
        earlier than Docker. The Docker SDK reuses HTTP connections by default,
        so a subsequent API call on a stale connection hits
        ``RemoteDisconnected``. Rather than replacing the transport adapter
        (which would break named-pipe support), we patch ``max_retries`` on
        the adapter that the Docker SDK already installed.

        This is best-effort: any failure is logged at debug level (with the
        traceback) and otherwise ignored.
        """
        try:
            retry = Retry(total=3, connect=3, read=1, backoff_factor=0.1)
            adapter = self.docker_client.api.get_adapter("http+docker://")
            adapter.max_retries = retry
            logger.debug("Retry policy patched on existing Docker SDK adapter.")
        except Exception:  # noqa: BLE001
            # Keep the service usable without retries, but record why.
            logger.debug(
                "Could not configure retry policy for Podman client.",
                exc_info=True,
            )

    @staticmethod
    def _resolve_podman_url(config: AppConfig) -> Optional[str]:
        """Return the Podman API URL without mutating ``os.environ``.

        Resolution order:

        1. If ``DOCKER_HOST`` is set, return ``None`` so client creation falls
           back to the environment-based path.
        2. If ``config.podman.socket_path`` is set, use it: values containing
           ``://`` are returned unchanged, a raw Windows pipe path becomes an
           ``npipe://`` URL, anything else is prefixed with ``unix://``.
        3. Otherwise auto-detect a socket for the current platform.

        Args:
            config: Application configuration providing ``podman.socket_path``.

        Returns:
            The URL to connect to, or ``None`` when nothing was resolved.
        """
        if os.environ.get("DOCKER_HOST"):
            return None

        socket_path = config.podman.socket_path
        if not socket_path:
            return PodmanSandboxService._detect_podman_socket()
        if "://" in socket_path:
            return socket_path

        # A raw pipe path (\\.\pipe\name or //./pipe/name) is not a unix socket;
        # emit the same forward-slash npipe:// form that auto-detection returns.
        forward_slashed = socket_path.replace("\\", "/")
        if sys.platform == "win32" and forward_slashed.startswith("//./pipe/"):
            return f"npipe://{forward_slashed}"
        return f"unix://{socket_path}"

    @staticmethod
    def _detect_podman_socket() -> Optional[str]:
        """Return the first existing Podman API socket for the current platform.

        Only existence is checked for unix sockets; no connection is attempted.

        Returns:
            A ``unix://`` (or, on Windows, ``npipe://``) URL, or ``None`` when
            no candidate is found.
        """
        if sys.platform == "win32":
            return _check_windows_pipe("podman-machine-default")

        if sys.platform == "darwin":
            # expanduser honours HOME and still yields a usable directory
            # when HOME is not set.
            home = os.path.expanduser("~")
            candidates = [
                f"{home}/.local/share/containers/podman/machine/podman.sock",
                f"{home}/.local/share/containers/podman/machine/qemu/podman.sock",
            ]
        else:  # linux
            xdg = os.environ.get(
                "XDG_RUNTIME_DIR",
                f"/run/user/{os.getuid()}",
            )
            candidates = [
                f"{xdg}/podman/podman.sock",  # rootless
                "/run/podman/podman.sock",  # rootful
            ]

        for path in candidates:
            if os.path.exists(path):
                return f"unix://{path}"

        return None

    def _connection_error_hint(self, error: Exception) -> str:
        """Return a Podman-specific hint when the API socket is unreachable.

        Args:
            error: The exception raised while creating the client.

        Returns:
            A hint (starting with a space) when the error is a
            ``FileNotFoundError`` or mentions "No such file or directory";
            otherwise an empty string.
        """
        msg = str(error)
        if isinstance(error, FileNotFoundError) or "No such file or directory" in msg:
            docker_host = os.environ.get("DOCKER_HOST", "")
            base = self._podman_base_url or docker_host
            return (
                " Podman API socket seems unavailable. "
                "Make sure Podman is installed and the socket is active "
                "(run 'systemctl --user start podman.socket' on Linux, "
                "or 'podman machine start' on macOS/Windows). "
                f"(current target='{base}')"
            )
        return ""

    def _update_container_labels(self, container, labels: Dict[str, str]) -> None:
        """Skip container label updates — Podman does not support this operation.

        Expiration is already tracked in-memory via ``_sandbox_expirations`` by
        ``_schedule_expiration()``. The only consequence of skipping the label
        write is that a server restart after ``renew_expiration`` will fall back
        to the original expiration timestamp stored in the container label at
        creation time. This is an acceptable degradation — the sandbox may
        expire earlier than the renewed time, matching the behaviour the parent
        class already tolerates when the label update fails (see the
        ``except (DockerException, TypeError)`` guard in ``renew_expiration``).

        Args:
            container: The container whose labels would have been updated.
            labels: The labels that would have been written (ignored).
        """
        # Tolerate a missing or empty id so a debug log line can never raise.
        container_id = getattr(container, "id", None)
        logger.debug(
            "Skipping container label update on Podman (not supported): %s",
            container_id[:12] if container_id else "unknown",
        )


def _check_windows_pipe(pipe_name: str) -> Optional[str]:
"""Verify that a Windows named pipe exists by attempting to open it."""
win_path = f"\\\\.\\pipe\\{pipe_name}"
try:
# Opening with os.open works for named pipes on Windows and is
# cheap — we close immediately without reading.
fd = os.open(win_path, os.O_RDONLY)
os.close(fd)
except OSError:
return None
# The Docker SDK expects forward slashes in the npipe:// URL.
return f"npipe:////./pipe/{pipe_name}"
2 changes: 1 addition & 1 deletion server/opensandbox_server/services/runtime_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ async def validate_secure_runtime_on_startup(
logger.info("Secure runtime is not configured.")
return

if config.runtime.type == "docker":
if config.runtime.type in ("docker", "podman"):
await _validate_docker_runtime(resolver, docker_client)
elif config.runtime.type == "kubernetes":
await _validate_k8s_runtime_class(resolver, k8s_client, config)
Expand Down
Loading
Loading