Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 26 additions & 5 deletions .github/workflows/kubernetes-nightly-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ on:
pull_request:
branches: [ main ]
paths:
- 'workflows/kubernetes-nightly-build.yml'
- '.github/workflows/kubernetes-nightly-build.yml'
- 'scripts/python-k8s-e2e.sh'
- 'scripts/python-k8s-e2e-ingress.sh'
- 'scripts/common/kubernetes-e2e.sh'
- 'kubernetes/charts/**'

concurrency:
Expand All @@ -20,13 +22,29 @@ concurrency:

jobs:
k8s-mini-e2e:
name: Kubernetes mini E2E
name: Kubernetes mini E2E (${{ matrix.variant }})
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- variant: direct
script: scripts/python-k8s-e2e.sh
e2e_gateway_route_mode: ""
- variant: ingress-header
script: scripts/python-k8s-e2e-ingress.sh
e2e_gateway_route_mode: ""
- variant: ingress-uri
script: scripts/python-k8s-e2e-ingress.sh
e2e_gateway_route_mode: uri
env:
KIND_CLUSTER: opensandbox-e2e
KIND_K8S_VERSION: v1.30.4
KUBECONFIG_PATH: /tmp/opensandbox-kind-kubeconfig
KUBECONFIG: /tmp/opensandbox-kind-kubeconfig
# tests/base_e2e_test.get_e2e_sandbox_resource (scripts/common/kubernetes-e2e.sh)
OPENSANDBOX_E2E_SANDBOX_CPU: 250m
OPENSANDBOX_E2E_SANDBOX_MEMORY: 512Mi
steps:
- name: Checkout code
uses: actions/checkout@v6
Expand Down Expand Up @@ -56,8 +74,9 @@ jobs:
uses: azure/setup-helm@v4

- name: Run Kubernetes runtime E2E
run: |
bash ./scripts/python-k8s-e2e.sh
env:
E2E_GATEWAY_ROUTE_MODE: ${{ matrix.e2e_gateway_route_mode }}
run: bash "./${{ matrix.script }}"

- name: Dump kind diagnostics
if: always()
Expand All @@ -79,9 +98,11 @@ jobs:
if: always()
uses: actions/upload-artifact@v4
with:
name: python-k8s-e2e-logs
name: python-k8s-e2e-logs-${{ matrix.variant }}
path: |
/tmp/opensandbox-server-port-forward.log
/tmp/opensandbox-ingress-gateway-port-forward.log
if-no-files-found: ignore
retention-days: 5

- name: Clean up Kind cluster
Expand Down
2 changes: 1 addition & 1 deletion components/ingress/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ require (
go.yaml.in/yaml/v2 v2.4.3 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/net v0.49.0 // indirect
golang.org/x/oauth2 v0.32.0 // indirect
golang.org/x/oauth2 v0.34.0 // indirect
golang.org/x/sync v0.19.0 // indirect
golang.org/x/sys v0.40.0 // indirect
golang.org/x/term v0.39.0 // indirect
Expand Down
4 changes: 2 additions & 2 deletions components/ingress/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o=
golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8=
golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY=
golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw=
golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
Expand Down
286 changes: 286 additions & 0 deletions scripts/common/kubernetes-e2e.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
#!/usr/bin/env bash
# Copyright 2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Shared helpers for the Kubernetes Python E2E entrypoints:
# scripts/python-k8s-e2e.sh, scripts/python-k8s-e2e-ingress.sh
# (this file is scripts/common/kubernetes-e2e.sh — library only, not the top-level runner).
# Source after setting REPO_ROOT and the usual E2E_* / image env vars.
#
# Optional:
# E2E_SERVER_GATEWAY_ENABLED=true — include server.gateway.* in Helm values (ingress-gateway path).
# E2E_GATEWAY_ROUTE_MODE — when gateway enabled: header | uri (default header). Matches chart server.gateway.gatewayRouteMode.

#######################################
# Point kubectl/helm at the Kind kubeconfig for this shell and, when running
# under GitHub Actions, for the later workflow steps as well.
# Globals:
#   KUBECONFIG_PATH (read)  - path of the kubeconfig file to use
#   KUBECONFIG      (write) - exported with the value of KUBECONFIG_PATH
#   GITHUB_ENV      (read)  - optional; file that receives a KUBECONFIG= line
# Returns:
#   0 on success; non-zero if appending to GITHUB_ENV fails.
#######################################
k8s_e2e_export_kubeconfig() {
  KUBECONFIG="${KUBECONFIG_PATH}"
  export KUBECONFIG

  # Nothing more to do when no GITHUB_ENV file is configured.
  if [[ -z "${GITHUB_ENV:-}" ]]; then
    return 0
  fi
  printf 'KUBECONFIG=%s\n' "${KUBECONFIG_PATH}" >> "${GITHUB_ENV}"
}

#######################################
# Create the Kind test cluster, then build, load, install and deploy the
# controller image, and wait for the controller deployment to become
# available.
# Globals (read):
#   REPO_ROOT        - repository root; "${REPO_ROOT}/kubernetes" holds the Makefile
#   KIND_CLUSTER     - Kind cluster name
#   KIND_K8S_VERSION - Kubernetes version passed to `make setup-test-e2e`
#   KUBECONFIG_PATH  - file that receives the exported kubeconfig
#   CONTROLLER_IMG   - controller image reference to build and load
# Side effects:
#   Leaves the working directory at REPO_ROOT on success.
# Returns:
#   Non-zero, without running any make/kind/kubectl command, when the
#   kubernetes directory cannot be entered; otherwise the status of the
#   final `cd`.
#######################################
k8s_e2e_setup_kind_and_controller() {
  # Never run the make targets from the wrong directory, even when the caller
  # has not enabled errexit.
  cd "${REPO_ROOT}/kubernetes" || return 1

  make setup-test-e2e KIND_CLUSTER="${KIND_CLUSTER}" KIND_K8S_VERSION="${KIND_K8S_VERSION}"
  kind export kubeconfig --name "${KIND_CLUSTER}" --kubeconfig "${KUBECONFIG_PATH}"

  make docker-build-controller CONTROLLER_IMG="${CONTROLLER_IMG}"
  kind load docker-image --name "${KIND_CLUSTER}" "${CONTROLLER_IMG}"
  make install
  make deploy CONTROLLER_IMG="${CONTROLLER_IMG}"
  kubectl wait --for=condition=available --timeout=180s \
    deployment/opensandbox-controller-manager -n opensandbox-system

  cd "${REPO_ROOT}"
}

#######################################
# Build the server, execd and egress images (plus the ingress image when the
# gateway is enabled) and pull the sandbox test image.
# Globals (read):
#   SERVER_IMG, EXECD_IMG, EGRESS_IMG - tags for the built images
#   INGRESS_IMG                       - tag for the ingress image (gateway only)
#   E2E_SERVER_GATEWAY_ENABLED        - "true" enables the ingress build
#   REPO_ROOT                         - build context for the component images
#   SANDBOX_TEST_IMAGE                - image pulled for the sandbox tests
# Notes:
#   Dockerfile paths are relative, so the caller's working directory matters.
# Returns:
#   Status of the final `docker pull`.
#######################################
k8s_e2e_build_runtime_images() {
  docker build -f server/Dockerfile -t "${SERVER_IMG}" server

  # component-directory:NAME-OF-TAG-VARIABLE; the tag is resolved lazily so a
  # variable is only read right before its own build.
  local -a component_specs=(execd:EXECD_IMG egress:EGRESS_IMG)
  if [[ "${E2E_SERVER_GATEWAY_ENABLED:-false}" == "true" ]]; then
    component_specs+=(ingress:INGRESS_IMG)
  fi

  local spec tag_var
  for spec in "${component_specs[@]}"; do
    tag_var="${spec#*:}"
    docker build -f "components/${spec%%:*}/Dockerfile" -t "${!tag_var}" "${REPO_ROOT}"
  done

  docker pull "${SANDBOX_TEST_IMAGE}"
}

#######################################
# Load the runtime images into the Kind cluster: server, execd, egress, the
# ingress image when the gateway is enabled, and finally the sandbox test
# image.
# Globals (read):
#   KIND_CLUSTER                      - Kind cluster name
#   SERVER_IMG, EXECD_IMG, EGRESS_IMG - images always loaded
#   INGRESS_IMG                       - loaded only when the gateway is enabled
#   E2E_SERVER_GATEWAY_ENABLED        - "true" enables the ingress image
#   SANDBOX_TEST_IMAGE                - loaded last
# Returns:
#   Status of the last `kind load`.
#######################################
k8s_e2e_kind_load_runtime_images() {
  # Names of the variables holding the image references, in load order.
  local -a image_vars=(SERVER_IMG EXECD_IMG EGRESS_IMG)
  if [[ "${E2E_SERVER_GATEWAY_ENABLED:-false}" == "true" ]]; then
    image_vars+=(INGRESS_IMG)
  fi
  image_vars+=(SANDBOX_TEST_IMAGE)

  local image_var
  for image_var in "${image_vars[@]}"; do
    kind load docker-image --name "${KIND_CLUSTER}" "${!image_var}"
  done
}

# Create the E2E namespace if it does not exist, apply a hostPath-backed
# PersistentVolume (2Gi, path /tmp/${PV_NAME}) together with a 1Gi
# PersistentVolumeClaim pinned to it via volumeName, then run a one-shot
# alpine pod that writes marker files into the claim.
# Globals (read): E2E_NAMESPACE, PV_NAME, PVC_NAME.
# NOTE(review): the manifests inside the here-documents carry no indentation in
# this copy of the file; YAML nesting is indentation-sensitive, so confirm the
# here-document bodies against the checked-in script.
k8s_e2e_apply_pvc_and_seed() {
# Create the namespace only when the lookup fails.
kubectl get namespace "${E2E_NAMESPACE}" >/dev/null 2>&1 || kubectl create namespace "${E2E_NAMESPACE}"

# Unquoted EOF: ${PV_NAME}, ${PVC_NAME} and ${E2E_NAMESPACE} are expanded by the
# shell before the manifest reaches kubectl.
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolume
metadata:
name: ${PV_NAME}
spec:
capacity:
storage: 2Gi
accessModes:
- ReadWriteOnce
persistentVolumeReclaimPolicy: Retain
storageClassName: manual
hostPath:
path: /tmp/${PV_NAME}
type: DirectoryOrCreate
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: ${PVC_NAME}
namespace: ${E2E_NAMESPACE}
spec:
accessModes:
- ReadWriteOnce
storageClassName: manual
resources:
requests:
storage: 1Gi
volumeName: ${PV_NAME}
EOF

# Block for up to 120s until the claim reports phase Bound.
kubectl wait --for=jsonpath='{.status.phase}'=Bound --timeout=120s "pvc/${PVC_NAME}" -n "${E2E_NAMESPACE}"

# Seed pod: mounts the claim at /data and writes /data/marker.txt and
# /data/datasets/train/marker.txt.
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
name: opensandbox-e2e-pvc-seed
namespace: ${E2E_NAMESPACE}
spec:
restartPolicy: Never
containers:
- name: seed
image: alpine:3.20
command:
- /bin/sh
- -c
- |
set -eux
mkdir -p /data/datasets/train
echo 'pvc-marker-data' > /data/marker.txt
echo 'pvc-subpath-marker' > /data/datasets/train/marker.txt
volumeMounts:
- name: pvc
mountPath: /data
volumes:
- name: pvc
persistentVolumeClaim:
claimName: ${PVC_NAME}
EOF

# Wait for the seed pod to reach phase Succeeded, then remove it.
kubectl wait --for=jsonpath='{.status.phase}'=Succeeded --timeout=120s pod/opensandbox-e2e-pvc-seed -n "${E2E_NAMESPACE}"
kubectl delete pod/opensandbox-e2e-pvc-seed -n "${E2E_NAMESPACE}" --ignore-not-found=true
}

# Write the Helm values for the opensandbox-server chart to
# ${SERVER_VALUES_FILE}, overwriting any existing file. The output is the
# concatenation of three here-documents: the server image/resources section,
# an optional gateway section, and the configToml payload.
# Globals (read): SERVER_IMG_REPOSITORY, SERVER_IMG_TAG,
#   E2E_SERVER_GATEWAY_ENABLED, INGRESS_GATEWAY_ADDRESS, E2E_GATEWAY_ROUTE_MODE,
#   E2E_NAMESPACE, INGRESS_IMG_REPOSITORY, INGRESS_IMG_TAG, EXECD_IMG,
#   EGRESS_IMG, SERVER_VALUES_FILE.
# NOTE(review): the here-document bodies carry no indentation in this copy of
# the file, so the intended YAML nesting (for example where gateway: and
# configToml: sit relative to server:) cannot be read from here; confirm
# against the checked-in script.
k8s_e2e_write_server_helm_values() {
# Everything printed inside this brace group is redirected into the values file.
{
cat <<EOF
server:
image:
repository: ${SERVER_IMG_REPOSITORY}
tag: "${SERVER_IMG_TAG}"
pullPolicy: IfNotPresent
replicaCount: 1
resources:
limits:
cpu: "1"
memory: 2Gi
requests:
cpu: "250m"
memory: 512Mi
EOF
# Gateway section is emitted only when E2E_SERVER_GATEWAY_ENABLED is exactly
# "true"; gatewayRouteMode falls back to "header" when E2E_GATEWAY_ROUTE_MODE is
# unset or empty.
if [ "${E2E_SERVER_GATEWAY_ENABLED:-false}" = "true" ]; then
cat <<EOF
gateway:
enabled: true
host: "${INGRESS_GATEWAY_ADDRESS}"
gatewayRouteMode: "${E2E_GATEWAY_ROUTE_MODE:-header}"
dataplaneNamespace: "${E2E_NAMESPACE}"
replicaCount: 1
image:
repository: ${INGRESS_IMG_REPOSITORY}
tag: "${INGRESS_IMG_TAG}"
resources:
limits:
cpu: "1"
memory: 1Gi
requests:
cpu: "250m"
memory: 512Mi
EOF
fi
# Server configuration in TOML form, carried as a block scalar.
cat <<EOF
configToml: |
[server]
host = "0.0.0.0"
port = 80
log_level = "INFO"
api_key = ""

[runtime]
type = "kubernetes"
execd_image = "${EXECD_IMG}"

[egress]
image = "${EGRESS_IMG}"

[kubernetes]
namespace = "${E2E_NAMESPACE}"
workload_provider = "batchsandbox"
sandbox_create_timeout_seconds = 180
sandbox_create_poll_interval_seconds = 1.0
batchsandbox_template_file = "/etc/opensandbox/e2e.batchsandbox-template.yaml"

[storage]
allowed_host_paths = []
EOF
} > "${SERVER_VALUES_FILE}"
}

# Render the opensandbox-server chart with `helm template` using
# ${SERVER_VALUES_FILE}, pull the "config.toml" block scalar out of the
# rendered manifest, and parse it as TOML so that a malformed configuration is
# caught before `helm upgrade --install` runs.
# Globals (read): REPO_ROOT, SERVER_VALUES_FILE (passed to Python as argv[1]
#   and argv[2]).
# Requires: python3 with tomllib (falls back to the tomli package) and helm on
#   PATH.
# Fails when helm exits non-zero (check=True), when no config.toml lines are
# extracted (RuntimeError), or when the TOML does not parse.
# The 'PY' delimiter is quoted, so the shell performs no expansion inside the
# embedded script.
# NOTE(review): the embedded Python carries no indentation in this copy of the
# file and the whitespace inside its string literals may have been collapsed
# (the extraction slices four characters off each captured line); confirm the
# here-document body against the checked-in script.
k8s_e2e_validate_rendered_config_toml() {
python3 - <<'PY' "${REPO_ROOT}" "${SERVER_VALUES_FILE}"
import subprocess
import sys

try:
import tomllib
except ModuleNotFoundError:
import tomli as tomllib

repo_root, values_file = sys.argv[1], sys.argv[2]
chart_path = f"{repo_root}/kubernetes/charts/opensandbox-server"

rendered = subprocess.run(
["helm", "template", "opensandbox-server", chart_path, "-f", values_file],
check=True,
capture_output=True,
text=True,
).stdout

config_lines = []
capturing = False
for line in rendered.splitlines():
if line == " config.toml: |":
capturing = True
continue
if capturing:
if line.startswith("---"):
break
if line.startswith(" "):
config_lines.append(line[4:])
continue
if line.strip() == "":
config_lines.append("")
continue
break

if not config_lines:
raise RuntimeError("Failed to extract config.toml from rendered Helm manifest")

tomllib.loads("\n".join(config_lines) + "\n")
PY
}

#######################################
# Install (or upgrade) the opensandbox-server Helm release and wait until its
# deployment is available; when the gateway is enabled, also wait for the
# ingress-gateway deployment.
# Globals (read):
#   SERVER_NAMESPACE           - namespace of the release
#   SERVER_RELEASE             - Helm release name
#   REPO_ROOT                  - repository root containing the chart
#   SERVER_VALUES_FILE         - values file passed to helm
#   E2E_SERVER_GATEWAY_ENABLED - "true" adds the gateway wait
# Behavior:
#   Validates the rendered config.toml before installing. If the server
#   deployment does not become available within 180s, prints pod, deployment
#   and log diagnostics (best effort) and terminates the shell with status 1.
# Returns:
#   0, or the status of the gateway wait when the gateway is enabled.
#######################################
k8s_e2e_helm_install_server() {
  local ns="${SERVER_NAMESPACE}"
  local server_deploy="deployment/opensandbox-server"
  local -a wait_available=(wait --for=condition=available --timeout=180s)

  if ! kubectl get namespace "${ns}" >/dev/null 2>&1; then
    kubectl create namespace "${ns}"
  fi
  k8s_e2e_validate_rendered_config_toml

  helm upgrade --install "${SERVER_RELEASE}" \
    "${REPO_ROOT}/kubernetes/charts/opensandbox-server" \
    --namespace "${ns}" \
    --create-namespace \
    -f "${SERVER_VALUES_FILE}"

  kubectl "${wait_available[@]}" "${server_deploy}" -n "${ns}" || {
    # Diagnostics are best effort: a failing dump must not hide the real error.
    kubectl get pods -n "${ns}" -o wide || true
    kubectl describe "${server_deploy}" -n "${ns}" || true
    kubectl describe pods -n "${ns}" -l app.kubernetes.io/name=opensandbox-server || true
    kubectl logs -n "${ns}" "${server_deploy}" --all-containers=true || true
    exit 1
  }

  [[ "${E2E_SERVER_GATEWAY_ENABLED:-false}" == "true" ]] || return 0
  kubectl "${wait_available[@]}" deployment/opensandbox-ingress-gateway -n "${ns}"
}

#######################################
# Poll a URL until curl succeeds.
# Arguments:
#   $1 - URL to probe with `curl -fsS`
# Behavior:
#   Makes up to 30 attempts, sleeping 2 seconds after each failure, then one
#   final attempt whose exit status becomes the function's status.
# Returns:
#   0 as soon as a request succeeds; otherwise the status of the final curl.
#######################################
k8s_e2e_wait_http_ok() {
  local endpoint="$1"
  local attempt
  for ((attempt = 1; attempt <= 30; attempt++)); do
    curl -fsS "${endpoint}" >/dev/null && return 0
    sleep 2
  done
  # Last try: its exit status (and error output) is what the caller sees.
  curl -fsS "${endpoint}" >/dev/null
}

#######################################
# Export the sandbox resource settings consumed by tests/python
# (see tests/base_e2e_test.get_e2e_sandbox_resource).
# Globals (read/write, exported):
#   OPENSANDBOX_E2E_SANDBOX_CPU    - kept if non-empty, else set to 250m
#   OPENSANDBOX_E2E_SANDBOX_MEMORY - kept if non-empty, else set to 512Mi
#######################################
k8s_e2e_export_sandbox_resource_env() {
  # ':=' assigns the default only when the variable is unset or empty.
  : "${OPENSANDBOX_E2E_SANDBOX_CPU:=250m}"
  : "${OPENSANDBOX_E2E_SANDBOX_MEMORY:=512Mi}"
  export OPENSANDBOX_E2E_SANDBOX_CPU OPENSANDBOX_E2E_SANDBOX_MEMORY
}

#######################################
# Regenerate the Python SDK API client, sync the test environment, and run the
# Kubernetes mini E2E test target.
# Globals (read):
#   REPO_ROOT - repository root containing sdks/sandbox/python and tests/python
# Side effects:
#   Leaves the working directory at ${REPO_ROOT}/tests/python on success.
# Returns:
#   Non-zero as soon as either directory cannot be entered (nothing further is
#   run); otherwise the status of `make test-kubernetes-mini`.
#######################################
k8s_e2e_generate_sdk_and_run_kubernetes_mini() {
  # Guard each cd so make/uv never run in the wrong directory, even when the
  # caller has not enabled errexit.
  cd "${REPO_ROOT}/sdks/sandbox/python" || return 1
  make generate-api

  cd "${REPO_ROOT}/tests/python" || return 1
  uv sync --all-extras --refresh
  make test-kubernetes-mini
}
Loading
Loading