diff --git a/.github/workflows/kubernetes-nightly-build.yml b/.github/workflows/kubernetes-nightly-build.yml index aad856d93..86b04e525 100644 --- a/.github/workflows/kubernetes-nightly-build.yml +++ b/.github/workflows/kubernetes-nightly-build.yml @@ -10,8 +10,10 @@ on: pull_request: branches: [ main ] paths: - - 'workflows/kubernetes-nightly-build.yml' + - '.github/workflows/kubernetes-nightly-build.yml' - 'scripts/python-k8s-e2e.sh' + - 'scripts/python-k8s-e2e-ingress.sh' + - 'scripts/common/kubernetes-e2e.sh' - 'kubernetes/charts/**' concurrency: @@ -20,13 +22,29 @@ concurrency: jobs: k8s-mini-e2e: - name: Kubernetes mini E2E + name: Kubernetes mini E2E (${{ matrix.variant }}) runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + - variant: direct + script: scripts/python-k8s-e2e.sh + e2e_gateway_route_mode: "" + - variant: ingress-header + script: scripts/python-k8s-e2e-ingress.sh + e2e_gateway_route_mode: "" + - variant: ingress-uri + script: scripts/python-k8s-e2e-ingress.sh + e2e_gateway_route_mode: uri env: KIND_CLUSTER: opensandbox-e2e KIND_K8S_VERSION: v1.30.4 KUBECONFIG_PATH: /tmp/opensandbox-kind-kubeconfig KUBECONFIG: /tmp/opensandbox-kind-kubeconfig + # tests/base_e2e_test.get_e2e_sandbox_resource (scripts/common/kubernetes-e2e.sh) + OPENSANDBOX_E2E_SANDBOX_CPU: 250m + OPENSANDBOX_E2E_SANDBOX_MEMORY: 512Mi steps: - name: Checkout code uses: actions/checkout@v6 @@ -56,8 +74,9 @@ jobs: uses: azure/setup-helm@v4 - name: Run Kubernetes runtime E2E - run: | - bash ./scripts/python-k8s-e2e.sh + env: + E2E_GATEWAY_ROUTE_MODE: ${{ matrix.e2e_gateway_route_mode }} + run: bash "./${{ matrix.script }}" - name: Dump kind diagnostics if: always() @@ -79,9 +98,11 @@ jobs: if: always() uses: actions/upload-artifact@v4 with: - name: python-k8s-e2e-logs + name: python-k8s-e2e-logs-${{ matrix.variant }} path: | /tmp/opensandbox-server-port-forward.log + /tmp/opensandbox-ingress-gateway-port-forward.log + if-no-files-found: ignore retention-days: 
5 - name: Clean up Kind cluster diff --git a/components/ingress/go.mod b/components/ingress/go.mod index f18e6eeb4..fd2411b2c 100644 --- a/components/ingress/go.mod +++ b/components/ingress/go.mod @@ -54,7 +54,7 @@ require ( go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/net v0.49.0 // indirect - golang.org/x/oauth2 v0.32.0 // indirect + golang.org/x/oauth2 v0.34.0 // indirect golang.org/x/sync v0.19.0 // indirect golang.org/x/sys v0.40.0 // indirect golang.org/x/term v0.39.0 // indirect diff --git a/components/ingress/go.sum b/components/ingress/go.sum index b51ebe61c..3365efce4 100644 --- a/components/ingress/go.sum +++ b/components/ingress/go.sum @@ -143,8 +143,8 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= -golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY= -golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= +golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= diff --git a/scripts/common/kubernetes-e2e.sh b/scripts/common/kubernetes-e2e.sh new file mode 100644 index 000000000..a4bd90b32 --- /dev/null +++ b/scripts/common/kubernetes-e2e.sh @@ -0,0 +1,286 @@ +#!/usr/bin/env bash +# Copyright 2026 Alibaba Group Holding Ltd. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Shared helpers for the Kubernetes Python E2E entrypoints: +# scripts/python-k8s-e2e.sh, scripts/python-k8s-e2e-ingress.sh +# (this file is scripts/common/kubernetes-e2e.sh — library only, not the top-level runner). +# The entrypoints source this file first, then set REPO_ROOT and the usual E2E_* / image env vars; those must be set before any helper is called. +# +# Optional: +# E2E_SERVER_GATEWAY_ENABLED=true — include server.gateway.* in Helm values (ingress-gateway path). +# E2E_GATEWAY_ROUTE_MODE — when gateway enabled: header | uri (default header). Matches chart server.gateway.gatewayRouteMode. 
+ +k8s_e2e_export_kubeconfig() { + export KUBECONFIG="${KUBECONFIG_PATH}" + if [ -n "${GITHUB_ENV:-}" ]; then + echo "KUBECONFIG=${KUBECONFIG_PATH}" >> "${GITHUB_ENV}" + fi +} + +k8s_e2e_setup_kind_and_controller() { + cd "${REPO_ROOT}/kubernetes" + make setup-test-e2e KIND_CLUSTER="${KIND_CLUSTER}" KIND_K8S_VERSION="${KIND_K8S_VERSION}" + kind export kubeconfig --name "${KIND_CLUSTER}" --kubeconfig "${KUBECONFIG_PATH}" + + make docker-build-controller CONTROLLER_IMG="${CONTROLLER_IMG}" + kind load docker-image --name "${KIND_CLUSTER}" "${CONTROLLER_IMG}" + make install + make deploy CONTROLLER_IMG="${CONTROLLER_IMG}" + kubectl wait --for=condition=available --timeout=180s deployment/opensandbox-controller-manager -n opensandbox-system + cd "${REPO_ROOT}" +} + +k8s_e2e_build_runtime_images() { + docker build -f server/Dockerfile -t "${SERVER_IMG}" server + docker build -f components/execd/Dockerfile -t "${EXECD_IMG}" "${REPO_ROOT}" + docker build -f components/egress/Dockerfile -t "${EGRESS_IMG}" "${REPO_ROOT}" + if [ "${E2E_SERVER_GATEWAY_ENABLED:-false}" = "true" ]; then + docker build -f components/ingress/Dockerfile -t "${INGRESS_IMG}" "${REPO_ROOT}" + fi + docker pull "${SANDBOX_TEST_IMAGE}" +} + +k8s_e2e_kind_load_runtime_images() { + kind load docker-image --name "${KIND_CLUSTER}" "${SERVER_IMG}" + kind load docker-image --name "${KIND_CLUSTER}" "${EXECD_IMG}" + kind load docker-image --name "${KIND_CLUSTER}" "${EGRESS_IMG}" + if [ "${E2E_SERVER_GATEWAY_ENABLED:-false}" = "true" ]; then + kind load docker-image --name "${KIND_CLUSTER}" "${INGRESS_IMG}" + fi + kind load docker-image --name "${KIND_CLUSTER}" "${SANDBOX_TEST_IMAGE}" +} + +k8s_e2e_apply_pvc_and_seed() { + kubectl get namespace "${E2E_NAMESPACE}" >/dev/null 2>&1 || kubectl create namespace "${E2E_NAMESPACE}" + + cat < /data/marker.txt + echo 'pvc-subpath-marker' > /data/datasets/train/marker.txt + volumeMounts: + - name: pvc + mountPath: /data + volumes: + - name: pvc + persistentVolumeClaim: + 
claimName: ${PVC_NAME} +EOF + + kubectl wait --for=jsonpath='{.status.phase}'=Succeeded --timeout=120s pod/opensandbox-e2e-pvc-seed -n "${E2E_NAMESPACE}" + kubectl delete pod/opensandbox-e2e-pvc-seed -n "${E2E_NAMESPACE}" --ignore-not-found=true +} + +k8s_e2e_write_server_helm_values() { + { + cat < "${SERVER_VALUES_FILE}" +} + +k8s_e2e_validate_rendered_config_toml() { + python3 - <<'PY' "${REPO_ROOT}" "${SERVER_VALUES_FILE}" +import subprocess +import sys + +try: + import tomllib +except ModuleNotFoundError: + import tomli as tomllib + +repo_root, values_file = sys.argv[1], sys.argv[2] +chart_path = f"{repo_root}/kubernetes/charts/opensandbox-server" + +rendered = subprocess.run( + ["helm", "template", "opensandbox-server", chart_path, "-f", values_file], + check=True, + capture_output=True, + text=True, +).stdout + +config_lines = [] +capturing = False +for line in rendered.splitlines(): + if line == " config.toml: |": + capturing = True + continue + if capturing: + if line.startswith("---"): + break + if line.startswith(" "): + config_lines.append(line[4:]) + continue + if line.strip() == "": + config_lines.append("") + continue + break + +if not config_lines: + raise RuntimeError("Failed to extract config.toml from rendered Helm manifest") + +tomllib.loads("\n".join(config_lines) + "\n") +PY +} + +k8s_e2e_helm_install_server() { + kubectl get namespace "${SERVER_NAMESPACE}" >/dev/null 2>&1 || kubectl create namespace "${SERVER_NAMESPACE}" + k8s_e2e_validate_rendered_config_toml + + helm upgrade --install "${SERVER_RELEASE}" "${REPO_ROOT}/kubernetes/charts/opensandbox-server" \ + --namespace "${SERVER_NAMESPACE}" \ + --create-namespace \ + -f "${SERVER_VALUES_FILE}" + if ! 
kubectl wait --for=condition=available --timeout=180s deployment/opensandbox-server -n "${SERVER_NAMESPACE}"; then + kubectl get pods -n "${SERVER_NAMESPACE}" -o wide || true + kubectl describe deployment/opensandbox-server -n "${SERVER_NAMESPACE}" || true + kubectl describe pods -n "${SERVER_NAMESPACE}" -l app.kubernetes.io/name=opensandbox-server || true + kubectl logs -n "${SERVER_NAMESPACE}" deployment/opensandbox-server --all-containers=true || true + exit 1 + fi + if [ "${E2E_SERVER_GATEWAY_ENABLED:-false}" = "true" ]; then + kubectl wait --for=condition=available --timeout=180s deployment/opensandbox-ingress-gateway -n "${SERVER_NAMESPACE}" + fi +} + +k8s_e2e_wait_http_ok() { + local url="$1" + local i + for i in $(seq 1 30); do + if curl -fsS "${url}" >/dev/null; then + return 0 + fi + sleep 2 + done + curl -fsS "${url}" >/dev/null +} + +# Exports for tests/python (see tests/base_e2e_test.get_e2e_sandbox_resource). +k8s_e2e_export_sandbox_resource_env() { + export OPENSANDBOX_E2E_SANDBOX_CPU="${OPENSANDBOX_E2E_SANDBOX_CPU:-250m}" + export OPENSANDBOX_E2E_SANDBOX_MEMORY="${OPENSANDBOX_E2E_SANDBOX_MEMORY:-512Mi}" +} + +k8s_e2e_generate_sdk_and_run_kubernetes_mini() { + cd "${REPO_ROOT}/sdks/sandbox/python" + make generate-api + cd "${REPO_ROOT}/tests/python" + uv sync --all-extras --refresh + make test-kubernetes-mini +} diff --git a/scripts/python-k8s-e2e-ingress.sh b/scripts/python-k8s-e2e-ingress.sh new file mode 100755 index 000000000..e0a159af3 --- /dev/null +++ b/scripts/python-k8s-e2e-ingress.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Kubernetes E2E (Python) with server ingress.mode=gateway and the chart-deployed +# ingress-gateway (components/ingress). See kubernetes/charts/opensandbox-server/README.md. +# +# Compared to scripts/python-k8s-e2e.sh: +# - Builds the opensandbox/ingress image and sets server.gateway.* so Helm deploys opensandbox-ingress-gateway. +# - Port-forwards both the lifecycle API and the gateway. +# - Sets OPENSANDBOX_TEST_USE_SERVER_PROXY=false so the SDK uses gateway routes + headers from the API. +# +# Route mode (Helm server.gateway.gatewayRouteMode + ingress --mode): +# Default: header. For URI path routing: E2E_GATEWAY_ROUTE_MODE=uri ./scripts/python-k8s-e2e-ingress.sh + +set -euxo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# shellcheck source=common/kubernetes-e2e.sh +source "${SCRIPT_DIR}/common/kubernetes-e2e.sh" + +REPO_ROOT="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" + +export E2E_SERVER_GATEWAY_ENABLED=true +export E2E_GATEWAY_ROUTE_MODE="${E2E_GATEWAY_ROUTE_MODE:-header}" + +KIND_CLUSTER="${KIND_CLUSTER:-opensandbox-e2e}" +KIND_K8S_VERSION="${KIND_K8S_VERSION:-v1.30.4}" +KUBECONFIG_PATH="${KUBECONFIG_PATH:-/tmp/opensandbox-kind-kubeconfig}" +E2E_NAMESPACE="${E2E_NAMESPACE:-opensandbox-e2e}" +SERVER_NAMESPACE="${SERVER_NAMESPACE:-opensandbox-system}" +PVC_NAME="${PVC_NAME:-opensandbox-e2e-pvc-test}" +PV_NAME="${PV_NAME:-opensandbox-e2e-pv-test}" +CONTROLLER_IMG="${CONTROLLER_IMG:-opensandbox/controller:e2e-local}" +SERVER_IMG="${SERVER_IMG:-opensandbox/server:e2e-local}" +EXECD_IMG="${EXECD_IMG:-opensandbox/execd:e2e-local}" +EGRESS_IMG="${EGRESS_IMG:-opensandbox/egress:e2e-local}" +INGRESS_IMG="${INGRESS_IMG:-opensandbox/ingress:e2e-local}" +SERVER_RELEASE="${SERVER_RELEASE:-opensandbox-server}" +SERVER_VALUES_FILE="${SERVER_VALUES_FILE:-/tmp/opensandbox-server-values-ingress.yaml}" +PORT_FORWARD_LOG="${PORT_FORWARD_LOG:-/tmp/opensandbox-server-port-forward.log}" +GATEWAY_PORT_FORWARD_LOG="${GATEWAY_PORT_FORWARD_LOG:-/tmp/opensandbox-ingress-gateway-port-forward.log}" +SANDBOX_TEST_IMAGE="${SANDBOX_TEST_IMAGE:-ubuntu:latest}" + +GATEWAY_LOCAL_PORT="${GATEWAY_LOCAL_PORT:-8081}" +INGRESS_GATEWAY_ADDRESS="${INGRESS_GATEWAY_ADDRESS:-127.0.0.1:${GATEWAY_LOCAL_PORT}}" +LIFECYCLE_LOCAL_PORT="${LIFECYCLE_LOCAL_PORT:-8080}" + +SERVER_IMG_REPOSITORY="${SERVER_IMG%:*}" +SERVER_IMG_TAG="${SERVER_IMG##*:}" +INGRESS_IMG_REPOSITORY="${INGRESS_IMG%:*}" +INGRESS_IMG_TAG="${INGRESS_IMG##*:}" + +k8s_e2e_export_kubeconfig +k8s_e2e_setup_kind_and_controller +k8s_e2e_build_runtime_images +k8s_e2e_kind_load_runtime_images +k8s_e2e_apply_pvc_and_seed +k8s_e2e_write_server_helm_values +k8s_e2e_helm_install_server + +kubectl port-forward -n "${SERVER_NAMESPACE}" svc/opensandbox-server "${LIFECYCLE_LOCAL_PORT}:80" >"${PORT_FORWARD_LOG}" 2>&1 & +PORT_FORWARD_PID=$! 
+kubectl port-forward -n "${SERVER_NAMESPACE}" svc/opensandbox-ingress-gateway "${GATEWAY_LOCAL_PORT}:80" >"${GATEWAY_PORT_FORWARD_LOG}" 2>&1 & +GATEWAY_PORT_FORWARD_PID=$! +cleanup_port_forwards() { + kill "${PORT_FORWARD_PID}" >/dev/null 2>&1 || true + kill "${GATEWAY_PORT_FORWARD_PID}" >/dev/null 2>&1 || true +} +trap cleanup_port_forwards EXIT + +k8s_e2e_wait_http_ok "http://127.0.0.1:${LIFECYCLE_LOCAL_PORT}/health" +k8s_e2e_wait_http_ok "http://127.0.0.1:${GATEWAY_LOCAL_PORT}/status.ok" + +export OPENSANDBOX_TEST_DOMAIN="localhost:${LIFECYCLE_LOCAL_PORT}" +export OPENSANDBOX_TEST_PROTOCOL="http" +export OPENSANDBOX_TEST_API_KEY="" +export OPENSANDBOX_SANDBOX_DEFAULT_IMAGE="${SANDBOX_TEST_IMAGE}" +export OPENSANDBOX_E2E_RUNTIME="kubernetes" +export OPENSANDBOX_TEST_USE_SERVER_PROXY="false" +export OPENSANDBOX_TEST_PVC_NAME="${PVC_NAME}" + +k8s_e2e_export_sandbox_resource_env + +k8s_e2e_generate_sdk_and_run_kubernetes_mini diff --git a/scripts/python-k8s-e2e.sh b/scripts/python-k8s-e2e.sh index 33d4a2ae9..8075aa371 100644 --- a/scripts/python-k8s-e2e.sh +++ b/scripts/python-k8s-e2e.sh @@ -15,7 +15,11 @@ set -euxo pipefail -REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)" +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# shellcheck source=common/kubernetes-e2e.sh +source "${SCRIPT_DIR}/common/kubernetes-e2e.sh" + +REPO_ROOT="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" KIND_CLUSTER="${KIND_CLUSTER:-opensandbox-e2e}" KIND_K8S_VERSION="${KIND_K8S_VERSION:-v1.30.4}" @@ -32,216 +36,26 @@ SERVER_RELEASE="${SERVER_RELEASE:-opensandbox-server}" SERVER_VALUES_FILE="${SERVER_VALUES_FILE:-/tmp/opensandbox-server-values.yaml}" PORT_FORWARD_LOG="${PORT_FORWARD_LOG:-/tmp/opensandbox-server-port-forward.log}" SANDBOX_TEST_IMAGE="${SANDBOX_TEST_IMAGE:-ubuntu:latest}" +LIFECYCLE_LOCAL_PORT="${LIFECYCLE_LOCAL_PORT:-8080}" SERVER_IMG_REPOSITORY="${SERVER_IMG%:*}" SERVER_IMG_TAG="${SERVER_IMG##*:}" -export KUBECONFIG="${KUBECONFIG_PATH}" -if [ -n "${GITHUB_ENV:-}" ]; then - echo "KUBECONFIG=${KUBECONFIG_PATH}" >> "${GITHUB_ENV}" -fi - -cd "${REPO_ROOT}/kubernetes" -make setup-test-e2e KIND_CLUSTER="${KIND_CLUSTER}" KIND_K8S_VERSION="${KIND_K8S_VERSION}" -kind export kubeconfig --name "${KIND_CLUSTER}" --kubeconfig "${KUBECONFIG_PATH}" - -# Build and load the latest controller code used by the Kubernetes runtime backend. -make docker-build-controller CONTROLLER_IMG="${CONTROLLER_IMG}" -kind load docker-image --name "${KIND_CLUSTER}" "${CONTROLLER_IMG}" -make install -make deploy CONTROLLER_IMG="${CONTROLLER_IMG}" -kubectl wait --for=condition=available --timeout=180s deployment/opensandbox-controller-manager -n opensandbox-system -cd "${REPO_ROOT}" - -# Build sandbox-side control plane images from the current workspace so E2E exercises latest server/runtime code. 
-docker build -f server/Dockerfile -t "${SERVER_IMG}" server -docker build -f components/execd/Dockerfile -t "${EXECD_IMG}" "${REPO_ROOT}" -docker build -f components/egress/Dockerfile -t "${EGRESS_IMG}" "${REPO_ROOT}" -docker pull "${SANDBOX_TEST_IMAGE}" - -kind load docker-image --name "${KIND_CLUSTER}" "${SERVER_IMG}" -kind load docker-image --name "${KIND_CLUSTER}" "${EXECD_IMG}" -kind load docker-image --name "${KIND_CLUSTER}" "${EGRESS_IMG}" -kind load docker-image --name "${KIND_CLUSTER}" "${SANDBOX_TEST_IMAGE}" - -kubectl get namespace "${E2E_NAMESPACE}" >/dev/null 2>&1 || kubectl create namespace "${E2E_NAMESPACE}" - -cat < /data/marker.txt - echo 'pvc-subpath-marker' > /data/datasets/train/marker.txt - volumeMounts: - - name: pvc - mountPath: /data - volumes: - - name: pvc - persistentVolumeClaim: - claimName: ${PVC_NAME} -EOF - -kubectl wait --for=jsonpath='{.status.phase}'=Succeeded --timeout=120s pod/opensandbox-e2e-pvc-seed -n "${E2E_NAMESPACE}" -kubectl delete pod/opensandbox-e2e-pvc-seed -n "${E2E_NAMESPACE}" --ignore-not-found=true - -cat < "${SERVER_VALUES_FILE}" -server: - image: - repository: ${SERVER_IMG_REPOSITORY} - tag: "${SERVER_IMG_TAG}" - pullPolicy: IfNotPresent - replicaCount: 1 - resources: - limits: - cpu: "1" - memory: 2Gi - requests: - cpu: "250m" - memory: 512Mi -configToml: | - [server] - host = "0.0.0.0" - port = 80 - log_level = "INFO" - api_key = "" - - [runtime] - type = "kubernetes" - execd_image = "${EXECD_IMG}" - - [egress] - image = "${EGRESS_IMG}" +k8s_e2e_export_kubeconfig +k8s_e2e_setup_kind_and_controller +k8s_e2e_build_runtime_images +k8s_e2e_kind_load_runtime_images +k8s_e2e_apply_pvc_and_seed +k8s_e2e_write_server_helm_values +k8s_e2e_helm_install_server - [kubernetes] - namespace = "${E2E_NAMESPACE}" - workload_provider = "batchsandbox" - sandbox_create_timeout_seconds = 180 - sandbox_create_poll_interval_seconds = 1.0 - batchsandbox_template_file = "/etc/opensandbox/example.batchsandbox-template.yaml" - - 
[storage] - allowed_host_paths = [] -EOF - -kubectl get namespace "${SERVER_NAMESPACE}" >/dev/null 2>&1 || kubectl create namespace "${SERVER_NAMESPACE}" -python3 - <<'PY' "${REPO_ROOT}" "${SERVER_VALUES_FILE}" -import subprocess -import sys - -try: - import tomllib -except ModuleNotFoundError: - import tomli as tomllib - -repo_root, values_file = sys.argv[1], sys.argv[2] -chart_path = f"{repo_root}/kubernetes/charts/opensandbox-server" - -rendered = subprocess.run( - ["helm", "template", "opensandbox-server", chart_path, "-f", values_file], - check=True, - capture_output=True, - text=True, -).stdout - -config_lines = [] -capturing = False -for line in rendered.splitlines(): - if line == " config.toml: |": - capturing = True - continue - if capturing: - if line.startswith("---"): - break - if line.startswith(" "): - config_lines.append(line[4:]) - continue - if line.strip() == "": - config_lines.append("") - continue - break - -if not config_lines: - raise RuntimeError("Failed to extract config.toml from rendered Helm manifest") - -tomllib.loads("\n".join(config_lines) + "\n") -PY - -helm upgrade --install "${SERVER_RELEASE}" "${REPO_ROOT}/kubernetes/charts/opensandbox-server" \ - --namespace "${SERVER_NAMESPACE}" \ - --create-namespace \ - -f "${SERVER_VALUES_FILE}" -if ! 
kubectl wait --for=condition=available --timeout=180s deployment/opensandbox-server -n "${SERVER_NAMESPACE}"; then - kubectl get pods -n "${SERVER_NAMESPACE}" -o wide || true - kubectl describe deployment/opensandbox-server -n "${SERVER_NAMESPACE}" || true - kubectl describe pods -n "${SERVER_NAMESPACE}" -l app.kubernetes.io/name=opensandbox-server || true - kubectl logs -n "${SERVER_NAMESPACE}" deployment/opensandbox-server --all-containers=true || true - exit 1 -fi - -kubectl port-forward -n "${SERVER_NAMESPACE}" svc/opensandbox-server 8080:80 >"${PORT_FORWARD_LOG}" 2>&1 & +kubectl port-forward -n "${SERVER_NAMESPACE}" svc/opensandbox-server "${LIFECYCLE_LOCAL_PORT}:80" >"${PORT_FORWARD_LOG}" 2>&1 & PORT_FORWARD_PID=$! trap 'kill "${PORT_FORWARD_PID}" >/dev/null 2>&1 || true' EXIT -for _ in $(seq 1 30); do - if curl -fsS http://127.0.0.1:8080/health >/dev/null; then - break - fi - sleep 2 -done -curl -fsS http://127.0.0.1:8080/health >/dev/null - -# Build local lifecycle client code before running the Python E2E suite. -cd sdks/sandbox/python -make generate-api -cd ../../.. 
+k8s_e2e_wait_http_ok "http://127.0.0.1:${LIFECYCLE_LOCAL_PORT}/health" -export OPENSANDBOX_TEST_DOMAIN="localhost:8080" +export OPENSANDBOX_TEST_DOMAIN="localhost:${LIFECYCLE_LOCAL_PORT}" export OPENSANDBOX_TEST_PROTOCOL="http" export OPENSANDBOX_TEST_API_KEY="" export OPENSANDBOX_SANDBOX_DEFAULT_IMAGE="${SANDBOX_TEST_IMAGE}" @@ -249,6 +63,6 @@ export OPENSANDBOX_E2E_RUNTIME="kubernetes" export OPENSANDBOX_TEST_USE_SERVER_PROXY="true" export OPENSANDBOX_TEST_PVC_NAME="${PVC_NAME}" -cd tests/python -uv sync --all-extras --refresh -make test-kubernetes-mini +k8s_e2e_export_sandbox_resource_env + +k8s_e2e_generate_sdk_and_run_kubernetes_mini diff --git a/server/Dockerfile b/server/Dockerfile index 7828c81f4..601a6c133 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -54,6 +54,7 @@ COPY --from=builder /app/opensandbox_server /app/opensandbox_server COPY --from=builder /app/opensandbox_server/examples/example.config.k8s.toml /etc/opensandbox/config.toml COPY --from=builder /app/opensandbox_server/examples/example.config.k8s.zh.toml /etc/opensandbox/config.zh.toml COPY --from=builder /app/opensandbox_server/examples/example.batchsandbox-template.yaml /etc/opensandbox/example.batchsandbox-template.yaml +COPY --from=builder /app/opensandbox_server/examples/e2e.batchsandbox-template.yaml /etc/opensandbox/e2e.batchsandbox-template.yaml EXPOSE 8080 diff --git a/server/opensandbox_server/examples/e2e.batchsandbox-template.yaml b/server/opensandbox_server/examples/e2e.batchsandbox-template.yaml new file mode 100644 index 000000000..e1879b54f --- /dev/null +++ b/server/opensandbox_server/examples/e2e.batchsandbox-template.yaml @@ -0,0 +1,17 @@ +# E2E-only BatchSandbox template (not used in production defaults). +# Bundled in the server image for scripts/python-k8s-e2e*.sh; config.toml selects this path. +# +# Faster Pod teardown in Kind/CI: skip the default 30s graceful termination window. +# Do not use for real workloads where graceful shutdown matters. 
+ +# Metadata template (will be merged with runtime-generated metadata) +metadata: +# Spec template +spec: + replicas: 1 + template: + spec: + terminationGracePeriodSeconds: 0 + restartPolicy: Never + tolerations: + - operator: "Exists" diff --git a/tests/python/tests/base_e2e_test.py b/tests/python/tests/base_e2e_test.py index f8f679b2e..89ead7d47 100644 --- a/tests/python/tests/base_e2e_test.py +++ b/tests/python/tests/base_e2e_test.py @@ -51,6 +51,22 @@ def get_sandbox_image() -> str: return TEST_IMAGE +def get_e2e_sandbox_resource() -> dict[str, str]: + """ + CPU/memory for ``Sandbox.create`` / ``SandboxSync.create`` in E2E tests. + + Read from ``OPENSANDBOX_E2E_SANDBOX_CPU`` and ``OPENSANDBOX_E2E_SANDBOX_MEMORY``. + When unset, matches the SDK default (1 CPU, 2Gi memory). + + Kubernetes E2E entry scripts (``scripts/python-k8s-e2e*.sh``) export smaller + defaults for Kind; override via the same env vars. + """ + return { + "cpu": os.getenv("OPENSANDBOX_E2E_SANDBOX_CPU", "1"), + "memory": os.getenv("OPENSANDBOX_E2E_SANDBOX_MEMORY", "2Gi"), + } + + def is_kubernetes_runtime() -> bool: """Whether the current E2E run targets the Kubernetes backend.""" return TEST_RUNTIME == "kubernetes" diff --git a/tests/python/tests/test_sandbox_e2e.py b/tests/python/tests/test_sandbox_e2e.py index 7b1b943af..0737fa7a8 100644 --- a/tests/python/tests/test_sandbox_e2e.py +++ b/tests/python/tests/test_sandbox_e2e.py @@ -59,6 +59,7 @@ TEST_PROTOCOL, create_connection_config, create_connection_config_server_proxy, + get_e2e_sandbox_resource, get_sandbox_image, get_test_host_volume_dir, get_test_pvc_name, @@ -82,27 +83,6 @@ def _assert_recent_timestamp_ms(ts: int, *, tolerance_ms: int = 60_000) -> None: assert delta <= tolerance_ms, f"timestamp too far from now: delta={delta}ms (ts={ts})" -def _assert_endpoint_has_port(endpoint: str, expected_port: int) -> None: - assert endpoint - # In some deployments lifecycle returns direct "host:port". 
- # In others it returns a reverse-proxy route like "domain/route/{id}/{port}". - # In both cases, we expect NO scheme, and the port to be present deterministically. - assert "://" not in endpoint, f"unexpected scheme in endpoint: {endpoint}" - - if "/" in endpoint: - assert endpoint.endswith(f"/{expected_port}"), ( - f"endpoint route must end with /{expected_port}: {endpoint}" - ) - # Keep this strict: the route must contain a non-empty domain prefix. - assert endpoint.split("/", 1)[0], f"missing domain in endpoint: {endpoint}" - return - - host, port = endpoint.rsplit(":", 1) - assert host, f"missing host in endpoint: {endpoint}" - assert port.isdigit(), f"non-numeric port in endpoint: {endpoint}" - assert int(port) == expected_port, f"endpoint port mismatch: {endpoint} != :{expected_port}" - - def _assert_times_close(created_at, modified_at, *, tolerance_seconds: float = 2.0) -> None: """ Some filesystems / implementations may report created/modified with slight reordering. @@ -163,6 +143,7 @@ async def _ensure_sandbox_created(cls): cls.sandbox = await Sandbox.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cls.connection_config, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), @@ -234,7 +215,6 @@ async def test_01_sandbox_lifecycle_and_health(self): endpoint = await sandbox.get_endpoint(44772) assert endpoint is not None assert endpoint.endpoint is not None - _assert_endpoint_has_port(endpoint.endpoint, 44772) logger.info(f"✓ Sandbox endpoint: {endpoint.endpoint}") logger.info("Step 4: Get and verify metrics") @@ -310,6 +290,7 @@ async def test_01_sandbox_lifecycle_and_health(self): async def test_01b_manual_cleanup(self): sandbox = await Sandbox.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=TestSandboxE2E.connection_config, timeout=None, ready_timeout=timedelta(seconds=30), @@ -340,6 +321,7 @@ async def 
test_01a_network_policy_create(self): cfg = create_connection_config() sandbox = await Sandbox.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), @@ -374,6 +356,7 @@ async def test_01aa_network_policy_get_and_patch(self): cfg = create_connection_config() sandbox = await Sandbox.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), @@ -445,6 +428,7 @@ async def test_01ab_network_policy_get_and_patch_with_server_proxy(self): sandbox_ttl = timedelta(minutes=4) sandbox = await Sandbox.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=sandbox_ttl, ready_timeout=timedelta(seconds=90), @@ -538,6 +522,7 @@ async def test_01b_host_volume_mount(self): cfg = create_connection_config() sandbox = await Sandbox.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), @@ -609,6 +594,7 @@ async def test_01c_host_volume_mount_readonly(self): cfg = create_connection_config() sandbox = await Sandbox.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), @@ -661,6 +647,7 @@ async def test_01d_pvc_named_volume_mount(self): cfg = create_connection_config() sandbox = await Sandbox.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), @@ -728,6 +715,7 @@ async def test_01e_pvc_named_volume_mount_readonly(self): cfg = create_connection_config() sandbox = await Sandbox.create( 
image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), @@ -780,6 +768,7 @@ async def test_01f_pvc_named_volume_subpath_mount(self): cfg = create_connection_config() sandbox = await Sandbox.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), diff --git a/tests/python/tests/test_sandbox_e2e_sync.py b/tests/python/tests/test_sandbox_e2e_sync.py index 7a3cc58b3..51607d0d3 100644 --- a/tests/python/tests/test_sandbox_e2e_sync.py +++ b/tests/python/tests/test_sandbox_e2e_sync.py @@ -59,6 +59,7 @@ TEST_DOMAIN, TEST_PROTOCOL, create_connection_config_sync, + get_e2e_sandbox_resource, get_sandbox_image, get_test_host_volume_dir, get_test_pvc_name, @@ -79,26 +80,6 @@ def _assert_recent_timestamp_ms(ts: int, *, tolerance_ms: int = 60_000) -> None: assert delta <= tolerance_ms, f"timestamp too far from now: delta={delta}ms (ts={ts})" -def _assert_endpoint_has_port(endpoint: str, expected_port: int) -> None: - assert endpoint - # In some deployments lifecycle returns direct "host:port". - # In others it returns a reverse-proxy route like "domain/route/{id}/{port}". - # In both cases, we expect NO scheme, and the port to be present deterministically. 
- assert "://" not in endpoint, f"unexpected scheme in endpoint: {endpoint}" - - if "/" in endpoint: - assert endpoint.endswith(f"/{expected_port}"), ( - f"endpoint route must end with /{expected_port}: {endpoint}" - ) - assert endpoint.split("/", 1)[0], f"missing domain in endpoint: {endpoint}" - return - - host, port = endpoint.rsplit(":", 1) - assert host, f"missing host in endpoint: {endpoint}" - assert port.isdigit(), f"non-numeric port in endpoint: {endpoint}" - assert int(port) == expected_port, f"endpoint port mismatch: {endpoint} != :{expected_port}" - - def _assert_times_close(created_at, modified_at, *, tolerance_seconds: float = 2.0) -> None: """ Some filesystems / implementations may report created/modified with slight reordering. @@ -164,6 +145,7 @@ def _ensure_sandbox_created(cls) -> None: cls.sandbox = SandboxSync.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cls.connection_config, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), @@ -222,7 +204,6 @@ def test_01_sandbox_lifecycle_and_health(self) -> None: endpoint = sandbox.get_endpoint(44772) assert endpoint is not None assert endpoint.endpoint is not None - _assert_endpoint_has_port(endpoint.endpoint, 44772) metrics = sandbox.get_metrics() assert metrics is not None @@ -272,6 +253,7 @@ def test_01_sandbox_lifecycle_and_health(self) -> None: def test_01b_manual_cleanup(self) -> None: sandbox = SandboxSync.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=TestSandboxE2ESync.connection_config, timeout=None, ready_timeout=timedelta(seconds=30), @@ -299,6 +281,7 @@ def test_01a_network_policy_create(self) -> None: cfg = create_connection_config_sync() sandbox = SandboxSync.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), @@ -337,6 
+320,7 @@ def test_01aa_network_policy_get_and_patch(self) -> None: cfg = create_connection_config_sync() sandbox = SandboxSync.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), @@ -409,6 +393,7 @@ def test_01b_host_volume_mount(self) -> None: cfg = create_connection_config_sync() sandbox = SandboxSync.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), @@ -481,6 +466,7 @@ def test_01c_host_volume_mount_readonly(self) -> None: cfg = create_connection_config_sync() sandbox = SandboxSync.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), @@ -537,6 +523,7 @@ def test_01d_pvc_named_volume_mount(self) -> None: cfg = create_connection_config_sync() sandbox = SandboxSync.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), @@ -606,6 +593,7 @@ def test_01e_pvc_named_volume_mount_readonly(self) -> None: cfg = create_connection_config_sync() sandbox = SandboxSync.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30), @@ -662,6 +650,7 @@ def test_01f_pvc_named_volume_subpath_mount(self) -> None: cfg = create_connection_config_sync() sandbox = SandboxSync.create( image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), connection_config=cfg, timeout=timedelta(minutes=5), ready_timeout=timedelta(seconds=30),