diff --git a/.buildkite/build.rayci.yml b/.buildkite/build.rayci.yml index 56040191b551..be0fe43f38fa 100644 --- a/.buildkite/build.rayci.yml +++ b/.buildkite/build.rayci.yml @@ -132,89 +132,311 @@ steps: depends_on: manylinux-x86_64 job_env: manylinux-x86_64 - - label: ":tapioca: build: ray py{{matrix}} docker (x86_64)" - key: ray_images + - name: ray-image-cpu-build + label: "wanda: ray py{{matrix}} cpu (x86_64)" + wanda: ci/docker/ray-image-cpu.wanda.yaml + matrix: + - "3.10" + - "3.11" + - "3.12" + env: + PYTHON_VERSION: "{{matrix}}" + ARCH_SUFFIX: "" tags: - python_dependencies - docker - oss - instance_type: medium - commands: - - bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}} - --platform cu11.7.1-cudnn8 --platform cu11.8.0-cudnn8 - --platform cu12.1.1-cudnn8 --platform cu12.3.2-cudnn9 - --platform cu12.4.1-cudnn --platform cu12.5.1-cudnn - --platform cu12.6.3-cudnn --platform cu12.8.1-cudnn - --platform cu12.9.1-cudnn - --platform cpu - --image-type ray --upload depends_on: - - manylinux-x86_64 - - forge - - raycudabase + - ray-wheel-build - raycpubase + + # Push ray CPU images to Docker Hub + - label: ":docker: push: ray py{{matrix}} cpu (x86_64)" + key: ray_images_cpu_push + instance_type: small + commands: + - bazel run //.buildkite:copy_files -- --destination docker_login + - bazel run //ci/ray_ci/automation:push_ray_image -- + --python-version {{matrix}} + --platform cpu + --image-type ray + --upload matrix: - "3.10" - "3.11" - "3.12" + depends_on: + - ray-image-cpu-build + tags: + - python_dependencies + - docker + - skip-on-premerge + - oss - - label: ":tapioca: build: ray-extra py{{matrix}} docker (x86_64)" - key: ray_extra_images + - name: ray-image-cuda-build + label: "wanda: ray py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + wanda: ci/docker/ray-image-cuda.wanda.yaml + matrix: + setup: + python: + - "3.10" + - "3.11" + - "3.12" + cuda: + - "11.7.1-cudnn8" + - "11.8.0-cudnn8" + - "12.1.1-cudnn8" + - "12.3.2-cudnn9" 
+ - "12.4.1-cudnn" + - "12.5.1-cudnn" + - "12.6.3-cudnn" + - "12.8.1-cudnn" + - "12.9.1-cudnn" + env: + PYTHON_VERSION: "{{matrix.python}}" + CUDA_VERSION: "{{matrix.cuda}}" + ARCH_SUFFIX: "" tags: - python_dependencies - docker - oss - instance_type: medium + depends_on: + - ray-wheel-build + - raycudabase + + # Push ray CUDA images to Docker Hub + - label: ":docker: push: ray py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + key: ray_images_cuda_push + instance_type: small commands: - - bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}} - --platform cu11.7.1-cudnn8 --platform cu11.8.0-cudnn8 - --platform cu12.1.1-cudnn8 --platform cu12.3.2-cudnn9 - --platform cu12.4.1-cudnn --platform cu12.5.1-cudnn - --platform cu12.6.3-cudnn --platform cu12.8.1-cudnn - --platform cu12.9.1-cudnn - --platform cpu - --image-type ray-extra --upload + - bazel run //.buildkite:copy_files -- --destination docker_login + - bazel run //ci/ray_ci/automation:push_ray_image -- + --python-version {{matrix.python}} + --platform cu{{matrix.cuda}} + --image-type ray + --upload + matrix: + setup: + python: + - "3.10" + - "3.11" + - "3.12" + cuda: + - "11.7.1-cudnn8" + - "11.8.0-cudnn8" + - "12.1.1-cudnn8" + - "12.3.2-cudnn9" + - "12.4.1-cudnn" + - "12.5.1-cudnn" + - "12.6.3-cudnn" + - "12.8.1-cudnn" + - "12.9.1-cudnn" depends_on: - - manylinux-x86_64 - - forge - - raycpubaseextra - - raycudabaseextra + - ray-image-cuda-build + tags: + - python_dependencies + - docker + - skip-on-premerge + - oss + + - name: ray-extra-image-cpu-build + label: "wanda: ray-extra py{{matrix}} cpu (x86_64)" + wanda: ci/docker/ray-extra-image-cpu.wanda.yaml matrix: - "3.10" - "3.11" - "3.12" + env: + PYTHON_VERSION: "{{matrix}}" + ARCH_SUFFIX: "" + tags: + - python_dependencies + - docker + - oss + depends_on: + - ray-wheel-build + - raycpubaseextra - - label: ":tapioca: build: ray-llm py{{matrix}} docker (x86_64)" + - name: ray-extra-image-cuda-build + label: "wanda: ray-extra 
py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + wanda: ci/docker/ray-extra-image-cuda.wanda.yaml + matrix: + setup: + python: + - "3.10" + - "3.11" + - "3.12" + cuda: + - "11.7.1-cudnn8" + - "11.8.0-cudnn8" + - "12.1.1-cudnn8" + - "12.3.2-cudnn9" + - "12.4.1-cudnn" + - "12.5.1-cudnn" + - "12.6.3-cudnn" + - "12.8.1-cudnn" + - "12.9.1-cudnn" + env: + PYTHON_VERSION: "{{matrix.python}}" + CUDA_VERSION: "{{matrix.cuda}}" + ARCH_SUFFIX: "" tags: - python_dependencies - docker - oss - instance_type: medium - commands: - - bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}} - --platform cu12.8.1-cudnn --image-type ray-llm --upload depends_on: - - manylinux-x86_64 - - forge + - ray-wheel-build + - raycudabaseextra + + - name: ray-llm-image-cuda-build + label: "wanda: ray-llm py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + wanda: ci/docker/ray-llm-image-cuda.wanda.yaml + matrix: + setup: + python: + - "3.11" + cuda: + - "12.8.1-cudnn" + env: + PYTHON_VERSION: "{{matrix.python}}" + CUDA_VERSION: "{{matrix.cuda}}" + ARCH_SUFFIX: "" + tags: + - python_dependencies + - docker + - oss + depends_on: + - ray-wheel-build - ray-llmbase + + - name: ray-llm-extra-image-cuda-build + label: "wanda: ray-llm-extra py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + wanda: ci/docker/ray-llm-extra-image-cuda.wanda.yaml + matrix: + setup: + python: + - "3.11" + cuda: + - "12.8.1-cudnn" + env: + PYTHON_VERSION: "{{matrix.python}}" + CUDA_VERSION: "{{matrix.cuda}}" + ARCH_SUFFIX: "" + tags: + - python_dependencies + - docker + - oss + depends_on: + - ray-wheel-build + - ray-llmbaseextra + + # Push ray-extra CPU images to Docker Hub (goes to rayproject/ray) + - label: ":docker: push: ray-extra py{{matrix}} cpu (x86_64)" + key: ray_extra_images_cpu_push + instance_type: small + commands: + - bazel run //.buildkite:copy_files -- --destination docker_login + - bazel run //ci/ray_ci/automation:push_ray_image -- + --python-version {{matrix}} + --platform cpu + --image-type 
ray-extra + --upload matrix: + - "3.10" - "3.11" + - "3.12" + depends_on: + - ray-extra-image-cpu-build + tags: + - python_dependencies + - docker + - skip-on-premerge + - oss - - label: ":tapioca: build: ray-llm-extra py{{matrix}} docker (x86_64)" + # Push ray-extra CUDA images to Docker Hub (goes to rayproject/ray) + - label: ":docker: push: ray-extra py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + key: ray_extra_images_cuda_push + instance_type: small + commands: + - bazel run //.buildkite:copy_files -- --destination docker_login + - bazel run //ci/ray_ci/automation:push_ray_image -- + --python-version {{matrix.python}} + --platform cu{{matrix.cuda}} + --image-type ray-extra + --upload + matrix: + setup: + python: + - "3.10" + - "3.11" + - "3.12" + cuda: + - "11.7.1-cudnn8" + - "11.8.0-cudnn8" + - "12.1.1-cudnn8" + - "12.3.2-cudnn9" + - "12.4.1-cudnn" + - "12.5.1-cudnn" + - "12.6.3-cudnn" + - "12.8.1-cudnn" + - "12.9.1-cudnn" + depends_on: + - ray-extra-image-cuda-build tags: - python_dependencies - docker + - skip-on-premerge - oss - instance_type: medium + + # Push ray-llm CUDA images to Docker Hub (goes to rayproject/ray-llm) + - label: ":docker: push: ray-llm py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + key: ray_llm_images_cuda_push + instance_type: small commands: - - bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}} - --platform cu12.8.1-cudnn --image-type ray-llm-extra --upload + - bazel run //.buildkite:copy_files -- --destination docker_login + - bazel run //ci/ray_ci/automation:push_ray_image -- + --python-version {{matrix.python}} + --platform cu{{matrix.cuda}} + --image-type ray-llm + --upload + matrix: + setup: + python: + - "3.11" + cuda: + - "12.8.1-cudnn" depends_on: - - manylinux-x86_64 - - forge - - ray-llmbaseextra + - ray-llm-image-cuda-build + tags: + - python_dependencies + - docker + - skip-on-premerge + - oss + + # Push ray-llm-extra CUDA images to Docker Hub (goes to rayproject/ray-llm) + - label: 
":docker: push: ray-llm-extra py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + key: ray_llm_extra_images_cuda_push + instance_type: small + commands: + - bazel run //.buildkite:copy_files -- --destination docker_login + - bazel run //ci/ray_ci/automation:push_ray_image -- + --python-version {{matrix.python}} + --platform cu{{matrix.cuda}} + --image-type ray-llm-extra + --upload matrix: - - "3.11" + setup: + python: + - "3.11" + cuda: + - "12.8.1-cudnn" + depends_on: + - ray-llm-extra-image-cuda-build + tags: + - python_dependencies + - docker + - skip-on-premerge + - oss - label: ":tapioca: smoke test build-docker.sh" tags: @@ -231,6 +453,7 @@ steps: depends_on: - forge + # Generate nightly indexes after images are pushed - label: ":tapioca: generate nightly indexes" instance_type: small tags: @@ -241,6 +464,7 @@ steps: - bazel run .buildkite:copy_files -- --destination docker_login - bazel run //ci/ray_ci/automation:generate_index -- --prefix nightly depends_on: - - ray_images + - ray_images_cpu_push + - ray_images_cuda_push - ray_images_aarch64 - forge diff --git a/ci/docker/ray-extra-image-cpu.wanda.yaml b/ci/docker/ray-extra-image-cpu.wanda.yaml new file mode 100644 index 000000000000..a07dcae79f43 --- /dev/null +++ b/ci/docker/ray-extra-image-cpu.wanda.yaml @@ -0,0 +1,19 @@ +# Ray Extra CPU Image +# Installs ray wheel into the CPU base-extra image +# +# This produces the final rayproject/ray image with -extra tag for CPU-only deployments. +# The base-extra image includes additional Python dependencies beyond base. 
+# +name: "ray-extra-py$PYTHON_VERSION-cpu$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cpu-base-extra$ARCH_SUFFIX" # CPU base-extra image + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cpu-base-extra$ARCH_SUFFIX + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-extra-image-cuda.wanda.yaml b/ci/docker/ray-extra-image-cuda.wanda.yaml new file mode 100644 index 000000000000..2d2629cdb959 --- /dev/null +++ b/ci/docker/ray-extra-image-cuda.wanda.yaml @@ -0,0 +1,20 @@ +# Ray Extra CUDA Image +# Installs ray wheel into the CUDA base-extra image +# +# This produces the final rayproject/ray image with -extra tag for GPU deployments. +# The base-extra image includes additional Python dependencies beyond base. 
+# +name: "ray-extra-py$PYTHON_VERSION-cu$CUDA_VERSION$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base-extra$ARCH_SUFFIX" # CUDA base-extra image + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - CUDA_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base-extra$ARCH_SUFFIX + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-image-cpu.wanda.yaml b/ci/docker/ray-image-cpu.wanda.yaml new file mode 100644 index 000000000000..ff51000348bf --- /dev/null +++ b/ci/docker/ray-image-cpu.wanda.yaml @@ -0,0 +1,18 @@ +# Ray CPU Image +# Installs ray wheel into the CPU base image +# +# This produces the final rayproject/ray image for CPU-only deployments. +# +name: "ray-py$PYTHON_VERSION-cpu$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cpu-base$ARCH_SUFFIX" # CPU base image with Python + deps + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cpu-base$ARCH_SUFFIX + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-image-cuda.wanda.yaml b/ci/docker/ray-image-cuda.wanda.yaml new file mode 100644 index 000000000000..bd716d920748 --- /dev/null +++ b/ci/docker/ray-image-cuda.wanda.yaml @@ -0,0 +1,19 @@ +# Ray CUDA Image +# Installs ray wheel into the CUDA base image +# +# This produces the final rayproject/ray image for GPU deployments. 
+# +name: "ray-py$PYTHON_VERSION-cu$CUDA_VERSION$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base$ARCH_SUFFIX" # CUDA base image + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - CUDA_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base$ARCH_SUFFIX + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-image.Dockerfile b/ci/docker/ray-image.Dockerfile new file mode 100644 index 000000000000..e13fa2093c10 --- /dev/null +++ b/ci/docker/ray-image.Dockerfile @@ -0,0 +1,58 @@ +# syntax=docker/dockerfile:1.3-labs +# +# Ray Image Builder +# ================= +# Installs the Ray wheel into a base image (CPU or CUDA). +# +# This Dockerfile uses multi-stage builds to: +# 1. Extract the wheel from the ray-wheel wanda cache (scratch image) +# 2. Install it into the base image (ray-py{VER}-{cpu/cuda}-base) +# +# The base image already contains: +# - Python with conda/anaconda +# - Core dependencies (numpy, etc.) +# - System libraries (jemalloc, etc.) 
+# +# This image adds: +# - Ray wheel with [all] extras +# - pip freeze output for reproducibility +# +ARG BASE_IMAGE +ARG RAY_WHEEL_IMAGE + +FROM ${RAY_WHEEL_IMAGE} AS wheel-source +FROM ${BASE_IMAGE} + +ARG PYTHON_VERSION=3.10 + +COPY --from=wheel-source /*.whl /tmp/ +COPY python/requirements_compiled.txt /tmp/ + +# Install Ray wheel with all extras +RUN <&2 + ls -l /tmp/*.whl >&2 + exit 1 +fi +WHEEL_FILE="${WHEEL_FILES[0]}" + +echo "Installing wheel: $WHEEL_FILE" + +# Install ray with all extras, using constraints for reproducibility +$HOME/anaconda3/bin/pip --no-cache-dir install \ + -c /tmp/requirements_compiled.txt \ + "${WHEEL_FILE}[all]" + +# Save pip freeze for debugging/reproducibility +$HOME/anaconda3/bin/pip freeze > /home/ray/pip-freeze.txt + +echo "Ray version: $($HOME/anaconda3/bin/python -c 'import ray; print(ray.__version__)')" +EOF + +CMD ["python"] diff --git a/ci/docker/ray-llm-extra-image-cuda.wanda.yaml b/ci/docker/ray-llm-extra-image-cuda.wanda.yaml new file mode 100644 index 000000000000..1bf0993392ff --- /dev/null +++ b/ci/docker/ray-llm-extra-image-cuda.wanda.yaml @@ -0,0 +1,20 @@ +# Ray LLM Extra CUDA Image +# Installs ray wheel into the ray-llm base-extra image +# +# This produces the final rayproject/ray-llm image with -extra tag. +# The ray-llm base-extra image includes additional Python dependencies beyond base. 
+# +name: "ray-llm-extra-py$PYTHON_VERSION-cu$CUDA_VERSION$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-llm-py$PYTHON_VERSION-cu$CUDA_VERSION-base-extra$ARCH_SUFFIX" # LLM base-extra image + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - CUDA_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-llm-py$PYTHON_VERSION-cu$CUDA_VERSION-base-extra$ARCH_SUFFIX + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-llm-image-cuda.wanda.yaml b/ci/docker/ray-llm-image-cuda.wanda.yaml new file mode 100644 index 000000000000..3d96bf7f4f45 --- /dev/null +++ b/ci/docker/ray-llm-image-cuda.wanda.yaml @@ -0,0 +1,20 @@ +# Ray LLM CUDA Image +# Installs ray wheel into the ray-llm base image +# +# This produces the final rayproject/ray-llm image for LLM deployments. +# The ray-llm base image includes vLLM and other LLM-specific dependencies. 
+#
+name: "ray-llm-py$PYTHON_VERSION-cu$CUDA_VERSION$ARCH_SUFFIX"
+disable_caching: true
+froms:
+  - "cr.ray.io/rayproject/ray-llm-py$PYTHON_VERSION-cu$CUDA_VERSION-base$ARCH_SUFFIX"  # LLM base image
+  - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX"  # Ray wheel
+dockerfile: ci/docker/ray-image.Dockerfile
+srcs:
+  - python/requirements_compiled.txt
+build_args:
+  - PYTHON_VERSION
+  - CUDA_VERSION
+  - ARCH_SUFFIX
+  - BASE_IMAGE=cr.ray.io/rayproject/ray-llm-py$PYTHON_VERSION-cu$CUDA_VERSION-base$ARCH_SUFFIX
+  - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX
diff --git a/ci/ray_ci/automation/BUILD.bazel b/ci/ray_ci/automation/BUILD.bazel
index 61553051805e..5184e41df944 100644
--- a/ci/ray_ci/automation/BUILD.bazel
+++ b/ci/ray_ci/automation/BUILD.bazel
@@ -305,3 +305,29 @@ py_binary(
         ci_require("click"),
     ],
 )
+
+py_binary(
+    name = "push_ray_image",
+    srcs = ["push_ray_image.py"],
+    exec_compatible_with = ["//bazel:py3"],
+    deps = [
+        ":crane_lib",
+        "//ci/ray_ci:ray_ci_lib",
+        ci_require("click"),
+    ],
+)
+
+py_test(
+    name = "test_push_ray_image",
+    size = "small",
+    srcs = ["test_push_ray_image.py"],
+    exec_compatible_with = ["//bazel:py3"],
+    tags = [
+        "ci_unit",
+        "team:ci",
+    ],
+    deps = [
+        ":push_ray_image",
+        ci_require("pytest"),
+    ],
+)
diff --git a/ci/ray_ci/automation/push_ray_image.py b/ci/ray_ci/automation/push_ray_image.py
new file mode 100644
index 000000000000..4854aa54e4e5
--- /dev/null
+++ b/ci/ray_ci/automation/push_ray_image.py
@@ -0,0 +1,343 @@
+"""
+Push Wanda-cached ray images to Docker Hub.
+
+This script copies ray images from the Wanda cache to Docker Hub with tags
+matching the original format from docker_container.py.
+
+Supports multiple image types:
+    - ray: Standard ray image -> rayproject/ray
+    - ray-extra: Ray with extra deps -> rayproject/ray
+    - ray-llm: Ray for LLM workloads -> rayproject/ray-llm
+    - ray-llm-extra: Ray LLM with extra deps -> rayproject/ray-llm
+
+An "-extra" image type shares its repository with the base type, so its tags
+carry an "-extra" marker to keep the two from overwriting each other.
+
+Example:
+    bazel run //ci/ray_ci/automation:push_ray_image -- \\
+        --python-version 3.10 \\
+        --platform cpu \\
+        --image-type ray \\
+        --upload
+
+Tag format:
+    - Nightly: nightly.YYMMDD.{sha[:6]}-py310-cpu
+    - Release: {release_name}.{sha[:6]}-py310-cpu
+    - Other: {sha[:6]}-py310-cpu
+    - Extra image types: {version_prefix}-extra-py310-cpu
+
+Run with --help to see all options.
+"""
+
+import logging
+import os
+import sys
+from datetime import datetime, timezone as tz
+from typing import List
+
+import click
+
+from ci.ray_ci.automation.crane_lib import (
+    call_crane_copy,
+    call_crane_manifest,
+)
+from ci.ray_ci.docker_container import RAY_REPO_MAP
+from ci.ray_ci.utils import ecr_docker_login
+
+# GPU_PLATFORM is the default GPU platform that gets aliased as "gpu"
+# This must match the definition in ci/ray_ci/docker_container.py
+GPU_PLATFORM = "cu12.1.1-cudnn8"
+
+# Default architecture (x86_64 gets no suffix)
+DEFAULT_ARCHITECTURE = "x86_64"
+
+# Valid image types that can be pushed
+VALID_IMAGE_TYPES = list(RAY_REPO_MAP.keys())
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(message)s",
+    stream=sys.stdout,
+)
+logger = logging.getLogger(__name__)
+
+
+class PushRayImageError(Exception):
+    """Error raised when pushing ray images fails."""
+
+
+def _format_python_version_tag(python_version: str) -> str:
+    """
+    Format python version as -py310 (no dots, with hyphen prefix).
+
+    Examples:
+        3.10 -> -py310
+        3.11 -> -py311
+    """
+    return f"-py{python_version.replace('.', '')}"
+
+
+def _format_platform_tag(platform: str) -> str:
+    """
+    Format platform as -cpu or shortened CUDA version.
+
+    Examples:
+        cpu -> -cpu
+        cu11.7.1-cudnn8 -> -cu117
+        cu12.1.1-cudnn8 -> -cu121
+
+    Raises:
+        ValueError: If a non-cpu platform has no "major.minor" version.
+    """
+    if platform == "cpu":
+        return "-cpu"
+    # cu11.7.1-cudnn8 -> ['cu11', '7', '1-cudnn8'] -> -cu117
+    versions = platform.split(".")
+    if len(versions) < 2:
+        raise ValueError(f"Cannot derive a platform tag from {platform!r}")
+    return f"-{versions[0]}{versions[1]}"
+
+
+def _format_architecture_tag(architecture: str) -> str:
+    """
+    Format architecture as suffix (empty for x86_64, -aarch64 for aarch64).
+
+    Examples:
+        x86_64 -> ""
+        aarch64 -> -aarch64
+    """
+    if architecture == DEFAULT_ARCHITECTURE:
+        return ""
+    return f"-{architecture}"
+
+
+def _format_variation_tag(image_type: str) -> str:
+    """
+    Format the image variation as a tag component (-extra or empty).
+
+    Examples:
+        ray -> ""
+        ray-extra -> -extra
+        ray-llm-extra -> -extra
+    """
+    return "-extra" if image_type.endswith("-extra") else ""
+
+
+def _generate_image_tags(
+    commit: str,
+    python_version: str,
+    platform: str,
+    architecture: str = DEFAULT_ARCHITECTURE,
+    image_type: str = "ray",
+) -> List[str]:
+    """
+    Generate destination tags matching the original ray docker image format.
+
+    Tag format:
+        {version_prefix}{variation_tag}{py_tag}{platform_tag}{arch_tag}
+
+    Version prefix:
+        - Nightly (master + nightly schedule): nightly.YYMMDD.{sha[:6]}
+        - Release branches: {release_name}.{sha[:6]}
+        - Other: {sha[:6]}
+
+    Variation tag:
+        "-extra" for the *-extra image types, empty otherwise. Without it the
+        ray and ray-extra pushes (same Docker Hub repo) would write identical
+        tags and the later push would replace the earlier image.
+
+    For GPU_PLATFORM, also generates -gpu alias tags.
+    """
+    branch = os.environ.get("BUILDKITE_BRANCH", "")
+    schedule = os.environ.get("RAYCI_SCHEDULE", "")
+
+    sha_tag = commit[:6]
+    formatted_date = datetime.now(tz.utc).strftime("%y%m%d")
+
+    # Generate version prefix
+    if branch == "master" and schedule == "nightly":
+        version_tags = [f"nightly.{formatted_date}.{sha_tag}"]
+    elif branch.startswith("releases/"):
+        release_name = branch[len("releases/") :]
+        version_tags = [f"{release_name}.{sha_tag}"]
+    else:
+        version_tags = [sha_tag]
+
+    # NOTE(review): "-extra" is placed right after the version prefix on the
+    # assumption that this is docker_container.py's layout - confirm.
+    variation_tag = _format_variation_tag(image_type)
+    py_tag = _format_python_version_tag(python_version)
+    arch_tag = _format_architecture_tag(architecture)
+
+    # For GPU_PLATFORM, also create -gpu alias
+    platform_tags = [_format_platform_tag(platform)]
+    if platform == GPU_PLATFORM:
+        platform_tags.append("-gpu")
+
+    return [
+        f"{version}{variation_tag}{py_tag}{ptag}{arch_tag}"
+        for version in version_tags
+        for ptag in platform_tags
+    ]
+
+
+def _get_wanda_image_name(
+    image_type: str,
+    python_version: str,
+    platform: str,
+    architecture: str = DEFAULT_ARCHITECTURE,
+) -> str:
+    """
+    Get the wanda-cached image name for the given image type.
+
+    Wanda image naming follows the pattern:
+        {image_type}-py{version}-{platform}{arch_suffix}
+
+    Examples:
+        ray-py3.10-cpu
+        ray-extra-py3.10-cu12.1.1-cudnn8
+        ray-llm-py3.11-cu12.8.1-cudnn
+    """
+    # "cpu" needs no special case: it is already the literal platform segment.
+    arch_suffix = _format_architecture_tag(architecture)
+    return f"{image_type}-py{python_version}-{platform}{arch_suffix}"
+
+
+def _image_exists(tag: str) -> bool:
+    """Check if a container image manifest exists using crane."""
+    return_code, _ = call_crane_manifest(tag)
+    return return_code == 0
+
+
+def _copy_image(source: str, destination: str, dry_run: bool = False) -> None:
+    """Copy a container image from source to destination using crane."""
+    if dry_run:
+        logger.info(f"DRY RUN: Would copy {source} -> {destination}")
+        return
+
+    logger.info(f"Copying {source} -> {destination}")
+    return_code, output = call_crane_copy(source, destination)
+    if return_code != 0:
+        raise PushRayImageError(f"Crane copy failed: {output}")
+    logger.info(f"Successfully copied to {destination}")
+
+
+@click.command()
+@click.option(
+    "--python-version",
+    type=str,
+    required=True,
+    help="Python version (e.g., '3.10')",
+)
+@click.option(
+    "--platform",
+    type=str,
+    required=True,
+    help="Platform (e.g., 'cpu', 'cu11.7.1-cudnn8')",
+)
+@click.option(
+    "--image-type",
+    type=click.Choice(VALID_IMAGE_TYPES),
+    default="ray",
+    help="Image type (e.g., 'ray', 'ray-extra', 'ray-llm', 'ray-llm-extra')",
+)
+@click.option(
+    "--architecture",
+    type=str,
+    default=DEFAULT_ARCHITECTURE,
+    help="Architecture (e.g., 'x86_64', 'aarch64')",
+)
+@click.option(
+    "--upload",
+    is_flag=True,
+    default=False,
+    help="Actually push to Docker Hub. Without this flag, runs in dry-run mode.",
+)
+def main(
+    python_version: str,
+    platform: str,
+    image_type: str,
+    architecture: str,
+    upload: bool,
+) -> None:
+    """
+    Push a Wanda-cached ray image to Docker Hub.
+
+    Handles authentication for ECR (source/Wanda cache) and Docker Hub
+    (destination via copy_files.py).
+
+    Supports multiple image types which map to Docker Hub repos:
+        - ray, ray-extra -> rayproject/ray
+        - ray-llm, ray-llm-extra -> rayproject/ray-llm
+        - ray-ml, ray-ml-extra -> rayproject/ray-ml
+
+    Tags are generated matching the original docker_container.py format:
+        - Nightly: nightly.YYMMDD.{sha[:6]}-py310-cpu
+        - Release: {release_name}.{sha[:6]}-py310-cpu
+        - Other: {sha[:6]}-py310-cpu
+        - Extra image types: {version_prefix}-extra-py310-cpu
+
+    For GPU_PLATFORM (cu12.1.1-cudnn8), also pushes with -gpu alias tag.
+
+    Raises:
+        PushRayImageError: If a required environment variable is unset, the
+            source image is missing from the Wanda cache, or a copy fails.
+    """
+    dry_run = not upload
+    if dry_run:
+        logger.info("DRY RUN MODE - no images will be pushed")
+
+    # Get required environment variables
+    rayci_work_repo = os.environ.get("RAYCI_WORK_REPO")
+    rayci_build_id = os.environ.get("RAYCI_BUILD_ID")
+    commit = os.environ.get("BUILDKITE_COMMIT")
+
+    required = {
+        "RAYCI_WORK_REPO": rayci_work_repo,
+        "RAYCI_BUILD_ID": rayci_build_id,
+        "BUILDKITE_COMMIT": commit,
+    }
+    missing = [k for k, v in required.items() if not v]
+    if missing:
+        raise PushRayImageError(f"Missing required env vars: {', '.join(missing)}")
+
+    # Determine destination Docker Hub repository from image type
+    docker_hub_repo = f"rayproject/{RAY_REPO_MAP[image_type]}"
+    logger.info(f"Image type: {image_type} -> Docker Hub repo: {docker_hub_repo}")
+
+    # Construct source image from Wanda cache
+    wanda_image_name = _get_wanda_image_name(
+        image_type, python_version, platform, architecture
+    )
+    source_tag = f"{rayci_work_repo}:{rayci_build_id}-{wanda_image_name}"
+
+    # Generate destination tags (may include aliases like -gpu for GPU_PLATFORM).
+    # image_type is forwarded so *-extra images get their own "-extra" tags.
+    destination_tags = _generate_image_tags(
+        commit, python_version, platform, architecture, image_type
+    )
+
+    logger.info(f"Source image (Wanda): {source_tag}")
+    logger.info(f"Destination tags: {destination_tags}")
+
+    # Authenticate with ECR (source registry)
+    # Docker Hub auth is handled by copy_files.py --destination docker_login
+    ecr_registry = rayci_work_repo.split("/")[0]
+    ecr_docker_login(ecr_registry)
+
+    # Verify source image exists
+    logger.info("Verifying source image in Wanda cache...")
+    if not _image_exists(source_tag):
+        raise PushRayImageError(f"Source image not found in Wanda cache: {source_tag}")
+
+    # Copy image to Docker Hub with all tags
+    for tag in destination_tags:
+        full_destination = f"{docker_hub_repo}:{tag}"
+        _copy_image(source_tag, full_destination, dry_run)
+
+    logger.info(f"Successfully pushed {image_type} image with tags: {destination_tags}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ci/ray_ci/automation/test_push_ray_image.py b/ci/ray_ci/automation/test_push_ray_image.py
new file mode 100644
index 000000000000..8ee884669fcd
--- /dev/null
+++ b/ci/ray_ci/automation/test_push_ray_image.py
@@ -0,0 +1,253 @@
+import sys
+from unittest import mock
+
+import pytest
+
+from ci.ray_ci.automation.push_ray_image import (
+    GPU_PLATFORM,
+    _format_architecture_tag,
+    _format_platform_tag,
+    _format_python_version_tag,
+    _format_variation_tag,
+    _generate_image_tags,
+    _get_wanda_image_name,
+)
+
+
+class TestFormatPythonVersionTag:
+    @pytest.mark.parametrize(
+        ("python_version", "expected"),
+        [
+            ("3.10", "-py310"),
+            ("3.11", "-py311"),
+            ("3.12", "-py312"),
+            ("3.9", "-py39"),
+        ],
+    )
+    def test_format_python_version_tag(self, python_version, expected):
+        assert _format_python_version_tag(python_version) == expected
+
+
+class TestFormatPlatformTag:
+    @pytest.mark.parametrize(
+        ("platform", "expected"),
+        [
+            ("cpu", "-cpu"),
+            ("cu11.7.1-cudnn8", "-cu117"),
+            ("cu11.8.0-cudnn8", "-cu118"),
+            ("cu12.1.1-cudnn8", "-cu121"),
+            ("cu12.3.2-cudnn9", "-cu123"),
+            ("cu12.8.1-cudnn", "-cu128"),
+        ],
+    )
+    def test_format_platform_tag(self, platform, expected):
+        assert _format_platform_tag(platform) == expected
+
+    def test_format_platform_tag_rejects_malformed_platform(self):
+        with pytest.raises(ValueError):
+            _format_platform_tag("cuda")
+
+
+class TestFormatArchitectureTag:
+    @pytest.mark.parametrize(
+        ("architecture", "expected"),
+        [
+            ("x86_64", ""),
+            ("aarch64", "-aarch64"),
+        ],
+    )
+    def test_format_architecture_tag(self, architecture, expected):
+        assert _format_architecture_tag(architecture) == expected
+
+
+class TestFormatVariationTag:
+    @pytest.mark.parametrize(
+        ("image_type", "expected"),
+        [
+            ("ray", ""),
+            ("ray-extra", "-extra"),
+            ("ray-llm", ""),
+            ("ray-llm-extra", "-extra"),
+        ],
+    )
+    def test_format_variation_tag(self, image_type, expected):
+        assert _format_variation_tag(image_type) == expected
+
+
+class TestGetWandaImageName:
+    @pytest.mark.parametrize(
+        ("image_type", "python_version", "platform", "architecture", "expected"),
+        [
+            ("ray", "3.10", "cpu", "x86_64", "ray-py3.10-cpu"),
+            ("ray", "3.11", "cu12.1.1-cudnn8", "x86_64", "ray-py3.11-cu12.1.1-cudnn8"),
+            ("ray", "3.10", "cpu", "aarch64", "ray-py3.10-cpu-aarch64"),
+            ("ray-extra", "3.10", "cpu", "x86_64", "ray-extra-py3.10-cpu"),
+            (
+                "ray-extra",
+                "3.11",
+                "cu12.8.1-cudnn",
+                "x86_64",
+                "ray-extra-py3.11-cu12.8.1-cudnn",
+            ),
+            (
+                "ray-llm",
+                "3.11",
+                "cu12.8.1-cudnn",
+                "x86_64",
+                "ray-llm-py3.11-cu12.8.1-cudnn",
+            ),
+            (
+                "ray-llm-extra",
+                "3.11",
+                "cu12.8.1-cudnn",
+                "x86_64",
+                "ray-llm-extra-py3.11-cu12.8.1-cudnn",
+            ),
+        ],
+    )
+    def test_get_wanda_image_name(
+        self, image_type, python_version, platform, architecture, expected
+    ):
+        assert (
+            _get_wanda_image_name(image_type, python_version, platform, architecture)
+            == expected
+        )
+
+
+class TestGenerateImageTags:
+    @mock.patch.dict(
+        "os.environ",
+        {
+            "BUILDKITE_BRANCH": "master",
+            "RAYCI_SCHEDULE": "nightly",
+        },
+    )
+    @mock.patch("ci.ray_ci.automation.push_ray_image.datetime")
+    def test_nightly_tags(self, mock_datetime):
+        mock_datetime.now.return_value.strftime.return_value = "260107"
+
+        tags = _generate_image_tags(
+            commit="abc123def456",
+            python_version="3.10",
+            platform="cpu",
+            architecture="x86_64",
+        )
+
+        assert tags == ["nightly.260107.abc123-py310-cpu"]
+
+    @mock.patch.dict(
+        "os.environ",
+        {
+            "BUILDKITE_BRANCH": "master",
+            "RAYCI_SCHEDULE": "nightly",
+        },
+    )
+    @mock.patch("ci.ray_ci.automation.push_ray_image.datetime")
+    def test_nightly_tags_gpu_platform_includes_alias(self, mock_datetime):
+        mock_datetime.now.return_value.strftime.return_value = "260107"
+
+        tags = _generate_image_tags(
+            commit="abc123def456",
+            python_version="3.10",
+            platform=GPU_PLATFORM,
+            architecture="x86_64",
+        )
+
+        assert tags == [
+            "nightly.260107.abc123-py310-cu121",
+            "nightly.260107.abc123-py310-gpu",
+        ]
+
+    @mock.patch.dict(
+        "os.environ",
+        {
+            "BUILDKITE_BRANCH": "releases/2.44.0",
+            "RAYCI_SCHEDULE": "",
+        },
+    )
+    def test_release_tags(self):
+        tags = _generate_image_tags(
+            commit="abc123def456",
+            python_version="3.11",
+            platform="cu12.3.2-cudnn9",
+            architecture="x86_64",
+        )
+
+        assert tags == ["2.44.0.abc123-py311-cu123"]
+
+    @mock.patch.dict(
+        "os.environ",
+        {
+            "BUILDKITE_BRANCH": "releases/2.44.0",
+            "RAYCI_SCHEDULE": "",
+        },
+    )
+    def test_release_tags_aarch64(self):
+        tags = _generate_image_tags(
+            commit="abc123def456",
+            python_version="3.10",
+            platform="cpu",
+            architecture="aarch64",
+        )
+
+        assert tags == ["2.44.0.abc123-py310-cpu-aarch64"]
+
+    @mock.patch.dict(
+        "os.environ",
+        {
+            "BUILDKITE_BRANCH": "feature-branch",
+            "RAYCI_SCHEDULE": "",
+        },
+    )
+    def test_other_branch_tags(self):
+        tags = _generate_image_tags(
+            commit="abc123def456",
+            python_version="3.12",
+            platform="cpu",
+            architecture="x86_64",
+        )
+
+        assert tags == ["abc123-py312-cpu"]
+
+    @mock.patch.dict(
+        "os.environ",
+        {
+            "BUILDKITE_BRANCH": "master",
+            "RAYCI_SCHEDULE": "",  # Not nightly
+        },
+    )
+    def test_master_non_nightly_tags(self):
+        tags = _generate_image_tags(
+            commit="abc123def456",
+            python_version="3.10",
+            platform="cpu",
+            architecture="x86_64",
+        )
+
+        assert tags == ["abc123-py310-cpu"]
+
+    @mock.patch.dict(
+        "os.environ",
+        {
+            "BUILDKITE_BRANCH": "releases/2.44.0",
+            "RAYCI_SCHEDULE": "",
+        },
+    )
+    def test_extra_image_type_gets_distinct_tags(self):
+        common = dict(
+            commit="abc123def456",
+            python_version="3.10",
+            platform="cpu",
+            architecture="x86_64",
+        )
+
+        ray_tags = _generate_image_tags(image_type="ray", **common)
+        extra_tags = _generate_image_tags(image_type="ray-extra", **common)
+
+        assert ray_tags == ["2.44.0.abc123-py310-cpu"]
+        assert extra_tags == ["2.44.0.abc123-extra-py310-cpu"]
+        assert not set(ray_tags) & set(extra_tags)
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main(["-vv", __file__]))