diff --git a/.buildkite/build.rayci.yml b/.buildkite/build.rayci.yml index 3c27fedbe20a..9a8b5762921b 100644 --- a/.buildkite/build.rayci.yml +++ b/.buildkite/build.rayci.yml @@ -35,23 +35,46 @@ steps: HOSTTYPE: "x86_64" MANYLINUX_VERSION: "260103.868e54c" - - label: ":tapioca: build: wheel {{matrix}} (x86_64)" - key: linux_wheels + - name: ray-wheel-build + label: "wanda: wheel py{{matrix}} (x86_64)" + wanda: ci/docker/ray-wheel.wanda.yaml + matrix: + - "3.10" + - "3.11" + - "3.12" + - "3.13" + env: + PYTHON_VERSION: "{{matrix}}" + ARCH_SUFFIX: "" + HOSTTYPE: "x86_64" + MANYLINUX_VERSION: "260103.868e54c" tags: - release_wheels - linux_wheels - oss - instance_type: large + depends_on: + - ray-core-build + - ray-java-build + - ray-dashboard-build + + # Upload wheels to S3 + - label: ":s3: upload: wheel py{{matrix}} (x86_64)" + key: linux_wheels_upload + instance_type: small commands: - - bazel run //ci/ray_ci:build_in_docker -- wheel --python-version {{matrix}} --architecture x86_64 --upload + - ./ci/build/extract_wanda_wheels.sh ray-wheel-py{{matrix}} + - ./ci/build/copy_build_artifacts.sh wheel matrix: - "3.10" - "3.11" - "3.12" - "3.13" depends_on: - - manylinux-x86_64 - - forge + - ray-wheel-build + tags: + - release_wheels + - skip-on-premerge + - oss - label: ":tapioca: build: jar" key: java_wheels @@ -66,89 +89,311 @@ steps: depends_on: manylinux-x86_64 job_env: manylinux-x86_64 - - label: ":tapioca: build: ray py{{matrix}} docker (x86_64)" - key: ray_images + - name: ray-image-cpu-build + label: "wanda: ray py{{matrix}} cpu (x86_64)" + wanda: ci/docker/ray-image-cpu.wanda.yaml + matrix: + - "3.10" + - "3.11" + - "3.12" + env: + PYTHON_VERSION: "{{matrix}}" + ARCH_SUFFIX: "" tags: - python_dependencies - docker - oss - instance_type: medium - commands: - - bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}} - --platform cu11.7.1-cudnn8 --platform cu11.8.0-cudnn8 - --platform cu12.1.1-cudnn8 --platform cu12.3.2-cudnn9 - --platform 
cu12.4.1-cudnn --platform cu12.5.1-cudnn - --platform cu12.6.3-cudnn --platform cu12.8.1-cudnn - --platform cu12.9.1-cudnn - --platform cpu - --image-type ray --upload depends_on: - - manylinux-x86_64 - - forge - - raycudabase + - ray-wheel-build - raycpubase + + # Push ray CPU images to Docker Hub + - label: ":docker: push: ray py{{matrix}} cpu (x86_64)" + key: ray_images_cpu_push + instance_type: small + commands: + - bazel run //.buildkite:copy_files -- --destination docker_login + - bazel run //ci/ray_ci/automation:push_ray_image -- + --python-version {{matrix}} + --platform cpu + --image-type ray + --upload matrix: - "3.10" - "3.11" - "3.12" + depends_on: + - ray-image-cpu-build + tags: + - python_dependencies + - docker + - skip-on-premerge + - oss - - label: ":tapioca: build: ray-extra py{{matrix}} docker (x86_64)" - key: ray_extra_images + - name: ray-image-cuda-build + label: "wanda: ray py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + wanda: ci/docker/ray-image-cuda.wanda.yaml + matrix: + setup: + python: + - "3.10" + - "3.11" + - "3.12" + cuda: + - "11.7.1-cudnn8" + - "11.8.0-cudnn8" + - "12.1.1-cudnn8" + - "12.3.2-cudnn9" + - "12.4.1-cudnn" + - "12.5.1-cudnn" + - "12.6.3-cudnn" + - "12.8.1-cudnn" + - "12.9.1-cudnn" + env: + PYTHON_VERSION: "{{matrix.python}}" + CUDA_VERSION: "{{matrix.cuda}}" + ARCH_SUFFIX: "" tags: - python_dependencies - docker - oss - instance_type: medium + depends_on: + - ray-wheel-build + - raycudabase + + # Push ray CUDA images to Docker Hub + - label: ":docker: push: ray py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + key: ray_images_cuda_push + instance_type: small commands: - - bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}} - --platform cu11.7.1-cudnn8 --platform cu11.8.0-cudnn8 - --platform cu12.1.1-cudnn8 --platform cu12.3.2-cudnn9 - --platform cu12.4.1-cudnn --platform cu12.5.1-cudnn - --platform cu12.6.3-cudnn --platform cu12.8.1-cudnn - --platform cu12.9.1-cudnn - --platform cpu - 
--image-type ray-extra --upload + - bazel run //.buildkite:copy_files -- --destination docker_login + - bazel run //ci/ray_ci/automation:push_ray_image -- + --python-version {{matrix.python}} + --platform cu{{matrix.cuda}} + --image-type ray + --upload + matrix: + setup: + python: + - "3.10" + - "3.11" + - "3.12" + cuda: + - "11.7.1-cudnn8" + - "11.8.0-cudnn8" + - "12.1.1-cudnn8" + - "12.3.2-cudnn9" + - "12.4.1-cudnn" + - "12.5.1-cudnn" + - "12.6.3-cudnn" + - "12.8.1-cudnn" + - "12.9.1-cudnn" depends_on: - - manylinux-x86_64 - - forge - - raycpubaseextra - - raycudabaseextra + - ray-image-cuda-build + tags: + - python_dependencies + - docker + - skip-on-premerge + - oss + + - name: ray-extra-image-cpu-build + label: "wanda: ray-extra py{{matrix}} cpu (x86_64)" + wanda: ci/docker/ray-extra-image-cpu.wanda.yaml matrix: - "3.10" - "3.11" - "3.12" + env: + PYTHON_VERSION: "{{matrix}}" + ARCH_SUFFIX: "" + tags: + - python_dependencies + - docker + - oss + depends_on: + - ray-wheel-build + - raycpubaseextra - - label: ":tapioca: build: ray-llm py{{matrix}} docker (x86_64)" + - name: ray-extra-image-cuda-build + label: "wanda: ray-extra py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + wanda: ci/docker/ray-extra-image-cuda.wanda.yaml + matrix: + setup: + python: + - "3.10" + - "3.11" + - "3.12" + cuda: + - "11.7.1-cudnn8" + - "11.8.0-cudnn8" + - "12.1.1-cudnn8" + - "12.3.2-cudnn9" + - "12.4.1-cudnn" + - "12.5.1-cudnn" + - "12.6.3-cudnn" + - "12.8.1-cudnn" + - "12.9.1-cudnn" + env: + PYTHON_VERSION: "{{matrix.python}}" + CUDA_VERSION: "{{matrix.cuda}}" + ARCH_SUFFIX: "" tags: - python_dependencies - docker - oss - instance_type: medium - commands: - - bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}} - --platform cu12.8.1-cudnn --image-type ray-llm --upload depends_on: - - manylinux-x86_64 - - forge + - ray-wheel-build + - raycudabaseextra + + - name: ray-llm-image-cuda-build + label: "wanda: ray-llm py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" 
+ wanda: ci/docker/ray-llm-image-cuda.wanda.yaml + matrix: + setup: + python: + - "3.11" + cuda: + - "12.8.1-cudnn" + env: + PYTHON_VERSION: "{{matrix.python}}" + CUDA_VERSION: "{{matrix.cuda}}" + ARCH_SUFFIX: "" + tags: + - python_dependencies + - docker + - oss + depends_on: + - ray-wheel-build - ray-llmbase + + - name: ray-llm-extra-image-cuda-build + label: "wanda: ray-llm-extra py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + wanda: ci/docker/ray-llm-extra-image-cuda.wanda.yaml + matrix: + setup: + python: + - "3.11" + cuda: + - "12.8.1-cudnn" + env: + PYTHON_VERSION: "{{matrix.python}}" + CUDA_VERSION: "{{matrix.cuda}}" + ARCH_SUFFIX: "" + tags: + - python_dependencies + - docker + - oss + depends_on: + - ray-wheel-build + - ray-llmbaseextra + + # Push ray-extra CPU images to Docker Hub (goes to rayproject/ray) + - label: ":docker: push: ray-extra py{{matrix}} cpu (x86_64)" + key: ray_extra_images_cpu_push + instance_type: small + commands: + - bazel run //.buildkite:copy_files -- --destination docker_login + - bazel run //ci/ray_ci/automation:push_ray_image -- + --python-version {{matrix}} + --platform cpu + --image-type ray-extra + --upload matrix: + - "3.10" - "3.11" + - "3.12" + depends_on: + - ray-extra-image-cpu-build + tags: + - python_dependencies + - docker + - skip-on-premerge + - oss - - label: ":tapioca: build: ray-llm-extra py{{matrix}} docker (x86_64)" + # Push ray-extra CUDA images to Docker Hub (goes to rayproject/ray) + - label: ":docker: push: ray-extra py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + key: ray_extra_images_cuda_push + instance_type: small + commands: + - bazel run //.buildkite:copy_files -- --destination docker_login + - bazel run //ci/ray_ci/automation:push_ray_image -- + --python-version {{matrix.python}} + --platform cu{{matrix.cuda}} + --image-type ray-extra + --upload + matrix: + setup: + python: + - "3.10" + - "3.11" + - "3.12" + cuda: + - "11.7.1-cudnn8" + - "11.8.0-cudnn8" + - "12.1.1-cudnn8" + - "12.3.2-cudnn9" + 
- "12.4.1-cudnn" + - "12.5.1-cudnn" + - "12.6.3-cudnn" + - "12.8.1-cudnn" + - "12.9.1-cudnn" + depends_on: + - ray-extra-image-cuda-build tags: - python_dependencies - docker + - skip-on-premerge - oss - instance_type: medium + + # Push ray-llm CUDA images to Docker Hub (goes to rayproject/ray-llm) + - label: ":docker: push: ray-llm py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + key: ray_llm_images_cuda_push + instance_type: small commands: - - bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}} - --platform cu12.8.1-cudnn --image-type ray-llm-extra --upload + - bazel run //.buildkite:copy_files -- --destination docker_login + - bazel run //ci/ray_ci/automation:push_ray_image -- + --python-version {{matrix.python}} + --platform cu{{matrix.cuda}} + --image-type ray-llm + --upload + matrix: + setup: + python: + - "3.11" + cuda: + - "12.8.1-cudnn" depends_on: - - manylinux-x86_64 - - forge - - ray-llmbaseextra + - ray-llm-image-cuda-build + tags: + - python_dependencies + - docker + - skip-on-premerge + - oss + + # Push ray-llm-extra CUDA images to Docker Hub (goes to rayproject/ray-llm) + - label: ":docker: push: ray-llm-extra py{{matrix.python}} cu{{matrix.cuda}} (x86_64)" + key: ray_llm_extra_images_cuda_push + instance_type: small + commands: + - bazel run //.buildkite:copy_files -- --destination docker_login + - bazel run //ci/ray_ci/automation:push_ray_image -- + --python-version {{matrix.python}} + --platform cu{{matrix.cuda}} + --image-type ray-llm-extra + --upload matrix: - - "3.11" + setup: + python: + - "3.11" + cuda: + - "12.8.1-cudnn" + depends_on: + - ray-llm-extra-image-cuda-build + tags: + - python_dependencies + - docker + - skip-on-premerge + - oss - label: ":tapioca: smoke test build-docker.sh" tags: @@ -165,6 +410,7 @@ steps: depends_on: - forge + # Generate nightly indexes after images are pushed - label: ":tapioca: generate nightly indexes" instance_type: small tags: @@ -175,6 +421,7 @@ steps: - bazel run 
.buildkite:copy_files -- --destination docker_login - bazel run //ci/ray_ci/automation:generate_index -- --prefix nightly depends_on: - - ray_images + - ray_images_cpu_push + - ray_images_cuda_push - ray_images_aarch64 - forge diff --git a/.buildkite/linux_aarch64.rayci.yml b/.buildkite/linux_aarch64.rayci.yml index 1c368789f08c..2b1692751d6d 100644 --- a/.buildkite/linux_aarch64.rayci.yml +++ b/.buildkite/linux_aarch64.rayci.yml @@ -29,6 +29,15 @@ steps: MANYLINUX_VERSION: "260103.868e54c" instance_type: builder-arm64 + - name: ray-dashboard-build-aarch64 + label: "wanda: dashboard (aarch64)" + wanda: ci/docker/ray-dashboard.wanda.yaml + env: + HOSTTYPE: "aarch64" + MANYLINUX_VERSION: "260103.868e54c" + tags: release_wheels + instance_type: builder-arm64 + - name: raycpubase-aarch64 label: "wanda: ray.py{{matrix}}.cpu.base (aarch64)" tags: @@ -133,77 +142,265 @@ steps: tags: release_wheels instance_type: builder-arm64 - - label: ":tapioca: build: wheel {{matrix}} (aarch64)" + - name: ray-wheel-build-aarch64 + label: "wanda: wheel py{{matrix}} (aarch64)" + wanda: ci/docker/ray-wheel.wanda.yaml + matrix: + - "3.10" + - "3.11" + - "3.12" + - "3.13" + env: + PYTHON_VERSION: "{{matrix}}" + ARCH_SUFFIX: "-aarch64" + HOSTTYPE: "aarch64" + MANYLINUX_VERSION: "260103.868e54c" tags: + - release_wheels - linux_wheels + - oss + instance_type: builder-arm64 + depends_on: + - ray-core-build-aarch64 + - ray-java-build-aarch64 + - ray-dashboard-build-aarch64 + + # Build ray-cpp core (C++ headers, libs, examples) for aarch64 + - name: ray-cpp-core-build-aarch64 + label: "wanda: cpp core py{{matrix}} (aarch64)" + wanda: ci/docker/ray-cpp-core.wanda.yaml + matrix: + - "3.10" + - "3.11" + - "3.12" + env: + PYTHON_VERSION: "{{matrix}}" + ARCH_SUFFIX: "-aarch64" + HOSTTYPE: "aarch64" + MANYLINUX_VERSION: "260103.868e54c" + tags: - release_wheels - oss - instance_type: medium-arm64 + instance_type: builder-arm64 + + # Build ray-cpp wheel for aarch64 + - name: ray-cpp-wheel-build-aarch64 
+ label: "wanda: cpp wheel py{{matrix}} (aarch64)" + wanda: ci/docker/ray-cpp-wheel.wanda.yaml + matrix: + - "3.10" + - "3.11" + - "3.12" + env: + PYTHON_VERSION: "{{matrix}}" + ARCH_SUFFIX: "-aarch64" + HOSTTYPE: "aarch64" + MANYLINUX_VERSION: "260103.868e54c" + tags: + - release_wheels + - linux_wheels + - oss + instance_type: builder-arm64 + depends_on: + - ray-core-build-aarch64 + - ray-cpp-core-build-aarch64 + - ray-java-build-aarch64 + - ray-dashboard-build-aarch64 + + # Upload wheels to S3 (aarch64) + - label: ":s3: upload: wheel py{{matrix}} (aarch64)" + key: linux_wheels_upload_aarch64 + instance_type: small-arm64 commands: - - bazel run //ci/ray_ci:build_in_docker -- wheel --python-version {{matrix}} --architecture aarch64 --upload + - ./ci/build/extract_wanda_wheels.sh ray-wheel-py{{matrix}}-aarch64 --upload matrix: - "3.10" - "3.11" - "3.12" - "3.13" depends_on: - - manylinux-aarch64 - - forge-aarch64 - job_env: forge-aarch64 + - ray-wheel-build-aarch64 + tags: + - release_wheels + - skip-on-premerge + - oss - - label: ":tapioca: build: ray-extra py{{matrix}} docker (aarch64)" - key: ray_extra_images_aarch64 + # Upload cpp wheels to S3 (aarch64) + - label: ":s3: upload: cpp wheel py{{matrix}} (aarch64)" + key: linux_cpp_wheels_upload_aarch64 + instance_type: small-arm64 + commands: + - ./ci/build/extract_wanda_wheels.sh ray-cpp-wheel-py{{matrix}}-aarch64 --upload + matrix: + - "3.10" + - "3.11" + - "3.12" + depends_on: + - ray-cpp-wheel-build-aarch64 + tags: + - release_wheels + - skip-on-premerge + - oss + + - name: ray-extra-image-cpu-build-aarch64 + label: "wanda: ray-extra py{{matrix}} cpu (aarch64)" + wanda: ci/docker/ray-extra-image-cpu.wanda.yaml + matrix: + - "3.10" + - "3.11" + - "3.12" + env: + PYTHON_VERSION: "{{matrix}}" + ARCH_SUFFIX: "-aarch64" tags: - python_dependencies - docker - oss - instance_type: medium-arm64 - commands: - - bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}} - --platform cu11.7.1-cudnn8 
--platform cu11.8.0-cudnn8 - --platform cu12.1.1-cudnn8 --platform cu12.3.2-cudnn9 - --platform cu12.4.1-cudnn --platform cu12.5.1-cudnn - --platform cu12.6.3-cudnn --platform cu12.8.1-cudnn - --platform cu12.9.1-cudnn - --platform cpu - --image-type ray-extra --architecture aarch64 --upload + instance_type: builder-arm64 depends_on: - - manylinux-aarch64 - - forge-aarch64 - - raycudabaseextra-aarch64 + - ray-wheel-build-aarch64 - raycpubaseextra-aarch64 - job_env: forge-aarch64 + + - name: ray-extra-image-cuda-build-aarch64 + label: "wanda: ray-extra py{{matrix.python}} cu{{matrix.cuda}} (aarch64)" + wanda: ci/docker/ray-extra-image-cuda.wanda.yaml + matrix: + setup: + python: + - "3.10" + - "3.11" + - "3.12" + cuda: + - "11.7.1-cudnn8" + - "11.8.0-cudnn8" + - "12.1.1-cudnn8" + - "12.3.2-cudnn9" + - "12.4.1-cudnn" + - "12.5.1-cudnn" + - "12.6.3-cudnn" + - "12.8.1-cudnn" + - "12.9.1-cudnn" + env: + PYTHON_VERSION: "{{matrix.python}}" + CUDA_VERSION: "{{matrix.cuda}}" + ARCH_SUFFIX: "-aarch64" + tags: + - python_dependencies + - docker + - oss + instance_type: builder-arm64 + depends_on: + - ray-wheel-build-aarch64 + - raycudabaseextra-aarch64 + + - name: ray-image-cpu-build-aarch64 + label: "wanda: ray py{{matrix}} cpu (aarch64)" + wanda: ci/docker/ray-image-cpu.wanda.yaml matrix: - "3.10" - "3.11" - "3.12" + env: + PYTHON_VERSION: "{{matrix}}" + ARCH_SUFFIX: "-aarch64" + tags: + - python_dependencies + - docker + - oss + instance_type: builder-arm64 + depends_on: + - ray-wheel-build-aarch64 + - raycpubase-aarch64 - - label: ":tapioca: build: ray py{{matrix}} docker (aarch64)" - key: ray_images_aarch64 + - name: ray-image-cuda-build-aarch64 + label: "wanda: ray py{{matrix.python}} cu{{matrix.cuda}} (aarch64)" + wanda: ci/docker/ray-image-cuda.wanda.yaml + matrix: + setup: + python: + - "3.10" + - "3.11" + - "3.12" + cuda: + - "11.7.1-cudnn8" + - "11.8.0-cudnn8" + - "12.1.1-cudnn8" + - "12.3.2-cudnn9" + - "12.4.1-cudnn" + - "12.5.1-cudnn" + - "12.6.3-cudnn" + - 
"12.8.1-cudnn" + - "12.9.1-cudnn" + env: + PYTHON_VERSION: "{{matrix.python}}" + CUDA_VERSION: "{{matrix.cuda}}" + ARCH_SUFFIX: "-aarch64" tags: - python_dependencies - docker - oss - instance_type: medium-arm64 - commands: - - bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix}} - --platform cu11.7.1-cudnn8 --platform cu11.8.0-cudnn8 - --platform cu12.1.1-cudnn8 --platform cu12.3.2-cudnn9 - --platform cu12.4.1-cudnn --platform cu12.5.1-cudnn - --platform cu12.6.3-cudnn --platform cu12.8.1-cudnn - --platform cu12.9.1-cudnn - --platform cpu - --image-type ray --architecture aarch64 --upload + instance_type: builder-arm64 depends_on: - - manylinux-aarch64 - - forge-aarch64 + - ray-wheel-build-aarch64 - raycudabase-aarch64 - - raycpubase-aarch64 - job_env: forge-aarch64 + + # Push ray CPU images to Docker Hub (aarch64) + - label: ":docker: push: ray py{{matrix}} cpu (aarch64)" + key: ray_images_aarch64 + instance_type: small-arm64 + commands: + - bazel run //.buildkite:copy_files -- --destination docker_login + - bazel run //ci/ray_ci/automation:push_ray_image -- + --python-version {{matrix}} + --platform cpu + --architecture aarch64 + --upload matrix: - "3.10" - "3.11" - "3.12" + depends_on: + - ray-image-cpu-build-aarch64 + tags: + - python_dependencies + - docker + - skip-on-premerge + - oss + + # Push ray CUDA images to Docker Hub (aarch64) + - label: ":docker: push: ray py{{matrix.python}} cu{{matrix.cuda}} (aarch64)" + key: ray_images_cuda_aarch64 + instance_type: small-arm64 + commands: + - bazel run //.buildkite:copy_files -- --destination docker_login + - bazel run //ci/ray_ci/automation:push_ray_image -- + --python-version {{matrix.python}} + --platform cu{{matrix.cuda}} + --architecture aarch64 + --upload + matrix: + setup: + python: + - "3.10" + - "3.11" + - "3.12" + cuda: + - "11.7.1-cudnn8" + - "11.8.0-cudnn8" + - "12.1.1-cudnn8" + - "12.3.2-cudnn9" + - "12.4.1-cudnn" + - "12.5.1-cudnn" + - "12.6.3-cudnn" + - "12.8.1-cudnn" + - 
"12.9.1-cudnn" + depends_on: + - ray-image-cuda-build-aarch64 + tags: + - python_dependencies + - docker + - skip-on-premerge + - oss - label: ":ray: core: wheel-aarch64 tests" tags: linux_wheels diff --git a/.buildkite/release/build.rayci.yml b/.buildkite/release/build.rayci.yml index 01497e9b95a1..a326bf92afaa 100644 --- a/.buildkite/release/build.rayci.yml +++ b/.buildkite/release/build.rayci.yml @@ -1,5 +1,55 @@ group: release build steps: + # Build ray core components (required for wheel builds) + - name: ray-core-build + label: "wanda: core binary parts py{{matrix}} (x86_64)" + wanda: ci/docker/ray-core.wanda.yaml + matrix: + - "3.10" + - "3.11" + - "3.12" + env: + PYTHON_VERSION: "{{matrix}}" + ARCH_SUFFIX: "" + HOSTTYPE: "x86_64" + MANYLINUX_VERSION: "260103.868e54c" + tags: oss + + - name: ray-dashboard-build + label: "wanda: dashboard" + wanda: ci/docker/ray-dashboard.wanda.yaml + env: + HOSTTYPE: "x86_64" + MANYLINUX_VERSION: "260103.868e54c" + tags: oss + + - name: ray-java-build + label: "wanda: java build (x86_64)" + wanda: ci/docker/ray-java.wanda.yaml + tags: oss + env: + ARCH_SUFFIX: "" + HOSTTYPE: "x86_64" + MANYLINUX_VERSION: "260103.868e54c" + + - name: ray-wheel-build + label: "wanda: wheel py{{matrix}} (x86_64)" + wanda: ci/docker/ray-wheel.wanda.yaml + matrix: + - "3.10" + - "3.11" + - "3.12" + env: + PYTHON_VERSION: "{{matrix}}" + ARCH_SUFFIX: "" + HOSTTYPE: "x86_64" + MANYLINUX_VERSION: "260103.868e54c" + tags: oss + depends_on: + - ray-core-build + - ray-java-build + - ray-dashboard-build + - name: raycpubaseextra-testdeps label: "wanda: ray.py{{matrix}}.cpu.base-extra-testdeps" wanda: docker/base-extra-testdeps/cpu.wanda.yaml @@ -67,26 +117,72 @@ steps: depends_on: - ray-mlcudabaseextra - - label: ":tapioca: build: ray py{{matrix.python}}-{{matrix.platform}} image for release tests" + # Build ray anyscale images using wanda (CPU) + - name: ray-anyscale-cpu-build + label: "wanda: ray-anyscale py{{matrix}} cpu" + wanda: 
ci/docker/ray-anyscale-cpu.wanda.yaml + matrix: + # This list should be kept in sync with the list of supported Python in + # release test suite + - "3.10" + - "3.11" + - "3.12" + env: + PYTHON_VERSION: "{{matrix}}" + ARCH_SUFFIX: "" + tags: + - oss + depends_on: + - ray-wheel-build + - raycpubaseextra-testdeps + + # Build ray anyscale images using wanda (CUDA) + - name: ray-anyscale-cuda-build + label: "wanda: ray-anyscale py{{matrix.python}} cu{{matrix.cuda}}" + wanda: ci/docker/ray-anyscale-cuda.wanda.yaml + matrix: + setup: + python: + - "3.10" + - "3.11" + - "3.12" + cuda: + - "12.3.2-cudnn9" + env: + PYTHON_VERSION: "{{matrix.python}}" + CUDA_VERSION: "{{matrix.cuda}}" + ARCH_SUFFIX: "" + tags: + - oss + depends_on: + - ray-wheel-build + - raycudabaseextra-testdeps + + # Push anyscale images to ECR/GCP/Azure registries + - label: ":docker: push: ray-anyscale py{{matrix.python}} {{matrix.platform}}" key: anyscalebuild instance_type: release-medium mount_buildkite_agent: true tags: - oss commands: - - bazel run //ci/ray_ci:build_in_docker -- anyscale - --python-version {{matrix.python}} --platform {{matrix.platform}} - --image-type ray --upload + # Authenticate with GCP and Azure before pushing + - bash release/gcloud_docker_login.sh release/aws2gce_iam.json + - bash release/azure_docker_login.sh + - az acr login --name rayreleasetest + # PATH must include gcloud SDK for crane to use docker credential helper + - export PATH=$(pwd)/google-cloud-sdk/bin:$PATH && + bazel run //ci/ray_ci/automation:push_anyscale_image -- + --python-version {{matrix.python}} + --platform {{matrix.platform}} + --image-type ray + --upload depends_on: - - manylinux-x86_64 - - forge - - raycpubaseextra-testdeps - - raycudabaseextra-testdeps + - ray-anyscale-cpu-build + - ray-anyscale-cuda-build matrix: setup: python: - # This list should be kept in sync with the list of supported Python in - # release test suite - "3.10" - "3.11" - "3.12" @@ -94,35 +190,93 @@ steps: - cu12.3.2-cudnn9 - 
cpu - - label: ":tapioca: build: ray-llm py{{matrix}} image for release tests" + # Build ray-llm anyscale image using wanda + - name: ray-llm-anyscale-cuda-build + label: "wanda: ray-llm-anyscale py{{matrix.python}} cu{{matrix.cuda}}" + wanda: ci/docker/ray-llm-anyscale-cuda.wanda.yaml + matrix: + setup: + python: + - "3.11" + cuda: + - "12.8.1-cudnn" + env: + PYTHON_VERSION: "{{matrix.python}}" + CUDA_VERSION: "{{matrix.cuda}}" + ARCH_SUFFIX: "" + tags: + - oss + depends_on: + - ray-wheel-build + - ray-llmbaseextra-testdeps + + # Push ray-llm anyscale images to ECR/GCP/Azure registries + - label: ":docker: push: ray-llm-anyscale py{{matrix}} cu12.8.1-cudnn" key: anyscalellmbuild instance_type: release-medium mount_buildkite_agent: true tags: - oss commands: - - bazel run //ci/ray_ci:build_in_docker -- anyscale --python-version {{matrix}} - --platform cu12.8.1-cudnn --image-type ray-llm --upload + # Authenticate with GCP and Azure before pushing + - bash release/gcloud_docker_login.sh release/aws2gce_iam.json + - bash release/azure_docker_login.sh + - az acr login --name rayreleasetest + # PATH must include gcloud SDK for crane to use docker credential helper + - export PATH=$(pwd)/google-cloud-sdk/bin:$PATH && + bazel run //ci/ray_ci/automation:push_anyscale_image -- + --python-version {{matrix}} + --platform cu12.8.1-cudnn + --image-type ray-llm + --upload depends_on: - - manylinux-x86_64 - - forge - - ray-llmbaseextra-testdeps + - ray-llm-anyscale-cuda-build matrix: - "3.11" - - label: ":tapioca: build: ray-ml py{{matrix}} image for release tests" + # Build ray-ml anyscale image using wanda + - name: ray-ml-anyscale-cuda-build + label: "wanda: ray-ml-anyscale py{{matrix.python}} cu{{matrix.cuda}}" + wanda: ci/docker/ray-ml-anyscale-cuda.wanda.yaml + matrix: + setup: + python: + # This list should be kept in sync with the list of supported Python in + # release test suite + - "3.10" + cuda: + - "12.1.1-cudnn8" + env: + PYTHON_VERSION: "{{matrix.python}}" + 
CUDA_VERSION: "{{matrix.cuda}}" + ARCH_SUFFIX: "" + tags: + - oss + depends_on: + - ray-wheel-build + - ray-mlcudabaseextra-testdeps + + # Push ray-ml anyscale images to ECR/GCP/Azure registries + - label: ":docker: push: ray-ml-anyscale py{{matrix}} cu12.1.1-cudnn8" key: anyscalemlbuild instance_type: release-medium mount_buildkite_agent: true tags: - oss commands: - - bazel run //ci/ray_ci:build_in_docker -- anyscale --python-version {{matrix}} - --platform cu12.1.1-cudnn8 --image-type ray-ml --upload + # Authenticate with GCP and Azure before pushing + - bash release/gcloud_docker_login.sh release/aws2gce_iam.json + - bash release/azure_docker_login.sh + - az acr login --name rayreleasetest + # PATH must include gcloud SDK for crane to use docker credential helper + - export PATH=$(pwd)/google-cloud-sdk/bin:$PATH && + bazel run //ci/ray_ci/automation:push_anyscale_image -- + --python-version {{matrix}} + --platform cu12.1.1-cudnn8 + --image-type ray-ml + --upload depends_on: - - manylinux-x86_64 - - forge - - ray-mlcudabaseextra-testdeps + - ray-ml-anyscale-cuda-build matrix: # This list should be kept in sync with the list of supported Python in # release test suite diff --git a/.rayciversion b/.rayciversion index 885415662ff8..215740905987 100644 --- a/.rayciversion +++ b/.rayciversion @@ -1 +1 @@ -0.21.0 +0.22.0 diff --git a/ci/build/extract_wanda_wheels.sh b/ci/build/extract_wanda_wheels.sh new file mode 100755 index 000000000000..e89d577c0821 --- /dev/null +++ b/ci/build/extract_wanda_wheels.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +set -exuo pipefail + +# Extract wheels from a Wanda-cached image. +# +# Usage: ./ci/build/extract_wanda_wheels.sh +# +# Example: +# ./ci/build/extract_wanda_wheels.sh ray-wheel-py3.10 +# +# The script will: +# 1. Pull the wanda image from ECR cache +# 2. Extract .whl files from the image +# 3. 
Move them to .whl/ directory (clears existing wheels first) + +WANDA_IMAGE_NAME=${1:?Usage: $0 <wanda-image-name>} + +# Construct full image tag from environment +WANDA_IMAGE="${RAYCI_WORK_REPO}:${RAYCI_BUILD_ID}-${WANDA_IMAGE_NAME}" + +echo "Extracting wheels from: ${WANDA_IMAGE}" + +tmpdir="$(mktemp -d)" +trap 'rm -rf "$tmpdir" || true' EXIT + +docker pull "${WANDA_IMAGE}" + +# Image has no default CMD, so provide a dummy command. +container_id="$(docker create "${WANDA_IMAGE}" /no-such-cmd)" + +# Copy wheel files directly from container (wanda wheel images have wheels at /) +docker cp "${container_id}":/ "${tmpdir}/" +docker rm "${container_id}" + +# Clear existing wheels to avoid stale files from previous runs +rm -rf .whl +mkdir -p .whl + +# Move extracted wheels (handles nested paths if any) +find "${tmpdir}" -type f -name '*.whl' -exec mv {} .whl/ \; + +# Verify that wheels were actually extracted +shopt -s nullglob +wheels=(.whl/*.whl) +shopt -u nullglob + +if (( ${#wheels[@]} == 0 )); then + echo "ERROR: No wheel files were extracted from image: ${WANDA_IMAGE}" >&2 + echo "This may indicate image corruption, incorrect image tag, or path issues." >&2 + exit 1 +fi + +echo "Extracted ${#wheels[@]} wheel(s):" +ls -la .whl/ diff --git a/ci/docker/ray-anyscale-cpu.wanda.yaml b/ci/docker/ray-anyscale-cpu.wanda.yaml new file mode 100644 index 000000000000..a4a1280dda5e --- /dev/null +++ b/ci/docker/ray-anyscale-cpu.wanda.yaml @@ -0,0 +1,20 @@ +# Ray Anyscale CPU Image (for release tests) +# Installs ray wheel into the CPU base-extra-testdeps image +# +# This produces the anyscale test image for CPU-only release tests. +# Unlike the regular ray image, this uses base-extra-testdeps which +# includes additional test dependencies. 
+# +name: "ray-anyscale-py$PYTHON_VERSION-cpu$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cpu-base-extra-testdeps" # CPU base with test deps + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cpu-base-extra-testdeps + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-anyscale-cuda.wanda.yaml b/ci/docker/ray-anyscale-cuda.wanda.yaml new file mode 100644 index 000000000000..f85a73ad9818 --- /dev/null +++ b/ci/docker/ray-anyscale-cuda.wanda.yaml @@ -0,0 +1,21 @@ +# Ray Anyscale CUDA Image (for release tests) +# Installs ray wheel into the CUDA base-extra-testdeps image +# +# This produces the anyscale test image for GPU release tests. +# Unlike the regular ray image, this uses base-extra-testdeps which +# includes additional test dependencies. 
+# +name: "ray-anyscale-py$PYTHON_VERSION-cu$CUDA_VERSION$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base-extra-testdeps" # CUDA base with test deps + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - CUDA_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base-extra-testdeps + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-extra-image-cpu.wanda.yaml b/ci/docker/ray-extra-image-cpu.wanda.yaml new file mode 100644 index 000000000000..a07dcae79f43 --- /dev/null +++ b/ci/docker/ray-extra-image-cpu.wanda.yaml @@ -0,0 +1,19 @@ +# Ray Extra CPU Image +# Installs ray wheel into the CPU base-extra image +# +# This produces the final rayproject/ray image with -extra tag for CPU-only deployments. +# The base-extra image includes additional Python dependencies beyond base. 
+# +name: "ray-extra-py$PYTHON_VERSION-cpu$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cpu-base-extra$ARCH_SUFFIX" # CPU base-extra image + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cpu-base-extra$ARCH_SUFFIX + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-extra-image-cuda.wanda.yaml b/ci/docker/ray-extra-image-cuda.wanda.yaml new file mode 100644 index 000000000000..2d2629cdb959 --- /dev/null +++ b/ci/docker/ray-extra-image-cuda.wanda.yaml @@ -0,0 +1,20 @@ +# Ray Extra CUDA Image +# Installs ray wheel into the CUDA base-extra image +# +# This produces the final rayproject/ray image with -extra tag for GPU deployments. +# The base-extra image includes additional Python dependencies beyond base. 
+# +name: "ray-extra-py$PYTHON_VERSION-cu$CUDA_VERSION$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base-extra$ARCH_SUFFIX" # CUDA base-extra image + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - CUDA_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base-extra$ARCH_SUFFIX + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-image-cpu.wanda.yaml b/ci/docker/ray-image-cpu.wanda.yaml new file mode 100644 index 000000000000..ff51000348bf --- /dev/null +++ b/ci/docker/ray-image-cpu.wanda.yaml @@ -0,0 +1,18 @@ +# Ray CPU Image +# Installs ray wheel into the CPU base image +# +# This produces the final rayproject/ray image for CPU-only deployments. +# +name: "ray-py$PYTHON_VERSION-cpu$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cpu-base$ARCH_SUFFIX" # CPU base image with Python + deps + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cpu-base$ARCH_SUFFIX + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-image-cuda.wanda.yaml b/ci/docker/ray-image-cuda.wanda.yaml new file mode 100644 index 000000000000..bd716d920748 --- /dev/null +++ b/ci/docker/ray-image-cuda.wanda.yaml @@ -0,0 +1,19 @@ +# Ray CUDA Image +# Installs ray wheel into the CUDA base image +# +# This produces the final rayproject/ray image for GPU deployments. 
+# +name: "ray-py$PYTHON_VERSION-cu$CUDA_VERSION$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base$ARCH_SUFFIX" # CUDA base image + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - CUDA_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base$ARCH_SUFFIX + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-image.Dockerfile b/ci/docker/ray-image.Dockerfile new file mode 100644 index 000000000000..e13fa2093c10 --- /dev/null +++ b/ci/docker/ray-image.Dockerfile @@ -0,0 +1,58 @@ +# syntax=docker/dockerfile:1.3-labs +# +# Ray Image Builder +# ================= +# Installs the Ray wheel into a base image (CPU or CUDA). +# +# This Dockerfile uses multi-stage builds to: +# 1. Extract the wheel from the ray-wheel wanda cache (scratch image) +# 2. Install it into the base image (ray-py{VER}-{cpu/cuda}-base) +# +# The base image already contains: +# - Python with conda/anaconda +# - Core dependencies (numpy, etc.) +# - System libraries (jemalloc, etc.) 
+# +# This image adds: +# - Ray wheel with [all] extras +# - pip freeze output for reproducibility +# +ARG BASE_IMAGE +ARG RAY_WHEEL_IMAGE + +FROM ${RAY_WHEEL_IMAGE} AS wheel-source +FROM ${BASE_IMAGE} + +ARG PYTHON_VERSION=3.10 + +COPY --from=wheel-source /*.whl /tmp/ +COPY python/requirements_compiled.txt /tmp/ + +# Install Ray wheel with all extras +RUN <&2 + ls -l /tmp/*.whl >&2 + exit 1 +fi +WHEEL_FILE="${WHEEL_FILES[0]}" + +echo "Installing wheel: $WHEEL_FILE" + +# Install ray with all extras, using constraints for reproducibility +$HOME/anaconda3/bin/pip --no-cache-dir install \ + -c /tmp/requirements_compiled.txt \ + "${WHEEL_FILE}[all]" + +# Save pip freeze for debugging/reproducibility +$HOME/anaconda3/bin/pip freeze > /home/ray/pip-freeze.txt + +echo "Ray version: $($HOME/anaconda3/bin/python -c 'import ray; print(ray.__version__)')" +EOF + +CMD ["python"] diff --git a/ci/docker/ray-llm-anyscale-cuda.wanda.yaml b/ci/docker/ray-llm-anyscale-cuda.wanda.yaml new file mode 100644 index 000000000000..584121d4700e --- /dev/null +++ b/ci/docker/ray-llm-anyscale-cuda.wanda.yaml @@ -0,0 +1,20 @@ +# Ray-LLM Anyscale CUDA Image (for release tests) +# Installs ray wheel into the ray-llm CUDA base-extra-testdeps image +# +# This produces the anyscale test image for ray-llm GPU release tests. +# Uses ray-llm base with extra test dependencies. 
+# +name: "ray-llm-anyscale-py$PYTHON_VERSION-cu$CUDA_VERSION$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-llm-py$PYTHON_VERSION-cu$CUDA_VERSION-base-extra-testdeps" # ray-llm base with test deps + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - CUDA_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-llm-py$PYTHON_VERSION-cu$CUDA_VERSION-base-extra-testdeps + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-llm-extra-image-cuda.wanda.yaml b/ci/docker/ray-llm-extra-image-cuda.wanda.yaml new file mode 100644 index 000000000000..1bf0993392ff --- /dev/null +++ b/ci/docker/ray-llm-extra-image-cuda.wanda.yaml @@ -0,0 +1,20 @@ +# Ray LLM Extra CUDA Image +# Installs ray wheel into the ray-llm base-extra image +# +# This produces the final rayproject/ray-llm image with -extra tag. +# The ray-llm base-extra image includes additional Python dependencies beyond base. 
+# +name: "ray-llm-extra-py$PYTHON_VERSION-cu$CUDA_VERSION$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-llm-py$PYTHON_VERSION-cu$CUDA_VERSION-base-extra$ARCH_SUFFIX" # LLM base-extra image + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - CUDA_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-llm-py$PYTHON_VERSION-cu$CUDA_VERSION-base-extra$ARCH_SUFFIX + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-llm-image-cuda.wanda.yaml b/ci/docker/ray-llm-image-cuda.wanda.yaml new file mode 100644 index 000000000000..3d96bf7f4f45 --- /dev/null +++ b/ci/docker/ray-llm-image-cuda.wanda.yaml @@ -0,0 +1,20 @@ +# Ray LLM CUDA Image +# Installs ray wheel into the ray-llm base image +# +# This produces the final rayproject/ray-llm image for LLM deployments. +# The ray-llm base image includes vLLM and other LLM-specific dependencies. 
+# +name: "ray-llm-py$PYTHON_VERSION-cu$CUDA_VERSION$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-llm-py$PYTHON_VERSION-cu$CUDA_VERSION-base$ARCH_SUFFIX" # LLM base image + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - CUDA_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-llm-py$PYTHON_VERSION-cu$CUDA_VERSION-base$ARCH_SUFFIX + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-ml-anyscale-cuda.wanda.yaml b/ci/docker/ray-ml-anyscale-cuda.wanda.yaml new file mode 100644 index 000000000000..82efd3242eef --- /dev/null +++ b/ci/docker/ray-ml-anyscale-cuda.wanda.yaml @@ -0,0 +1,20 @@ +# Ray-ML Anyscale CUDA Image (for release tests) +# Installs ray wheel into the ray-ml CUDA base-extra-testdeps image +# +# This produces the anyscale test image for ray-ml GPU release tests. +# Uses ray-ml base with extra test dependencies. 
+# +name: "ray-ml-anyscale-py$PYTHON_VERSION-cu$CUDA_VERSION$ARCH_SUFFIX" +disable_caching: true +froms: + - "cr.ray.io/rayproject/ray-ml-py$PYTHON_VERSION-cu$CUDA_VERSION-base-extra-testdeps" # ray-ml base with test deps + - "cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" # Ray wheel +dockerfile: ci/docker/ray-image.Dockerfile +srcs: + - python/requirements_compiled.txt +build_args: + - PYTHON_VERSION + - CUDA_VERSION + - ARCH_SUFFIX + - BASE_IMAGE=cr.ray.io/rayproject/ray-ml-py$PYTHON_VERSION-cu$CUDA_VERSION-base-extra-testdeps + - RAY_WHEEL_IMAGE=cr.ray.io/rayproject/ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX diff --git a/ci/docker/ray-wheel.Dockerfile b/ci/docker/ray-wheel.Dockerfile new file mode 100644 index 000000000000..a78f22f976a8 --- /dev/null +++ b/ci/docker/ray-wheel.Dockerfile @@ -0,0 +1,129 @@ +# syntax=docker/dockerfile:1.3-labs +# +# Ray Wheel Builder (Unified) +# =========================== +# Builds manylinux2014-compatible wheels using pre-built C++ artifacts from wanda cache. +# +# Build Types: +# - ray wheel: WHEEL_TYPE=ray (default) - Standard Ray Python wheel +# - ray-cpp wheel: WHEEL_TYPE=cpp - Ray C++ API wheel +# +# GLIBC Compatibility: +# -------------------- +# manylinux2014 requires GLIBC <= 2.17 for broad Linux compatibility. +# The pre-built _raylet.so is compiled inside manylinux2014 with GLIBC 2.17. +# + +ARG RAY_CORE_IMAGE +ARG RAY_CPP_CORE_IMAGE=scratch +ARG RAY_JAVA_IMAGE +ARG RAY_DASHBOARD_IMAGE +ARG MANYLINUX_VERSION +ARG HOSTTYPE + +FROM ${RAY_CORE_IMAGE} AS ray-core +FROM ${RAY_CPP_CORE_IMAGE} AS ray-cpp-core +FROM ${RAY_JAVA_IMAGE} AS ray-java +FROM ${RAY_DASHBOARD_IMAGE} AS ray-dashboard + +# Main build stage - manylinux2014 provides GLIBC 2.17 +FROM rayproject/manylinux2014:${MANYLINUX_VERSION}-jdk-${HOSTTYPE} AS builder + +ARG PYTHON_VERSION=3.10 +ARG BUILDKITE_COMMIT +ARG WHEEL_TYPE=ray + +# Set environment variables for the build +# - BUILDKITE_COMMIT: Used for ray.__commit__. 
Defaults to "unknown" for local builds. +# - SKIP_BAZEL_BUILD=1: Skip bazel build, use pre-built artifacts from ray-core/ray-java/ray-dashboard +# - RAY_DISABLE_EXTRA_CPP: 1 for ray wheel only, 0 for ray-cpp wheel +# - WHEEL_TYPE: "ray" or "cpp" - determines which wheel to build +ENV BUILDKITE_COMMIT=${BUILDKITE_COMMIT:-unknown} \ + PYTHON_VERSION=${PYTHON_VERSION} \ + SKIP_BAZEL_BUILD=1 \ + WHEEL_TYPE=${WHEEL_TYPE} + +WORKDIR /home/forge/ray + +# Copy artifacts from all stages +COPY --from=ray-core /ray_pkg.zip /tmp/ +COPY --from=ray-core /ray_py_proto.zip /tmp/ +COPY --from=ray-java /ray_java_pkg.zip /tmp/ +COPY --from=ray-dashboard /dashboard.tar.gz /tmp/ + +# Source files needed for wheel build +COPY --chown=2000:100 ci/build/build-manylinux-wheel.sh ci/build/ +COPY --chown=2000:100 README.rst pyproject.toml ./ +COPY --chown=2000:100 rllib/ rllib/ +COPY --chown=2000:100 python/ python/ + +USER forge +# Note: ray-cpp-core may be "scratch" (empty) for ray-only builds +RUN --mount=from=ray-cpp-core,source=/,target=/ray-cpp-core,ro \ + <<'EOF' +#!/bin/bash +set -euo pipefail + +PY_VERSION="${PYTHON_VERSION//./}" +PY_BIN="cp${PY_VERSION}-cp${PY_VERSION}" + +# Verify required artifacts exist before unpacking +for f in /tmp/ray_pkg.zip /tmp/ray_py_proto.zip /tmp/ray_java_pkg.zip /tmp/dashboard.tar.gz; do + [[ -f "$f" ]] || { echo "ERROR: missing artifact: $f"; exit 1; } +done + +# Clean extraction dirs to avoid stale leftovers +rm -rf /tmp/ray_pkg /tmp/ray_java_pkg /tmp/ray_cpp_pkg +mkdir -p /tmp/ray_pkg /tmp/ray_java_pkg + +# Unpack common pre-built artifacts +unzip -o /tmp/ray_pkg.zip -d /tmp/ray_pkg +unzip -o /tmp/ray_py_proto.zip -d python/ +unzip -o /tmp/ray_java_pkg.zip -d /tmp/ray_java_pkg +mkdir -p python/ray/dashboard/client/build +tar -xzf /tmp/dashboard.tar.gz -C python/ray/dashboard/client/build/ + +# C++ core artifacts +cp -r /tmp/ray_pkg/ray/* python/ray/ + +# Java JARs +cp -r /tmp/ray_java_pkg/ray/* python/ray/ + +# Handle wheel type specific setup +if 
[[ "$WHEEL_TYPE" == "cpp" ]]; then + # C++ API artifacts (headers, libs, examples) + if [[ -f /ray-cpp-core/ray_cpp_pkg.zip ]]; then + mkdir -p /tmp/ray_cpp_pkg + unzip -o /ray-cpp-core/ray_cpp_pkg.zip -d /tmp/ray_cpp_pkg + cp -r /tmp/ray_cpp_pkg/ray/cpp python/ray/ + else + echo "ERROR: ray_cpp_pkg.zip not found for cpp wheel build" + exit 1 + fi + export RAY_DISABLE_EXTRA_CPP=0 +else + export RAY_DISABLE_EXTRA_CPP=1 +fi + +# Build wheels +./ci/build/build-manylinux-wheel.sh "$PY_BIN" + +# Filter output based on wheel type +if [[ "$WHEEL_TYPE" == "cpp" ]]; then + # Keep only ray-cpp wheel + rm -f .whl/ray-[0-9]*.whl +fi + +# Sanity check: ensure wheels exist +shopt -s nullglob +wheels=(.whl/*.whl) +if (( ${#wheels[@]} == 0 )); then + echo "ERROR: No wheels produced in .whl/" + ls -la .whl || true + exit 1 +fi + +EOF + +FROM scratch +COPY --from=builder /home/forge/ray/.whl/*.whl / diff --git a/ci/docker/ray-wheel.wanda.yaml b/ci/docker/ray-wheel.wanda.yaml new file mode 100644 index 000000000000..ba08f700b33a --- /dev/null +++ b/ci/docker/ray-wheel.wanda.yaml @@ -0,0 +1,24 @@ +name: "ray-wheel-py$PYTHON_VERSION$ARCH_SUFFIX" +disable_caching: true +froms: + - "rayproject/manylinux2014:$MANYLINUX_VERSION-jdk-$HOSTTYPE" + - "cr.ray.io/rayproject/ray-core-py$PYTHON_VERSION$ARCH_SUFFIX" # C++ binaries (ray_pkg.zip) + - "cr.ray.io/rayproject/ray-java-build$ARCH_SUFFIX" # Java JARs + - "cr.ray.io/rayproject/ray-dashboard" # Dashboard +dockerfile: ci/docker/ray-wheel.Dockerfile +srcs: + - pyproject.toml + - README.rst + - ci/build/build-manylinux-wheel.sh + - python/ + - rllib/ +build_args: + - PYTHON_VERSION + - MANYLINUX_VERSION + - HOSTTYPE + - BUILDKITE_COMMIT + - ARCH_SUFFIX + - WHEEL_TYPE=ray + - RAY_CORE_IMAGE=cr.ray.io/rayproject/ray-core-py$PYTHON_VERSION$ARCH_SUFFIX + - RAY_JAVA_IMAGE=cr.ray.io/rayproject/ray-java-build$ARCH_SUFFIX + - RAY_DASHBOARD_IMAGE=cr.ray.io/rayproject/ray-dashboard diff --git a/ci/pipeline/test_rules.txt b/ci/pipeline/test_rules.txt 
index c6e3b82ddd55..0fd8dd996552 100644 --- a/ci/pipeline/test_rules.txt +++ b/ci/pipeline/test_rules.txt @@ -10,6 +10,8 @@ # file # File to match # dir/*.py # Pattern to match, using fnmatch, matches dir/a.py dir/dir/b.py or dir/.py # @ tag1 tag2 tag3 # Tags to emit for a rule. A rule without tags is a skipping rule. +# \fallthrough # Tags are always included, matching continues to next rule +# \default # Rule matches any file (catch-all) # # ; # Semicolon to separate rules @@ -20,6 +22,10 @@ ! linux_wheels macos_wheels docker doc python_dependencies tools ! release_tests spark_on_ray +\fallthrough +@ always lint +; + python/ray/air/ @ ml train tune data linux_wheels ; @@ -257,3 +263,9 @@ setup_hooks.sh .fossa.yml # pass ; + +\default +@ ml tune train data serve +@ core_cpp cpp java python doc +@ linux_wheels macos_wheels dashboard tools release_tests +; diff --git a/ci/ray_ci/automation/BUILD.bazel b/ci/ray_ci/automation/BUILD.bazel index 61553051805e..51920da51fef 100644 --- a/ci/ray_ci/automation/BUILD.bazel +++ b/ci/ray_ci/automation/BUILD.bazel @@ -305,3 +305,56 @@ py_binary( ci_require("click"), ], ) + +py_binary( + name = "push_ray_image", + srcs = ["push_ray_image.py"], + exec_compatible_with = ["//bazel:py3"], + deps = [ + ":crane_lib", + "//ci/ray_ci:ray_ci_lib", + ci_require("click"), + ], +) + +py_test( + name = "test_push_ray_image", + size = "small", + srcs = ["test_push_ray_image.py"], + exec_compatible_with = ["//bazel:py3"], + tags = [ + "ci_unit", + "team:ci", + ], + deps = [ + ":push_ray_image", + ci_require("pytest"), + ], +) + +py_binary( + name = "push_anyscale_image", + srcs = ["push_anyscale_image.py"], + exec_compatible_with = ["//bazel:py3"], + deps = [ + ":crane_lib", + "//ci/ray_ci:ray_ci_lib", + "//release:ray_release", + ci_require("click"), + ], +) + +py_test( + name = "test_push_anyscale_image", + size = "small", + srcs = ["test_push_anyscale_image.py"], + exec_compatible_with = ["//bazel:py3"], + tags = [ + "ci_unit", + "team:ci", 
+ ], + deps = [ + ":push_anyscale_image", + ci_require("pytest"), + ], +) diff --git a/ci/ray_ci/automation/push_anyscale_image.py b/ci/ray_ci/automation/push_anyscale_image.py new file mode 100644 index 000000000000..9951adf1e58b --- /dev/null +++ b/ci/ray_ci/automation/push_anyscale_image.py @@ -0,0 +1,254 @@ +""" +Push Wanda-cached anyscale images to ECR, GCP, and Azure registries. + +This script copies anyscale images from the Wanda cache to the three cloud +registries used for release tests: +- AWS ECR: anyscale/{image_type}:{tag} +- GCP Artifact Registry: anyscale/{image_type}:{tag} +- Azure Container Registry: anyscale/{image_type}:{tag} + +Example: + bazel run //ci/ray_ci/automation:push_anyscale_image -- \\ + --python-version 3.10 \\ + --platform cpu \\ + --image-type ray \\ + --upload + +Run with --help to see all options. +""" + +import logging +import os +import sys +from typing import List + +import click + +from ci.ray_ci.automation.crane_lib import ( + call_crane_copy, + call_crane_manifest, +) +from ci.ray_ci.utils import ci_init, ecr_docker_login + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(message)s", + stream=sys.stdout, +) +logger = logging.getLogger(__name__) + +# Registry URLs +_DOCKER_ECR_REPO = os.environ.get( + "RAYCI_WORK_REPO", + "029272617770.dkr.ecr.us-west-2.amazonaws.com/rayproject", +) +_DOCKER_GCP_REGISTRY = os.environ.get( + "RAYCI_GCP_REGISTRY", + "us-west1-docker.pkg.dev/anyscale-oss-ci", +) +_DOCKER_AZURE_REGISTRY = os.environ.get( + "RAYCI_AZURE_REGISTRY", + "rayreleasetest.azurecr.io", +) + +# GPU_PLATFORM is the default GPU platform that gets aliased as "gpu" +# This must match the definition in ci/ray_ci/docker_container.py +GPU_PLATFORM = "cu12.1.1-cudnn8" + + +class PushAnyscaleImageError(Exception): + """Error raised when pushing anyscale images fails.""" + + +def _format_python_version_tag(python_version: str) -> str: + """Format python version as -py310 (no dots, with hyphen prefix).""" 
+ return f"-py{python_version.replace('.', '')}" + + +def _format_platform_tag(platform: str) -> str: + """Format platform as -cpu or shortened CUDA version.""" + if platform == "cpu": + return "-cpu" + # cu12.3.2-cudnn9 -> -cu123 + versions = platform.split(".") + return f"-{versions[0]}{versions[1]}" + + +def _get_image_tags(python_version: str, platform: str) -> List[str]: + """ + Generate image tags matching the original docker_container.py format. + + Returns multiple tags for the image (canonical + aliases). + For GPU_PLATFORM, also generates -gpu alias tags to match release test expectations. + """ + branch = os.environ.get("BUILDKITE_BRANCH", "") + commit = os.environ.get("BUILDKITE_COMMIT", "")[:6] + rayci_build_id = os.environ.get("RAYCI_BUILD_ID", "") + + py_tag = _format_python_version_tag(python_version) + platform_tag = _format_platform_tag(platform) + + # For GPU_PLATFORM, also create -gpu alias (release tests use type: gpu) + platform_tags = [platform_tag] + if platform == GPU_PLATFORM: + platform_tags.append("-gpu") + + tags = [] + + if branch == "master": + # On master, use sha and build_id as tags + for ptag in platform_tags: + tags.append(f"{commit}{py_tag}{ptag}") + if rayci_build_id: + for ptag in platform_tags: + tags.append(f"{rayci_build_id}{py_tag}{ptag}") + elif branch.startswith("releases/"): + # On release branches, use release name + release_name = branch[len("releases/") :] + for ptag in platform_tags: + tags.append(f"{release_name}.{commit}{py_tag}{ptag}") + if rayci_build_id: + for ptag in platform_tags: + tags.append(f"{rayci_build_id}{py_tag}{ptag}") + else: + # For other branches (PRs, etc.) 
+ pr = os.environ.get("BUILDKITE_PULL_REQUEST", "false") + if pr != "false": + for ptag in platform_tags: + tags.append(f"pr-{pr}.{commit}{py_tag}{ptag}") + else: + for ptag in platform_tags: + tags.append(f"{commit}{py_tag}{ptag}") + if rayci_build_id: + for ptag in platform_tags: + tags.append(f"{rayci_build_id}{py_tag}{ptag}") + + return tags + + +def _get_wanda_image_name(python_version: str, platform: str, image_type: str) -> str: + """Get the wanda-cached image name. + + Platform is passed with "cu" prefix (e.g., "cu12.3.2-cudnn9") or "cpu". + """ + if platform == "cpu": + return f"{image_type}-anyscale-py{python_version}-cpu" + else: + # Platform already includes "cu" prefix from pipeline matrix + return f"{image_type}-anyscale-py{python_version}-{platform}" + + +def _image_exists(tag: str) -> bool: + """Check if a container image manifest exists using crane.""" + return_code, _ = call_crane_manifest(tag) + return return_code == 0 + + +def _copy_image(source: str, destination: str, dry_run: bool = False) -> None: + """Copy a container image from source to destination using crane.""" + if dry_run: + logger.info(f"DRY RUN: Would copy {source} -> {destination}") + return + + logger.info(f"Copying {source} -> {destination}") + return_code, output = call_crane_copy(source, destination) + if return_code != 0: + raise PushAnyscaleImageError(f"Crane copy failed: {output}") + + +@click.command() +@click.option( + "--python-version", + type=str, + required=True, + help="Python version (e.g., '3.10')", +) +@click.option( + "--platform", + type=str, + required=True, + help="Platform (e.g., 'cpu', 'cu12.3.2-cudnn9')", +) +@click.option( + "--image-type", + type=str, + default="ray", + help="Image type (e.g., 'ray', 'ray-llm', 'ray-ml')", +) +@click.option( + "--upload", + is_flag=True, + default=False, + help="Actually push to registries. 
Without this flag, runs in dry-run mode.", +) +def main( + python_version: str, + platform: str, + image_type: str, + upload: bool, +) -> None: + """ + Push a Wanda-cached anyscale image to ECR, GCP, and Azure registries. + + NOTE: GCP and Azure authentication must be done BEFORE calling this script + (e.g., via gcloud_docker_login.sh and azure_docker_login.sh in the pipeline). + ECR authentication is handled internally. + """ + ci_init() + + dry_run = not upload + if dry_run: + logger.info("DRY RUN MODE - no images will be pushed") + + # Get required environment variables + rayci_work_repo = os.environ.get("RAYCI_WORK_REPO", _DOCKER_ECR_REPO) + rayci_build_id = os.environ.get("RAYCI_BUILD_ID") + + if not rayci_build_id: + raise PushAnyscaleImageError("RAYCI_BUILD_ID environment variable not set") + + # Construct source image from Wanda cache + wanda_image_name = _get_wanda_image_name(python_version, platform, image_type) + source_tag = f"{rayci_work_repo}:{rayci_build_id}-{wanda_image_name}" + + logger.info(f"Source image (Wanda): {source_tag}") + + # Authenticate with ECR (source registry) + ecr_registry = rayci_work_repo.split("/")[0] + ecr_docker_login(ecr_registry) + + # Verify source image exists + logger.info("Verifying source image in Wanda cache...") + if not _image_exists(source_tag): + raise PushAnyscaleImageError( + f"Source image not found in Wanda cache: {source_tag}" + ) + + # Get image tags + tags = _get_image_tags(python_version, platform) + canonical_tag = tags[0] + + logger.info(f"Canonical tag: {canonical_tag}") + logger.info(f"All tags: {tags}") + + # Push to all three registries (ECR, GCP, Azure) + # NOTE: Authentication for GCP/Azure must be done in the pipeline step BEFORE + # calling this script (e.g., via gcloud_docker_login.sh and azure_docker_login.sh). 
+ registries = [ + (ecr_registry, "ECR"), + (_DOCKER_GCP_REGISTRY, "GCP"), + (_DOCKER_AZURE_REGISTRY, "Azure"), + ] + + for tag in tags: + for registry, name in registries: + dest_image = f"{registry}/anyscale/{image_type}:{tag}" + logger.info(f"Pushing to {name}: {dest_image}") + _copy_image(source_tag, dest_image, dry_run) + + logger.info("Successfully pushed anyscale images to all registries") + + +if __name__ == "__main__": + main() diff --git a/ci/ray_ci/automation/push_ray_image.py b/ci/ray_ci/automation/push_ray_image.py new file mode 100644 index 000000000000..4854aa54e4e5 --- /dev/null +++ b/ci/ray_ci/automation/push_ray_image.py @@ -0,0 +1,309 @@ +""" +Push Wanda-cached ray images to Docker Hub. + +This script copies ray images from the Wanda cache to Docker Hub with tags +matching the original format from docker_container.py. + +Supports multiple image types: + - ray: Standard ray image -> rayproject/ray + - ray-extra: Ray with extra deps -> rayproject/ray + - ray-llm: Ray for LLM workloads -> rayproject/ray-llm + - ray-llm-extra: Ray LLM with extra deps -> rayproject/ray-llm + +Example: + bazel run //ci/ray_ci/automation:push_ray_image -- \\ + --python-version 3.10 \\ + --platform cpu \\ + --image-type ray \\ + --upload + +Tag format: + - Nightly: nightly.YYMMDD.{sha[:6]}-py310-cpu + - Release: {release_name}.{sha[:6]}-py310-cpu + - Other: {sha[:6]}-py310-cpu + +Run with --help to see all options. 
+""" + +import logging +import os +import sys +from datetime import datetime, timezone as tz +from typing import List + +import click + +from ci.ray_ci.automation.crane_lib import ( + call_crane_copy, + call_crane_manifest, +) +from ci.ray_ci.docker_container import RAY_REPO_MAP +from ci.ray_ci.utils import ecr_docker_login + +# GPU_PLATFORM is the default GPU platform that gets aliased as "gpu" +# This must match the definition in ci/ray_ci/docker_container.py +GPU_PLATFORM = "cu12.1.1-cudnn8" + +# Default architecture (x86_64 gets no suffix) +DEFAULT_ARCHITECTURE = "x86_64" + +# Valid image types that can be pushed +VALID_IMAGE_TYPES = list(RAY_REPO_MAP.keys()) + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(message)s", + stream=sys.stdout, +) +logger = logging.getLogger(__name__) + + +class PushRayImageError(Exception): + """Error raised when pushing ray images fails.""" + + +def _format_python_version_tag(python_version: str) -> str: + """ + Format python version as -py310 (no dots, with hyphen prefix). + + Examples: + 3.10 -> -py310 + 3.11 -> -py311 + """ + return f"-py{python_version.replace('.', '')}" + + +def _format_platform_tag(platform: str) -> str: + """ + Format platform as -cpu or shortened CUDA version. + + Examples: + cpu -> -cpu + cu11.7.1-cudnn8 -> -cu117 + cu12.1.1-cudnn8 -> -cu121 + """ + if platform == "cpu": + return "-cpu" + # cu11.7.1-cudnn8 -> ['cu11', '7', '1-cudnn8'] -> -cu117 + versions = platform.split(".") + return f"-{versions[0]}{versions[1]}" + + +def _format_architecture_tag(architecture: str) -> str: + """ + Format architecture as suffix (empty for x86_64, -aarch64 for aarch64). 
+ + Examples: + x86_64 -> "" + aarch64 -> -aarch64 + """ + if architecture == DEFAULT_ARCHITECTURE: + return "" + return f"-{architecture}" + + +def _generate_image_tags( + commit: str, + python_version: str, + platform: str, + architecture: str = DEFAULT_ARCHITECTURE, +) -> List[str]: + """ + Generate destination tags matching the original ray docker image format. + + Tag format: + {version_prefix}{py_tag}{platform_tag}{arch_tag} + + Version prefix: + - Nightly (master + nightly schedule): nightly.YYMMDD.{sha[:6]} + - Release branches: {release_name}.{sha[:6]} + - Other: {sha[:6]} + + For GPU_PLATFORM, also generates -gpu alias tags. + """ + branch = os.environ.get("BUILDKITE_BRANCH", "") + schedule = os.environ.get("RAYCI_SCHEDULE", "") + + sha_tag = commit[:6] + formatted_date = datetime.now(tz.utc).strftime("%y%m%d") + + # Generate version prefix + if branch == "master" and schedule == "nightly": + version_tags = [f"nightly.{formatted_date}.{sha_tag}"] + elif branch.startswith("releases/"): + release_name = branch[len("releases/") :] + version_tags = [f"{release_name}.{sha_tag}"] + else: + version_tags = [sha_tag] + + py_tag = _format_python_version_tag(python_version) + arch_tag = _format_architecture_tag(architecture) + + # For GPU_PLATFORM, also create -gpu alias + platform_tags = [_format_platform_tag(platform)] + if platform == GPU_PLATFORM: + platform_tags.append("-gpu") + + tags = [] + for version in version_tags: + for ptag in platform_tags: + tags.append(f"{version}{py_tag}{ptag}{arch_tag}") + + return tags + + +def _get_wanda_image_name( + image_type: str, + python_version: str, + platform: str, + architecture: str = DEFAULT_ARCHITECTURE, +) -> str: + """ + Get the wanda-cached image name for the given image type. 
+ + Wanda image naming follows the pattern: + {image_type}-py{version}-{platform}{arch_suffix} + + Examples: + ray-py3.10-cpu + ray-extra-py3.10-cu12.1.1-cudnn8 + ray-llm-py3.11-cu12.8.1-cudnn + """ + arch_suffix = _format_architecture_tag(architecture) + if platform == "cpu": + return f"{image_type}-py{python_version}-cpu{arch_suffix}" + else: + return f"{image_type}-py{python_version}-{platform}{arch_suffix}" + + +def _image_exists(tag: str) -> bool: + """Check if a container image manifest exists using crane.""" + return_code, _ = call_crane_manifest(tag) + return return_code == 0 + + +def _copy_image(source: str, destination: str, dry_run: bool = False) -> None: + """Copy a container image from source to destination using crane.""" + if dry_run: + logger.info(f"DRY RUN: Would copy {source} -> {destination}") + return + + logger.info(f"Copying {source} -> {destination}") + return_code, output = call_crane_copy(source, destination) + if return_code != 0: + raise PushRayImageError(f"Crane copy failed: {output}") + logger.info(f"Successfully copied to {destination}") + + +@click.command() +@click.option( + "--python-version", + type=str, + required=True, + help="Python version (e.g., '3.10')", +) +@click.option( + "--platform", + type=str, + required=True, + help="Platform (e.g., 'cpu', 'cu11.7.1-cudnn8')", +) +@click.option( + "--image-type", + type=click.Choice(VALID_IMAGE_TYPES), + default="ray", + help="Image type (e.g., 'ray', 'ray-extra', 'ray-llm', 'ray-llm-extra')", +) +@click.option( + "--architecture", + type=str, + default=DEFAULT_ARCHITECTURE, + help="Architecture (e.g., 'x86_64', 'aarch64')", +) +@click.option( + "--upload", + is_flag=True, + default=False, + help="Actually push to Docker Hub. Without this flag, runs in dry-run mode.", +) +def main( + python_version: str, + platform: str, + image_type: str, + architecture: str, + upload: bool, +) -> None: + """ + Push a Wanda-cached ray image to Docker Hub. 
+ + Handles authentication for ECR (source/Wanda cache) and Docker Hub + (destination via copy_files.py). + + Supports multiple image types which map to Docker Hub repos: + - ray, ray-extra -> rayproject/ray + - ray-llm, ray-llm-extra -> rayproject/ray-llm + - ray-ml, ray-ml-extra -> rayproject/ray-ml + + Tags are generated matching the original docker_container.py format: + - Nightly: nightly.YYMMDD.{sha[:6]}-py310-cpu + - Release: {release_name}.{sha[:6]}-py310-cpu + + For GPU_PLATFORM (cu12.1.1-cudnn8), also pushes with -gpu alias tag. + """ + dry_run = not upload + if dry_run: + logger.info("DRY RUN MODE - no images will be pushed") + + # Get required environment variables + rayci_work_repo = os.environ.get("RAYCI_WORK_REPO") + rayci_build_id = os.environ.get("RAYCI_BUILD_ID") + commit = os.environ.get("BUILDKITE_COMMIT") + + required = { + "RAYCI_WORK_REPO": rayci_work_repo, + "RAYCI_BUILD_ID": rayci_build_id, + "BUILDKITE_COMMIT": commit, + } + missing = [k for k, v in required.items() if not v] + if missing: + raise PushRayImageError(f"Missing required env vars: {', '.join(missing)}") + + # Determine destination Docker Hub repository from image type + docker_hub_repo = f"rayproject/{RAY_REPO_MAP[image_type]}" + logger.info(f"Image type: {image_type} -> Docker Hub repo: {docker_hub_repo}") + + # Construct source image from Wanda cache + wanda_image_name = _get_wanda_image_name( + image_type, python_version, platform, architecture + ) + source_tag = f"{rayci_work_repo}:{rayci_build_id}-{wanda_image_name}" + + # Generate destination tags (may include aliases like -gpu for GPU_PLATFORM) + destination_tags = _generate_image_tags( + commit, python_version, platform, architecture + ) + + logger.info(f"Source image (Wanda): {source_tag}") + logger.info(f"Destination tags: {destination_tags}") + + # Authenticate with ECR (source registry) + # Docker Hub auth is handled by copy_files.py --destination docker_login + ecr_registry = rayci_work_repo.split("/")[0] + 
ecr_docker_login(ecr_registry) + + # Verify source image exists + logger.info("Verifying source image in Wanda cache...") + if not _image_exists(source_tag): + raise PushRayImageError(f"Source image not found in Wanda cache: {source_tag}") + + # Copy image to Docker Hub with all tags + for tag in destination_tags: + full_destination = f"{docker_hub_repo}:{tag}" + _copy_image(source_tag, full_destination, dry_run) + + logger.info(f"Successfully pushed {image_type} image with tags: {destination_tags}") + + +if __name__ == "__main__": + main() diff --git a/ci/ray_ci/automation/test_push_anyscale_image.py b/ci/ray_ci/automation/test_push_anyscale_image.py new file mode 100644 index 000000000000..2c0e33cd4b90 --- /dev/null +++ b/ci/ray_ci/automation/test_push_anyscale_image.py @@ -0,0 +1,203 @@ +import sys +from unittest import mock + +import pytest + +from ci.ray_ci.automation.push_anyscale_image import ( + GPU_PLATFORM, + _format_platform_tag, + _format_python_version_tag, + _get_image_tags, + _get_wanda_image_name, +) + + +class TestFormatPythonVersionTag: + @pytest.mark.parametrize( + ("python_version", "expected"), + [ + ("3.10", "-py310"), + ("3.11", "-py311"), + ("3.12", "-py312"), + ("3.9", "-py39"), + ], + ) + def test_format_python_version_tag(self, python_version, expected): + assert _format_python_version_tag(python_version) == expected + + +class TestFormatPlatformTag: + @pytest.mark.parametrize( + ("platform", "expected"), + [ + ("cpu", "-cpu"), + ("cu11.7.1-cudnn8", "-cu117"), + ("cu11.8.0-cudnn8", "-cu118"), + ("cu12.1.1-cudnn8", "-cu121"), + ("cu12.3.2-cudnn9", "-cu123"), + ("cu12.8.1-cudnn", "-cu128"), + ], + ) + def test_format_platform_tag(self, platform, expected): + assert _format_platform_tag(platform) == expected + + +class TestGetWandaImageName: + @pytest.mark.parametrize( + ("python_version", "platform", "image_type", "expected"), + [ + ("3.10", "cpu", "ray", "ray-anyscale-py3.10-cpu"), + ("3.11", "cu12.1.1-cudnn8", "ray", 
"ray-anyscale-py3.11-cu12.1.1-cudnn8"), + ("3.10", "cpu", "ray-llm", "ray-llm-anyscale-py3.10-cpu"), + ( + "3.11", + "cu12.8.1-cudnn", + "ray-llm", + "ray-llm-anyscale-py3.11-cu12.8.1-cudnn", + ), + ("3.10", "cpu", "ray-ml", "ray-ml-anyscale-py3.10-cpu"), + ( + "3.11", + "cu12.3.2-cudnn9", + "ray-ml", + "ray-ml-anyscale-py3.11-cu12.3.2-cudnn9", + ), + ], + ) + def test_get_wanda_image_name(self, python_version, platform, image_type, expected): + assert _get_wanda_image_name(python_version, platform, image_type) == expected + + +class TestGetImageTags: + @mock.patch.dict( + "os.environ", + { + "BUILDKITE_BRANCH": "master", + "BUILDKITE_COMMIT": "abc123def456", + "RAYCI_BUILD_ID": "build-123", + }, + ) + def test_master_branch_tags(self): + tags = _get_image_tags(python_version="3.10", platform="cpu") + + assert tags == [ + "abc123-py310-cpu", + "build-123-py310-cpu", + ] + + @mock.patch.dict( + "os.environ", + { + "BUILDKITE_BRANCH": "master", + "BUILDKITE_COMMIT": "abc123def456", + "RAYCI_BUILD_ID": "build-123", + }, + ) + def test_master_branch_gpu_platform_includes_alias(self): + tags = _get_image_tags(python_version="3.10", platform=GPU_PLATFORM) + + assert tags == [ + "abc123-py310-cu121", + "abc123-py310-gpu", + "build-123-py310-cu121", + "build-123-py310-gpu", + ] + + @mock.patch.dict( + "os.environ", + { + "BUILDKITE_BRANCH": "master", + "BUILDKITE_COMMIT": "abc123def456", + "RAYCI_BUILD_ID": "", + }, + ) + def test_master_branch_no_build_id(self): + tags = _get_image_tags(python_version="3.11", platform="cu12.3.2-cudnn9") + + assert tags == ["abc123-py311-cu123"] + + @mock.patch.dict( + "os.environ", + { + "BUILDKITE_BRANCH": "releases/2.44.0", + "BUILDKITE_COMMIT": "abc123def456", + "RAYCI_BUILD_ID": "build-456", + }, + ) + def test_release_branch_tags(self): + tags = _get_image_tags(python_version="3.10", platform="cpu") + + assert tags == [ + "2.44.0.abc123-py310-cpu", + "build-456-py310-cpu", + ] + + @mock.patch.dict( + "os.environ", + { + 
"BUILDKITE_BRANCH": "releases/2.44.0", + "BUILDKITE_COMMIT": "abc123def456", + "RAYCI_BUILD_ID": "build-456", + }, + ) + def test_release_branch_gpu_platform_includes_alias(self): + tags = _get_image_tags(python_version="3.10", platform=GPU_PLATFORM) + + assert tags == [ + "2.44.0.abc123-py310-cu121", + "2.44.0.abc123-py310-gpu", + "build-456-py310-cu121", + "build-456-py310-gpu", + ] + + @mock.patch.dict( + "os.environ", + { + "BUILDKITE_BRANCH": "feature-branch", + "BUILDKITE_COMMIT": "abc123def456", + "BUILDKITE_PULL_REQUEST": "123", + "RAYCI_BUILD_ID": "build-789", + }, + ) + def test_pr_branch_tags(self): + tags = _get_image_tags(python_version="3.12", platform="cpu") + + assert tags == [ + "pr-123.abc123-py312-cpu", + "build-789-py312-cpu", + ] + + @mock.patch.dict( + "os.environ", + { + "BUILDKITE_BRANCH": "feature-branch", + "BUILDKITE_COMMIT": "abc123def456", + "BUILDKITE_PULL_REQUEST": "false", + "RAYCI_BUILD_ID": "build-789", + }, + ) + def test_non_pr_feature_branch_tags(self): + tags = _get_image_tags(python_version="3.10", platform="cpu") + + assert tags == [ + "abc123-py310-cpu", + "build-789-py310-cpu", + ] + + @mock.patch.dict( + "os.environ", + { + "BUILDKITE_BRANCH": "feature-branch", + "BUILDKITE_COMMIT": "abc123def456", + "BUILDKITE_PULL_REQUEST": "false", + "RAYCI_BUILD_ID": "", + }, + ) + def test_feature_branch_no_build_id(self): + tags = _get_image_tags(python_version="3.10", platform="cpu") + + assert tags == ["abc123-py310-cpu"] + + +if __name__ == "__main__": + sys.exit(pytest.main(["-vv", __file__])) diff --git a/ci/ray_ci/automation/test_push_ray_image.py b/ci/ray_ci/automation/test_push_ray_image.py new file mode 100644 index 000000000000..8ee884669fcd --- /dev/null +++ b/ci/ray_ci/automation/test_push_ray_image.py @@ -0,0 +1,212 @@ +import sys +from unittest import mock + +import pytest + +from ci.ray_ci.automation.push_ray_image import ( + GPU_PLATFORM, + _format_architecture_tag, + _format_platform_tag, + 
_format_python_version_tag, + _generate_image_tags, + _get_wanda_image_name, +) + + +class TestFormatPythonVersionTag: + @pytest.mark.parametrize( + ("python_version", "expected"), + [ + ("3.10", "-py310"), + ("3.11", "-py311"), + ("3.12", "-py312"), + ("3.9", "-py39"), + ], + ) + def test_format_python_version_tag(self, python_version, expected): + assert _format_python_version_tag(python_version) == expected + + +class TestFormatPlatformTag: + @pytest.mark.parametrize( + ("platform", "expected"), + [ + ("cpu", "-cpu"), + ("cu11.7.1-cudnn8", "-cu117"), + ("cu11.8.0-cudnn8", "-cu118"), + ("cu12.1.1-cudnn8", "-cu121"), + ("cu12.3.2-cudnn9", "-cu123"), + ("cu12.8.1-cudnn", "-cu128"), + ], + ) + def test_format_platform_tag(self, platform, expected): + assert _format_platform_tag(platform) == expected + + +class TestFormatArchitectureTag: + @pytest.mark.parametrize( + ("architecture", "expected"), + [ + ("x86_64", ""), + ("aarch64", "-aarch64"), + ], + ) + def test_format_architecture_tag(self, architecture, expected): + assert _format_architecture_tag(architecture) == expected + + +class TestGetWandaImageName: + @pytest.mark.parametrize( + ("image_type", "python_version", "platform", "architecture", "expected"), + [ + ("ray", "3.10", "cpu", "x86_64", "ray-py3.10-cpu"), + ("ray", "3.11", "cu12.1.1-cudnn8", "x86_64", "ray-py3.11-cu12.1.1-cudnn8"), + ("ray", "3.10", "cpu", "aarch64", "ray-py3.10-cpu-aarch64"), + ("ray-extra", "3.10", "cpu", "x86_64", "ray-extra-py3.10-cpu"), + ( + "ray-extra", + "3.11", + "cu12.8.1-cudnn", + "x86_64", + "ray-extra-py3.11-cu12.8.1-cudnn", + ), + ( + "ray-llm", + "3.11", + "cu12.8.1-cudnn", + "x86_64", + "ray-llm-py3.11-cu12.8.1-cudnn", + ), + ( + "ray-llm-extra", + "3.11", + "cu12.8.1-cudnn", + "x86_64", + "ray-llm-extra-py3.11-cu12.8.1-cudnn", + ), + ], + ) + def test_get_wanda_image_name( + self, image_type, python_version, platform, architecture, expected + ): + assert ( + _get_wanda_image_name(image_type, python_version, platform, 
architecture) + == expected + ) + + +class TestGenerateImageTags: + @mock.patch.dict( + "os.environ", + { + "BUILDKITE_BRANCH": "master", + "RAYCI_SCHEDULE": "nightly", + }, + ) + @mock.patch("ci.ray_ci.automation.push_ray_image.datetime") + def test_nightly_tags(self, mock_datetime): + mock_datetime.now.return_value.strftime.return_value = "260107" + + tags = _generate_image_tags( + commit="abc123def456", + python_version="3.10", + platform="cpu", + architecture="x86_64", + ) + + assert tags == ["nightly.260107.abc123-py310-cpu"] + + @mock.patch.dict( + "os.environ", + { + "BUILDKITE_BRANCH": "master", + "RAYCI_SCHEDULE": "nightly", + }, + ) + @mock.patch("ci.ray_ci.automation.push_ray_image.datetime") + def test_nightly_tags_gpu_platform_includes_alias(self, mock_datetime): + mock_datetime.now.return_value.strftime.return_value = "260107" + + tags = _generate_image_tags( + commit="abc123def456", + python_version="3.10", + platform=GPU_PLATFORM, + architecture="x86_64", + ) + + assert tags == [ + "nightly.260107.abc123-py310-cu121", + "nightly.260107.abc123-py310-gpu", + ] + + @mock.patch.dict( + "os.environ", + { + "BUILDKITE_BRANCH": "releases/2.44.0", + "RAYCI_SCHEDULE": "", + }, + ) + def test_release_tags(self): + tags = _generate_image_tags( + commit="abc123def456", + python_version="3.11", + platform="cu12.3.2-cudnn9", + architecture="x86_64", + ) + + assert tags == ["2.44.0.abc123-py311-cu123"] + + @mock.patch.dict( + "os.environ", + { + "BUILDKITE_BRANCH": "releases/2.44.0", + "RAYCI_SCHEDULE": "", + }, + ) + def test_release_tags_aarch64(self): + tags = _generate_image_tags( + commit="abc123def456", + python_version="3.10", + platform="cpu", + architecture="aarch64", + ) + + assert tags == ["2.44.0.abc123-py310-cpu-aarch64"] + + @mock.patch.dict( + "os.environ", + { + "BUILDKITE_BRANCH": "feature-branch", + "RAYCI_SCHEDULE": "", + }, + ) + def test_other_branch_tags(self): + tags = _generate_image_tags( + commit="abc123def456", + python_version="3.12", + 
platform="cpu", + architecture="x86_64", + ) + + assert tags == ["abc123-py312-cpu"] + + @mock.patch.dict( + "os.environ", + { + "BUILDKITE_BRANCH": "master", + "RAYCI_SCHEDULE": "", # Not nightly + }, + ) + def test_master_non_nightly_tags(self): + tags = _generate_image_tags( + commit="abc123def456", + python_version="3.10", + platform="cpu", + architecture="x86_64", + ) + + assert tags == ["abc123-py310-cpu"] + + +if __name__ == "__main__": + sys.exit(pytest.main(["-vv", __file__]))