Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
248 changes: 97 additions & 151 deletions .buildkite/rllib.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,41 +24,20 @@ steps:
tags: cibase

# tests
- label: ":brain: rllib: algorithm, model and others"
- label: ":brain: rllib: component testing"
tags: rllib_directly
parallelism: 4
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
--except-tags learning_tests,memory_leak_tests,examples,tests_dir,documentation,multi_gpu,no_cpu,torch_2.x_only_benchmark,manual
--only-tags env,evaluation,models,offline,policy,utils,algorithms,callbacks,core
--except-tags gpu,multi_gpu,manual
--test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
--build-name rllibbuild-py3.10
--python-version 3.10
depends_on: rllibbuild

- label: ":brain: rllib: learning tests pytorch"
tags: rllib
parallelism: 5
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
--only-tags fake_gpus,learning_tests_discrete,crashing_cartpole,stateless_cartpole,learning_tests_continuous
--except-tags tf_only,tf2_only,gpu,multi_gpu,learning_tests_pytorch_use_all_core
--test-arg --framework=torch
--build-name rllibbuild-py3.10
--python-version 3.10
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
--only-tags learning_tests_pytorch_use_all_core
--except-tags tf_only,tf2_only,gpu,multi_gpu
--test-arg --framework=torch
--skip-ray-installation
--build-name rllibbuild-py3.10
--python-version 3.10
depends_on: rllibbuild

- label: ":brain: rllib: examples"
tags: rllib
parallelism: 6
Expand All @@ -67,30 +46,38 @@ steps:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 2
--only-tags examples
--except-tags multi_gpu,gpu,examples_use_all_core
--except-tags gpu,multi_gpu,manual,examples_use_all_core
--test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
--build-name rllibbuild-py3.10
--python-version 3.10
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
--only-tags examples_use_all_core
--skip-ray-installation
--except-tags multi_gpu,gpu
--except-tags gpu,multi_gpu,manual
--test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
--build-name rllibbuild-py3.10
--python-version 3.10
--skip-ray-installation
depends_on: rllibbuild

- label: ":brain: rllib: tests dir"
tags: rllib_directly
parallelism: 2
- label: ":brain: rllib: learning tests"
tags: rllib
parallelism: 5
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
--only-tags tests_dir
--except-tags multi_gpu,manual
--test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
--only-tags learning_tests
--except-tags gpu,multi_gpu,learning_tests_pytorch_use_all_core,manual
--test-arg --framework=torch
--build-name rllibbuild-py3.10
--python-version 3.10
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
--only-tags learning_tests_pytorch_use_all_core,manual
--except-tags gpu,multi_gpu
--test-arg --framework=torch
--skip-ray-installation
--build-name rllibbuild-py3.10
--python-version 3.10
depends_on: rllibbuild
Expand All @@ -106,24 +93,43 @@ steps:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
--only-tags gpu
--except-tags multi_gpu,manual
--test-env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
--test-env=RLLIB_NUM_GPUS=1
--build-name rllibgpubuild-py3.10
--python-version 3.10
depends_on: rllibgpubuild

- label: ":brain: rllib: multi-gpu tests"
tags:
- rllib_gpu
- gpu
- skip-on-microcheck
parallelism: 5
instance_type: gpu-large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
--parallelism-per-worker 2
--gpus 4
--only-tags multi_gpu
--except-tags manual
--build-name rllibgpubuild-py3.10
--python-version 3.10
depends_on: rllibgpubuild

- label: ":brain: rllib: data tests"
tags:
- data
- rllib
- disabled # Tests of this tag do not exist any more.
- disabled # Tests of this tag do not exist anymore.
instance_type: large
commands:
# learning tests pytorch
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--parallelism-per-worker 3
--only-tags learning_tests_with_ray_data
--except-tags multi_gpu,gpu,tf_only,tf2_only
--except-tags gpu,multi_gpu,manual
--test-arg --framework=torch
--build-name rllibbuild-py3.10
--python-version 3.10
Expand All @@ -132,32 +138,13 @@ steps:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--parallelism-per-worker 3
--only-tags ray_data
--except-tags learning_tests_with_ray_data,multi_gpu,gpu
--except-tags learning_tests_with_ray_data,gpu,multi_gpu,manual
--skip-ray-installation
--build-name rllibbuild-py3.10
--python-version 3.10
# reuse the same docker image as the previous run
depends_on: rllibbuild

- label: ":brain: rllib: benchmarks"
tags: rllib
instance_type: medium
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --only-tags torch_2.x_only_benchmark
--build-name rllibbuild-py3.10
--python-version 3.10
depends_on: rllibbuild

# - label: ":brain: rllib: memory leak pytorch tests"
# tags: rllib
# instance_type: medium
# commands:
# - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
# --only-tags memory_leak_tests
# --except-tags flaky
# --test-arg --framework=torch
# depends_on: rllibbuild

- label: ":brain: rllib: doc tests"
tags:
- rllib_directly
Expand All @@ -167,8 +154,8 @@ steps:
commands:
# doc tests
- bazel run //ci/ray_ci:test_in_docker -- python/ray/... //doc/... rllib
--except-tags gpu
--only-tags doctest
--except-tags gpu,manual
--parallelism-per-worker 2
--build-name rllibbuild-py3.10
--python-version 3.10
Expand All @@ -179,6 +166,7 @@ steps:
--skip-ray-installation
--build-name rllibbuild-py3.10
--python-version 3.10
# documentation test
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--only-tags documentation
--parallelism-per-worker 2
Expand All @@ -187,39 +175,57 @@ steps:
--python-version 3.10
depends_on: rllibbuild

- label: ":brain: rllib: multi-gpu tests"
- label: ":brain: rllib: flaky component & examples tests"
key: rllib_flaky_tests_02
tags:
- rllib_gpu
- gpu
- skip-on-microcheck
parallelism: 5
instance_type: gpu-large
- rllib
- rllib_flaky
- skip-on-premerge
instance_type: large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
--workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
--parallelism-per-worker 2
--gpus 4
--only-tags multi_gpu
--build-name rllibgpubuild-py3.10
# components
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3
--only-tags env,evaluation,models,offline,policy,utils,algorithms,callbacks,core
--except-tags learning_tests,examples,documentation,gpu,multi_gpu,manual
--test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
--build-name rllibbuild-py3.10
--python-version 3.10
depends_on: rllibgpubuild

- label: ":brain: rllib: flaky multi-gpu tests"
key: rllib_flaky_multi_gpu_tests
# examples
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3
--only-tags examples
--except-tags multi_gpu,gpu,manual
--test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
--build-name rllibbuild-py3.10
--python-version 3.10
--skip-ray-installation # reuse the same docker image as the previous run

- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3
--only-tags examples_use_all_core
--except-tags gpu,multi_gpu,manual
--test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
--build-name rllibbuild-py3.10
--python-version 3.10
--skip-ray-installation # reuse the same docker image as the previous run
depends_on: rllibbuild
soft_fail: true

- label: ":brain: rllib: flaky learning tests"
key: rllib_flaky_tests_01
tags:
- rllib_gpu
- gpu
- rllib
- rllib_flaky
- skip-on-premerge
instance_type: gpu-large
instance_type: large
commands:
# torch
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests
--parallelism-per-worker 2
--gpus 4
--only-tags multi_gpu
--build-name rllibgpubuild-py3.10
--only-tags learning_tests,learning_tests_with_ray_data
--except-tags gpu,multi_gpu,manual
--test-arg --framework=torch
--build-name rllibbuild-py3.10
--python-version 3.10
depends_on: rllibgpubuild
depends_on: rllibbuild
soft_fail: true

- label: ":brain: rllib: flaky gpu tests"
Expand All @@ -233,89 +239,29 @@ steps:
commands:
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests
--only-tags gpu
--except-tags multi_gpu,manual
--test-env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
--test-env=RLLIB_NUM_GPUS=1
--build-name rllibgpubuild-py3.10
--python-version 3.10
depends_on: rllibgpubuild
soft_fail: true

- label: ":brain: rllib: flaky tests (learning tests)"
key: rllib_flaky_tests_01
- label: ":brain: rllib: flaky multi-gpu tests"
key: rllib_flaky_multi_gpu_tests
tags:
- rllib
- rllib_gpu
- gpu
- rllib_flaky
- skip-on-premerge
instance_type: large
instance_type: gpu-large
commands:
# torch
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests
--only-tags fake_gpus,learning_tests_discrete,learning_tests_with_ray_data,crashing_cartpole,stateless_cartpole,learning_tests_continuous
--except-tags tf_only,tf2_only,multi_gpu,gpu
--test-arg --framework=torch
--build-name rllibbuild-py3.10
--python-version 3.10

# tf2-static-graph
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests
--only-tags tf_only
--except-tags torch_only,tf2_only,no_tf_static_graph,multi_gpu,gpu
--test-arg --framework=tf
--build-name rllibbuild-py3.10
--python-version 3.10
--skip-ray-installation # reuse the same docker image as the previous run
# tf2-eager-tracing
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests
--only-tags tf2_only
--except-tags fake_gpus,torch_only,multi_gpu,no_tf_eager_tracing,gpu
--test-arg --framework=tf2
--build-name rllibbuild-py3.10
--python-version 3.10
--skip-ray-installation # reuse the same docker image as the previous run
depends_on: rllibbuild
soft_fail: true

- label: ":brain: rllib: flaky tests (examples/rlmodule/models/tests_dir)"
key: rllib_flaky_tests_02
tags:
- rllib
- rllib_flaky
- skip-on-premerge
instance_type: large
commands:
# examples
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3
--only-tags examples
--except-tags multi_gpu,gpu
--test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
--build-name rllibbuild-py3.10
--python-version 3.10

# rlmodule tests
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3
--only-tags rlm
--except-tags multi_gpu,gpu
--test-env RLLIB_ENABLE_RL_MODULE=1
--test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
--build-name rllibbuild-py3.10
--python-version 3.10
--skip-ray-installation # reuse the same docker image as the previous run

# algorithm, models
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3
--except-tags learning_tests,memory_leak_tests,examples,tests_dir,documentation,multi_gpu,gpu,no_cpu,torch_2.x_only_benchmark,manual
--test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
--build-name rllibbuild-py3.10
--python-version 3.10
--skip-ray-installation # reuse the same docker image as the previous run

# tests/ dir
- bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3
--only-tags tests_dir
--except-tags multi_gpu,gpu,manual
--test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
--build-name rllibbuild-py3.10
--parallelism-per-worker 2
--gpus 4
--only-tags multi_gpu
--except-tags manual
--build-name rllibgpubuild-py3.10
--python-version 3.10
--skip-ray-installation # reuse the same docker image as the previous run
depends_on: rllibbuild
depends_on: rllibgpubuild
soft_fail: true
Loading