From 38d0a39d39364ab70f7fecd7da702c855ed502d4 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Thu, 11 Dec 2025 15:20:53 +0000 Subject: [PATCH 01/12] remove `tests_dir` Signed-off-by: Mark Towers --- .buildkite/rllib.rayci.yml | 29 +++-------------------------- rllib/BUILD.bazel | 6 ++---- 2 files changed, 5 insertions(+), 30 deletions(-) diff --git a/.buildkite/rllib.rayci.yml b/.buildkite/rllib.rayci.yml index 2854adf318e6..3a6d17975999 100644 --- a/.buildkite/rllib.rayci.yml +++ b/.buildkite/rllib.rayci.yml @@ -31,7 +31,7 @@ steps: commands: - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3 - --except-tags learning_tests,memory_leak_tests,examples,tests_dir,documentation,multi_gpu,no_cpu,torch_2.x_only_benchmark,manual + --except-tags learning_tests,memory_leak_tests,examples,documentation,multi_gpu,no_cpu,torch_2.x_only_benchmark,manual --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --build-name rllibbuild-py3.10 --python-version 3.10 @@ -81,20 +81,6 @@ steps: --python-version 3.10 depends_on: rllibbuild - - label: ":brain: rllib: tests dir" - tags: rllib_directly - parallelism: 2 - instance_type: large - commands: - - bazel run //ci/ray_ci:test_in_docker -- //rllib/... 
rllib - --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3 - --only-tags tests_dir - --except-tags multi_gpu,manual - --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 - --build-name rllibbuild-py3.10 - --python-version 3.10 - depends_on: rllibbuild - - label: ":brain: rllib: gpu tests" tags: - rllib_gpu @@ -275,7 +261,7 @@ steps: depends_on: rllibbuild soft_fail: true - - label: ":brain: rllib: flaky tests (examples/rlmodule/models/tests_dir)" + - label: ":brain: rllib: flaky tests (examples/rlmodule/models)" key: rllib_flaky_tests_02 tags: - rllib @@ -303,16 +289,7 @@ steps: # algorithm, models - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3 - --except-tags learning_tests,memory_leak_tests,examples,tests_dir,documentation,multi_gpu,gpu,no_cpu,torch_2.x_only_benchmark,manual - --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 - --build-name rllibbuild-py3.10 - --python-version 3.10 - --skip-ray-installation # reuse the same docker image as the previous run - - # tests/ dir - - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3 - --only-tags tests_dir - --except-tags multi_gpu,gpu,manual + --except-tags learning_tests,memory_leak_tests,examples,documentation,multi_gpu,gpu,no_cpu,torch_2.x_only_benchmark,manual --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --build-name rllibbuild-py3.10 --python-version 3.10 diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index 500a86c2d696..cb0cf4469340 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -28,8 +28,6 @@ # - Algorithm tests, tagged "algorithms_dir". -# - Tests directory (everything in rllib/tests/...), tagged: "tests_dir" - # - Examples directory (everything in rllib/examples/...), tagged: "examples" # - Memory leak tests tagged "memory_leak_tests". 
@@ -1969,7 +1967,7 @@ py_test( tags = [ "exclusive", "team:rllib", - "tests_dir", + "algorithms_dir", ], ) @@ -2300,7 +2298,7 @@ py_test( srcs = ["env/tests/test_multi_agent_env.py"], tags = [ "team:rllib", - "tests_dir", + "env", ], ) From 5139058c66ac7a52227bd00dbdd7261d7addc51e Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Thu, 11 Dec 2025 15:27:14 +0000 Subject: [PATCH 02/12] Rename `algorithms_dir` to `algorithms` Signed-off-by: Mark Towers --- rllib/BUILD.bazel | 79 +++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 44 deletions(-) diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index cb0cf4469340..429679dcb8b9 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -1854,7 +1854,7 @@ py_test( # Algorithms (Compilation, Losses, simple functionality tests) # rllib/algorithms/ # -# Tag: algorithms_dir +# Tag: algorithms # -------------------------------------------------------------------- # Generic (all Algorithms) @@ -1865,8 +1865,7 @@ py_test( srcs = ["algorithms/tests/test_algorithm.py"], data = ["offline/tests/data/cartpole/small.json"], tags = [ - "algorithms_dir", - "algorithms_dir_generic", + "algorithms", "team:rllib", ], ) @@ -1876,8 +1875,7 @@ py_test( size = "medium", srcs = ["algorithms/tests/test_algorithm_config.py"], tags = [ - "algorithms_dir", - "algorithms_dir_generic", + "algorithms", "team:rllib", ], ) @@ -1887,8 +1885,7 @@ py_test( size = "medium", srcs = ["algorithms/tests/test_algorithm_export_checkpoint.py"], tags = [ - "algorithms_dir", - "algorithms_dir_generic", + "algorithms", "team:rllib", ], ) @@ -1898,8 +1895,7 @@ py_test( size = "medium", srcs = ["algorithms/tests/test_algorithm_save_load_checkpoint_learner.py"], tags = [ - "algorithms_dir", - "algorithms_dir_generic", + "algorithms", "team:rllib", ], ) @@ -1909,8 +1905,7 @@ py_test( size = "medium", srcs = ["algorithms/tests/test_algorithm_save_load_checkpoint_connectors.py"], tags = [ - "algorithms_dir", - "algorithms_dir_generic", + 
"algorithms", "team:rllib", ], ) @@ -1920,8 +1915,7 @@ py_test( size = "large", srcs = ["algorithms/tests/test_algorithm_rl_module_restore.py"], tags = [ - "algorithms_dir", - "algorithms_dir_generic", + "algorithms", "team:rllib", ], ) @@ -1931,8 +1925,7 @@ py_test( size = "small", srcs = ["algorithms/tests/test_algorithm_imports.py"], tags = [ - "algorithms_dir", - "algorithms_dir_generic", + "algorithms", "team:rllib", ], ) @@ -1942,8 +1935,7 @@ py_test( size = "small", srcs = ["algorithms/tests/test_registry.py"], tags = [ - "algorithms_dir", - "algorithms_dir_generic", + "algorithms", "team:rllib", ], ) @@ -1953,8 +1945,7 @@ py_test( size = "large", srcs = ["algorithms/tests/test_env_runner_failures.py"], tags = [ - "algorithms_dir", - "algorithms_dir_generic", + "algorithms", "exclusive", "team:rllib", ], @@ -1965,9 +1956,9 @@ py_test( size = "large", srcs = ["algorithms/tests/test_node_failures.py"], tags = [ + "algorithms", "exclusive", "team:rllib", - "algorithms_dir", ], ) @@ -1980,7 +1971,7 @@ py_test( size = "large", srcs = ["algorithms/appo/tests/test_appo.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -1990,7 +1981,7 @@ py_test( size = "medium", srcs = ["algorithms/appo/tests/test_appo_learner.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2003,7 +1994,7 @@ py_test( # Include the offline data files. 
data = ["offline/tests/data/cartpole/cartpole-v1_large"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2016,7 +2007,7 @@ py_test( srcs = ["algorithms/cql/tests/test_cql_old_api_stack.py"], data = ["offline/tests/data/pendulum/small.json"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2027,7 +2018,7 @@ py_test( size = "large", srcs = ["algorithms/dqn/tests/test_dqn.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2038,7 +2029,7 @@ py_test( size = "large", srcs = ["algorithms/dreamerv3/tests/test_dreamerv3.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2049,7 +2040,7 @@ py_test( size = "large", srcs = ["algorithms/impala/tests/test_impala.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2059,7 +2050,7 @@ py_test( size = "small", srcs = ["algorithms/impala/tests/test_vtrace_v2.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2070,7 +2061,7 @@ py_test( size = "small", srcs = ["algorithms/impala/tests/test_vtrace_old_api_stack.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2086,7 +2077,7 @@ py_test( "offline/tests/data/pendulum/pendulum-v1_large", ], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2100,7 +2091,7 @@ py_test( "offline/tests/data/cartpole/large.json", ], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2111,7 +2102,7 @@ py_test( size = "medium", srcs = ["algorithms/ppo/tests/test_ppo.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2121,7 +2112,7 @@ py_test( size = "large", srcs = ["algorithms/ppo/tests/test_ppo_rl_module.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2131,7 +2122,7 @@ py_test( size = "large", srcs = ["algorithms/ppo/tests/test_ppo_learner.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2142,7 +2133,7 @@ py_test( size = "large", srcs = 
["algorithms/sac/tests/test_sac.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2153,7 +2144,7 @@ py_test( size = "large", # bazel may complain about it being too long sometimes - large is on purpose as some frameworks take longer srcs = ["algorithms/tests/test_custom_resource.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2163,7 +2154,7 @@ py_test( size = "small", srcs = ["algorithms/tests/test_dependency_tf.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2173,7 +2164,7 @@ py_test( size = "small", srcs = ["algorithms/tests/test_dependency_torch.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2183,7 +2174,7 @@ py_test( size = "small", srcs = ["algorithms/tests/test_local.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2193,7 +2184,7 @@ py_test( size = "medium", srcs = ["algorithms/tests/test_ray_client.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2203,7 +2194,7 @@ py_test( size = "small", srcs = ["algorithms/tests/test_telemetry.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2213,7 +2204,7 @@ py_test( size = "small", srcs = ["algorithms/tests/test_nn_framework_import_errors.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2223,7 +2214,7 @@ py_test( size = "large", # bazel may complain about it being too long sometimes - large is on purpose as some frameworks take longer srcs = ["algorithms/tests/test_placement_groups.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], ) @@ -2297,8 +2288,8 @@ py_test( size = "large", srcs = ["env/tests/test_multi_agent_env.py"], tags = [ - "team:rllib", "env", + "team:rllib", ], ) From e28b616cd250e9d4e2a9ff9c1313be24ce3bc40f Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Thu, 11 Dec 2025 15:33:48 +0000 Subject: [PATCH 03/12] Remove unused tags Signed-off-by: Mark Towers --- .buildkite/rllib.rayci.yml | 26 
+++++--------------------- rllib/BUILD.bazel | 9 +-------- 2 files changed, 6 insertions(+), 29 deletions(-) diff --git a/.buildkite/rllib.rayci.yml b/.buildkite/rllib.rayci.yml index 3a6d17975999..2e8c26f3f0cf 100644 --- a/.buildkite/rllib.rayci.yml +++ b/.buildkite/rllib.rayci.yml @@ -24,13 +24,14 @@ steps: tags: cibase # tests - - label: ":brain: rllib: algorithm, model and others" + - label: ":brain: rllib: component testing" tags: rllib_directly parallelism: 4 instance_type: large commands: - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3 + --only-tags env,evaluation,models,offline,policy,utils,algorithms --except-tags learning_tests,memory_leak_tests,examples,documentation,multi_gpu,no_cpu,torch_2.x_only_benchmark,manual --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --build-name rllibbuild-py3.10 @@ -45,14 +46,14 @@ steps: - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3 --only-tags fake_gpus,learning_tests_discrete,crashing_cartpole,stateless_cartpole,learning_tests_continuous - --except-tags tf_only,tf2_only,gpu,multi_gpu,learning_tests_pytorch_use_all_core + --except-tags gpu,multi_gpu,learning_tests_pytorch_use_all_core --test-arg --framework=torch --build-name rllibbuild-py3.10 --python-version 3.10 - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --only-tags learning_tests_pytorch_use_all_core - --except-tags tf_only,tf2_only,gpu,multi_gpu + --except-tags gpu,multi_gpu --test-arg --framework=torch --skip-ray-installation --build-name rllibbuild-py3.10 @@ -237,27 +238,10 @@ steps: # torch - bazel run //ci/ray_ci:test_in_docker -- //rllib/... 
rllib --run-flaky-tests --only-tags fake_gpus,learning_tests_discrete,learning_tests_with_ray_data,crashing_cartpole,stateless_cartpole,learning_tests_continuous - --except-tags tf_only,tf2_only,multi_gpu,gpu + --except-tags multi_gpu,gpu --test-arg --framework=torch --build-name rllibbuild-py3.10 --python-version 3.10 - - # tf2-static-graph - - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests - --only-tags tf_only - --except-tags torch_only,tf2_only,no_tf_static_graph,multi_gpu,gpu - --test-arg --framework=tf - --build-name rllibbuild-py3.10 - --python-version 3.10 - --skip-ray-installation # reuse the same docker image as the previous run - # tf2-eager-tracing - - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests - --only-tags tf2_only - --except-tags fake_gpus,torch_only,multi_gpu,no_tf_eager_tracing,gpu - --test-arg --framework=tf2 - --build-name rllibbuild-py3.10 - --python-version 3.10 - --skip-ray-installation # reuse the same docker image as the previous run depends_on: rllibbuild soft_fail: true diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index 429679dcb8b9..96c529c85bd0 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -25,13 +25,10 @@ # - `offline` directory tests. # - `policy` directory tests. # - `utils` directory tests. - -# - Algorithm tests, tagged "algorithms_dir". +# - `algorithms` directory tests # - Examples directory (everything in rllib/examples/...), tagged: "examples" -# - Memory leak tests tagged "memory_leak_tests". - # Note: There is a special directory in examples: "documentation" which contains # all code that is linked to from within the RLlib docs. This code is tested # separately via the "documentation" tag. @@ -39,14 +36,10 @@ # Additional tags are: # - "team:rllib": Indicating that all tests in this file are the responsibility of # the RLlib Team. -# - "needs_gpu": Indicating that a test needs to have a GPU in order to run. 
# - "gpu": Indicating that a test may (but doesn't have to) be run in the GPU # pipeline, defined in .buildkite/pipeline.gpu.yml. # - "multi_gpu": Indicating that a test will definitely be run in the Large GPU # pipeline, defined in .buildkite/pipeline.gpu.large.yml. -# - "no_gpu": Indicating that a test should not be run in the GPU pipeline due -# to certain incompatibilities. -# - "no_tf_eager_tracing": Exclude this test from tf-eager tracing tests. # - "torch_only": Only run this test case with framework=torch. # Our .buildkite/pipeline.yml and .buildkite/pipeline.gpu.yml files execute all From fc60bfb8b5e211b4a1b838bceb641b102a33e3bc Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Thu, 11 Dec 2025 15:35:39 +0000 Subject: [PATCH 04/12] Remove `torch_only` tag Signed-off-by: Mark Towers --- rllib/BUILD.bazel | 81 ----------------------------------------------- 1 file changed, 81 deletions(-) diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index 96c529c85bd0..1f64d48f58ec 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -40,7 +40,6 @@ # pipeline, defined in .buildkite/pipeline.gpu.yml. # - "multi_gpu": Indicating that a test will definitely be run in the Large GPU # pipeline, defined in .buildkite/pipeline.gpu.large.yml. -# - "torch_only": Only run this test case with framework=torch. # Our .buildkite/pipeline.yml and .buildkite/pipeline.gpu.yml files execute all # these tests in n different jobs. 
@@ -98,7 +97,6 @@ py_test( "learning_tests", "learning_tests_discrete", "team:rllib", - "torch_only", ], ) @@ -119,7 +117,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -142,7 +139,6 @@ py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -163,7 +159,6 @@ py_test( "learning_tests", "learning_tests_discrete", "team:rllib", - "torch_only", ], ) @@ -186,7 +181,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -208,7 +202,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", # Test is failing: https://github.com/ray-project/ray/issues/52270 "manual", ], @@ -234,7 +227,6 @@ py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -255,7 +247,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -278,7 +269,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -299,7 +289,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -322,7 +311,6 @@ py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -342,7 +330,6 @@ py_test( "learning_tests", "learning_tests_continuous", "team:rllib", - "torch_only", ], ) @@ -365,7 +352,6 @@ py_test( "learning_tests_discrete", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -389,7 +375,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -413,7 +398,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", # Disabled: https://github.com/ray-project/ray/issues/50532 "manual", ], @@ -439,7 +423,6 @@ py_test( 
"learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -466,7 +449,6 @@ py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -491,7 +473,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", # Disabled: https://github.com/ray-project/ray/issues/43808 "manual", ], @@ -519,7 +500,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", # Disabled: https://github.com/ray-project/ray/issues/50538 "manual", ], @@ -541,7 +521,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -562,7 +541,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -581,7 +559,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -602,7 +579,6 @@ py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", # Disabled: https://github.com/ray-project/ray/issues/47216 "manual", ], @@ -625,7 +601,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -648,7 +623,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -669,7 +643,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -693,7 +666,6 @@ py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -712,7 +684,6 @@ py_test( "learning_tests", "learning_tests_discrete", "team:rllib", - "torch_only", ], ) @@ -732,7 +703,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -751,7 +721,6 @@ py_test( "learning_tests_discrete", 
"learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -772,7 +741,6 @@ py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -793,7 +761,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -815,7 +782,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -836,7 +802,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -860,7 +825,6 @@ py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -879,7 +843,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -900,7 +863,6 @@ py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -919,7 +881,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -944,7 +905,6 @@ py_test( "learning_tests_continuous", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -970,7 +930,6 @@ py_test( "learning_tests_continuous", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -994,7 +953,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -1019,7 +977,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -1038,7 +995,6 @@ py_test( "learning_tests", "learning_tests_discrete", "team:rllib", - "torch_only", ], ) @@ -1059,7 +1015,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -1078,7 +1033,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -1099,7 +1053,6 @@ 
py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -1118,7 +1071,6 @@ py_test( "learning_tests", "learning_tests_discrete", "team:rllib", - "torch_only", ], ) @@ -1140,7 +1092,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -1160,7 +1111,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -1182,7 +1132,6 @@ py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -1200,7 +1149,6 @@ py_test( "learning_tests", "learning_tests_discrete", "team:rllib", - "torch_only", ], ) @@ -1218,7 +1166,6 @@ py_test( "learning_tests", "learning_tests_discrete", "team:rllib", - "torch_only", ], ) @@ -1239,7 +1186,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -1258,7 +1204,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -1279,7 +1224,6 @@ py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -1298,7 +1242,6 @@ py_test( "learning_tests", "learning_tests_discrete", "team:rllib", - "torch_only", ], ) @@ -1320,7 +1263,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -1340,7 +1282,6 @@ py_test( "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -1362,7 +1303,6 @@ py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -1460,7 +1400,6 @@ py_test( "learning_tests", "learning_tests_continuous", "team:rllib", - "torch_only", ], ) @@ -1481,7 +1420,6 @@ py_test( "learning_tests_continuous", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -1500,7 +1438,6 @@ py_test( "learning_tests_continuous", 
"learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -1521,7 +1458,6 @@ py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -1540,7 +1476,6 @@ py_test( "learning_tests", "learning_tests_continuous", "team:rllib", - "torch_only", ], ) @@ -1562,7 +1497,6 @@ py_test( "learning_tests_continuous", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -1582,7 +1516,6 @@ py_test( "learning_tests_continuous", "learning_tests_pytorch_use_all_core", "team:rllib", - "torch_only", ], ) @@ -1604,7 +1537,6 @@ py_test( "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -1623,7 +1555,6 @@ py_test( "learning_tests", "learning_tests_discrete", "team:rllib", - "torch_only", ], ) @@ -1643,7 +1574,6 @@ py_test( "learning_tests", "learning_tests_discrete", "team:rllib", - "torch_only", ], ) @@ -1661,7 +1591,6 @@ py_test( "learning_tests", "learning_tests_discrete", "team:rllib", - "torch_only", ], ) @@ -1682,7 +1611,6 @@ py_test( "learning_tests_discrete", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -1700,7 +1628,6 @@ py_test( "learning_tests", "learning_tests_continuous", "team:rllib", - "torch_only", ], ) @@ -1720,7 +1647,6 @@ py_test( "learning_tests", "learning_tests_continuous", "team:rllib", - "torch_only", ], ) @@ -1738,7 +1664,6 @@ py_test( "learning_tests", "learning_tests_continuous", "team:rllib", - "torch_only", ], ) @@ -1758,7 +1683,6 @@ py_test( "learning_tests_continuous", "multi_gpu", "team:rllib", - "torch_only", ], ) @@ -1778,7 +1702,6 @@ py_test( "learning_tests", "learning_tests_continuous", "team:rllib", - "torch_only", ], ) @@ -1800,7 +1723,6 @@ py_test( "learning_tests", "learning_tests_continuous", "team:rllib", - "torch_only", ], ) @@ -1818,7 +1740,6 @@ py_test( "learning_tests", "learning_tests_continuous", "team:rllib", - "torch_only", ], ) @@ -1839,7 +1760,6 @@ py_test( "learning_tests_continuous", "multi_gpu", 
"team:rllib", - "torch_only", ], ) @@ -2692,7 +2612,6 @@ py_test( tags = [ "offline", "team:rllib", - "torch_only", ], ) From e78d861b7b6aa87dac6ac5f5fa91b3f4a861cbc6 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Thu, 11 Dec 2025 16:42:51 +0000 Subject: [PATCH 05/12] Reformat Signed-off-by: Mark Towers --- .buildkite/rllib.rayci.yml | 211 +++++++++++++++++-------------------- rllib/BUILD.bazel | 108 +++---------------- 2 files changed, 110 insertions(+), 209 deletions(-) diff --git a/.buildkite/rllib.rayci.yml b/.buildkite/rllib.rayci.yml index 2e8c26f3f0cf..d0e0324e6b1c 100644 --- a/.buildkite/rllib.rayci.yml +++ b/.buildkite/rllib.rayci.yml @@ -31,53 +31,53 @@ steps: commands: - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3 - --only-tags env,evaluation,models,offline,policy,utils,algorithms - --except-tags learning_tests,memory_leak_tests,examples,documentation,multi_gpu,no_cpu,torch_2.x_only_benchmark,manual + --only-tags env,evaluation,models,offline,policy,utils,algorithms,callbacks,core + --except-tags gpu,multi_gpu,manual --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --build-name rllibbuild-py3.10 --python-version 3.10 depends_on: rllibbuild - - label: ":brain: rllib: learning tests pytorch" + - label: ":brain: rllib: examples" tags: rllib - parallelism: 5 + parallelism: 6 instance_type: large commands: - bazel run //ci/ray_ci:test_in_docker -- //rllib/... 
rllib - --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3 - --only-tags fake_gpus,learning_tests_discrete,crashing_cartpole,stateless_cartpole,learning_tests_continuous - --except-tags gpu,multi_gpu,learning_tests_pytorch_use_all_core - --test-arg --framework=torch + --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 2 + --only-tags examples + --except-tags gpu,multi_gpu,manual,examples_use_all_core + --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --build-name rllibbuild-py3.10 --python-version 3.10 - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" - --only-tags learning_tests_pytorch_use_all_core - --except-tags gpu,multi_gpu - --test-arg --framework=torch - --skip-ray-installation + --only-tags examples_use_all_core + --except-tags gpu,multi_gpu,manual + --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --build-name rllibbuild-py3.10 --python-version 3.10 + --skip-ray-installation depends_on: rllibbuild - - label: ":brain: rllib: examples" + - label: ":brain: rllib: learning tests" tags: rllib - parallelism: 6 + parallelism: 5 instance_type: large commands: - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib - --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 2 - --only-tags examples - --except-tags multi_gpu,gpu,examples_use_all_core - --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 + --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3 + --only-tags learning_tests + --except-tags gpu,multi_gpu,learning_tests_pytorch_use_all_core,manual + --test-arg --framework=torch --build-name rllibbuild-py3.10 --python-version 3.10 - bazel run //ci/ray_ci:test_in_docker -- //rllib/... 
rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" - --only-tags examples_use_all_core + --only-tags learning_tests_pytorch_use_all_core,manual + --except-tags gpu,multi_gpu + --test-arg --framework=torch --skip-ray-installation - --except-tags multi_gpu,gpu - --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --build-name rllibbuild-py3.10 --python-version 3.10 depends_on: rllibbuild @@ -93,24 +93,43 @@ steps: - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --only-tags gpu + --except-tags multi_gpu,manual --test-env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --test-env=RLLIB_NUM_GPUS=1 --build-name rllibgpubuild-py3.10 --python-version 3.10 depends_on: rllibgpubuild + - label: ":brain: rllib: multi-gpu tests" + tags: + - rllib_gpu + - gpu + - skip-on-microcheck + parallelism: 5 + instance_type: gpu-large + commands: + - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib + --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" + --parallelism-per-worker 2 + --gpus 4 + --only-tags multi_gpu + --except-tags manual + --build-name rllibgpubuild-py3.10 + --python-version 3.10 + depends_on: rllibgpubuild + - label: ":brain: rllib: data tests" tags: - data - rllib - - disabled # Tests of this tag do not exist any more. + - disabled # Tests of this tag do not exist anymore. instance_type: large commands: # learning tests pytorch - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --parallelism-per-worker 3 --only-tags learning_tests_with_ray_data - --except-tags multi_gpu,gpu,tf_only,tf2_only + --except-tags gpu,multi_gpu,manual --test-arg --framework=torch --build-name rllibbuild-py3.10 --python-version 3.10 @@ -119,32 +138,13 @@ steps: - bazel run //ci/ray_ci:test_in_docker -- //rllib/... 
rllib --parallelism-per-worker 3 --only-tags ray_data - --except-tags learning_tests_with_ray_data,multi_gpu,gpu + --except-tags learning_tests_with_ray_data,gpu,multi_gpu,manual --skip-ray-installation --build-name rllibbuild-py3.10 --python-version 3.10 # reuse the same docker image as the previous run depends_on: rllibbuild - - label: ":brain: rllib: benchmarks" - tags: rllib - instance_type: medium - commands: - - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --only-tags torch_2.x_only_benchmark - --build-name rllibbuild-py3.10 - --python-version 3.10 - depends_on: rllibbuild - - # - label: ":brain: rllib: memory leak pytorch tests" - # tags: rllib - # instance_type: medium - # commands: - # - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib - # --only-tags memory_leak_tests - # --except-tags flaky - # --test-arg --framework=torch - # depends_on: rllibbuild - - label: ":brain: rllib: doc tests" tags: - rllib_directly @@ -154,8 +154,8 @@ steps: commands: # doc tests - bazel run //ci/ray_ci:test_in_docker -- python/ray/... //doc/... rllib - --except-tags gpu --only-tags doctest + --except-tags gpu,manual --parallelism-per-worker 2 --build-name rllibbuild-py3.10 --python-version 3.10 @@ -166,6 +166,7 @@ steps: --skip-ray-installation --build-name rllibbuild-py3.10 --python-version 3.10 + # documentation test - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --only-tags documentation --parallelism-per-worker 2 @@ -174,39 +175,57 @@ steps: --python-version 3.10 depends_on: rllibbuild - - label: ":brain: rllib: multi-gpu tests" + - label: ":brain: rllib: flaky component & examples tests" + key: rllib_flaky_tests_02 tags: - - rllib_gpu - - gpu - - skip-on-microcheck - parallelism: 5 - instance_type: gpu-large + - rllib + - rllib_flaky + - skip-on-premerge + instance_type: large commands: - - bazel run //ci/ray_ci:test_in_docker -- //rllib/... 
rllib - --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" - --parallelism-per-worker 2 - --gpus 4 - --only-tags multi_gpu - --build-name rllibgpubuild-py3.10 + # components + - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3 + --only-tags env,evaluation,models,offline,policy,utils,algorithms,callbacks,core + --except-tags learning_tests,examples,documentation,gpu,multi_gpu,manual + --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 + --build-name rllibbuild-py3.10 --python-version 3.10 - depends_on: rllibgpubuild - - label: ":brain: rllib: flaky multi-gpu tests" - key: rllib_flaky_multi_gpu_tests + # examples + - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3 + --only-tags examples + --except-tags multi_gpu,gpu,manual + --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 + --build-name rllibbuild-py3.10 + --python-version 3.10 + --skip-ray-installation # reuse the same docker image as the previous run + + - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3 + --only-tags examples_use_all_core + --except-tags gpu,multi_gpu,manual + --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 + --build-name rllibbuild-py3.10 + --python-version 3.10 + --skip-ray-installation # reuse the same docker image as the previous run + depends_on: rllibbuild + soft_fail: true + + - label: ":brain: rllib: flaky learning tests" + key: rllib_flaky_tests_01 tags: - - rllib_gpu - - gpu + - rllib - rllib_flaky - skip-on-premerge - instance_type: gpu-large + instance_type: large commands: + # torch - bazel run //ci/ray_ci:test_in_docker -- //rllib/... 
rllib --run-flaky-tests - --parallelism-per-worker 2 - --gpus 4 - --only-tags multi_gpu - --build-name rllibgpubuild-py3.10 + --only-tags learning_tests,learning_tests_with_ray_data + --except-tags gpu,multi_gpu,manual + --test-arg --framework=torch + --build-name rllibbuild-py3.10 --python-version 3.10 - depends_on: rllibgpubuild + depends_on: rllibbuild soft_fail: true - label: ":brain: rllib: flaky gpu tests" @@ -220,6 +239,7 @@ steps: commands: - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --only-tags gpu + --except-tags multi_gpu,manual --test-env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --test-env=RLLIB_NUM_GPUS=1 --build-name rllibgpubuild-py3.10 @@ -227,56 +247,21 @@ steps: depends_on: rllibgpubuild soft_fail: true - - label: ":brain: rllib: flaky tests (learning tests)" - key: rllib_flaky_tests_01 + - label: ":brain: rllib: flaky multi-gpu tests" + key: rllib_flaky_multi_gpu_tests tags: - - rllib + - rllib_gpu + - gpu - rllib_flaky - skip-on-premerge - instance_type: large + instance_type: gpu-large commands: - # torch - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests - --only-tags fake_gpus,learning_tests_discrete,learning_tests_with_ray_data,crashing_cartpole,stateless_cartpole,learning_tests_continuous - --except-tags multi_gpu,gpu - --test-arg --framework=torch - --build-name rllibbuild-py3.10 - --python-version 3.10 - depends_on: rllibbuild - soft_fail: true - - - label: ":brain: rllib: flaky tests (examples/rlmodule/models)" - key: rllib_flaky_tests_02 - tags: - - rllib - - rllib_flaky - - skip-on-premerge - instance_type: large - commands: - # examples - - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3 - --only-tags examples - --except-tags multi_gpu,gpu - --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 - --build-name rllibbuild-py3.10 - --python-version 3.10 - - # rlmodule tests - - bazel run //ci/ray_ci:test_in_docker -- //rllib/... 
rllib --run-flaky-tests --parallelism-per-worker 3 - --only-tags rlm - --except-tags multi_gpu,gpu - --test-env RLLIB_ENABLE_RL_MODULE=1 - --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 - --build-name rllibbuild-py3.10 - --python-version 3.10 - --skip-ray-installation # reuse the same docker image as the previous run - - # algorithm, models - - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3 - --except-tags learning_tests,memory_leak_tests,examples,documentation,multi_gpu,gpu,no_cpu,torch_2.x_only_benchmark,manual - --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 - --build-name rllibbuild-py3.10 + --parallelism-per-worker 2 + --gpus 4 + --only-tags multi_gpu + --except-tags manual + --build-name rllibgpubuild-py3.10 --python-version 3.10 - --skip-ray-installation # reuse the same docker image as the previous run - depends_on: rllibbuild + depends_on: rllibgpubuild soft_fail: true diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index 1f64d48f58ec..0b5a68772ba1 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -11,10 +11,7 @@ # Currently we have the following categories: # - Learning tests/regression, tagged: -# -- "learning_tests_[discrete|continuous]": distinguish discrete -# actions vs continuous actions. -# -- "crashing_cartpole" and "stateless_cartpole" to distinguish between -# simple CartPole and more advanced variants of it. +# -- "learning_tests": Tests for an algorithms expected return # -- "ray_data": Tests that rely on ray_data. # -- "learning_tests_with_ray_data": Learning tests that rely on ray_data. @@ -25,7 +22,9 @@ # - `offline` directory tests. # - `policy` directory tests. # - `utils` directory tests. -# - `algorithms` directory tests +# - `algorithms` directory tests. +# - `callbacks` directory tests. 
+# - `core` directory tests # - Examples directory (everything in rllib/examples/...), tagged: "examples" @@ -95,7 +94,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "team:rllib", ], ) @@ -114,7 +112,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -135,7 +132,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", @@ -157,7 +153,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "team:rllib", ], ) @@ -178,7 +173,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -199,7 +193,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", # Test is failing: https://github.com/ray-project/ray/issues/52270 @@ -223,7 +216,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", @@ -244,7 +236,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -266,7 +257,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -286,7 +276,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -307,7 +296,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", @@ -328,7 +316,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_continuous", "team:rllib", ], ) @@ -349,7 +336,6 @@ py_test( tags = [ "exclusive", 
"learning_tests", - "learning_tests_discrete", "multi_gpu", "team:rllib", ], @@ -372,7 +358,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -395,7 +380,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", # Disabled: https://github.com/ray-project/ray/issues/50532 @@ -420,7 +404,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -445,7 +428,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", @@ -469,8 +451,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_cartpole", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", # Disabled: https://github.com/ray-project/ray/issues/43808 @@ -496,8 +476,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_cartpole", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", # Disabled: https://github.com/ray-project/ray/issues/50538 @@ -518,7 +496,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -538,7 +515,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -556,7 +532,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -575,7 +550,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", @@ -598,7 +572,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", 
"team:rllib", ], @@ -620,7 +593,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -640,7 +612,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -662,7 +633,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", @@ -682,7 +652,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "team:rllib", ], ) @@ -700,7 +669,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -718,7 +686,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -737,7 +704,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", @@ -758,7 +724,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -779,7 +744,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -799,7 +763,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -821,7 +784,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", @@ -840,7 +802,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -859,7 +820,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", 
"multi_gpu", "team:rllib", @@ -878,7 +838,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -902,7 +861,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_continuous", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -927,7 +885,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_continuous", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -950,7 +907,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -974,7 +930,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -993,7 +948,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "team:rllib", ], ) @@ -1012,7 +966,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -1030,7 +983,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -1049,7 +1001,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", @@ -1069,7 +1020,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "team:rllib", ], ) @@ -1089,7 +1039,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -1108,7 +1057,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -1128,7 +1076,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", @@ -1147,7 
+1094,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "team:rllib", ], ) @@ -1164,7 +1110,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "team:rllib", ], ) @@ -1183,7 +1128,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -1201,7 +1145,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -1220,7 +1163,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", @@ -1240,7 +1182,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "team:rllib", ], ) @@ -1260,7 +1201,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -1279,7 +1219,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -1299,7 +1238,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", @@ -1320,7 +1258,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "team:rllib", ], ) @@ -1340,7 +1277,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "multi_gpu", "team:rllib", ], @@ -1360,7 +1296,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "team:rllib", ], ) @@ -1380,7 +1315,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "multi_gpu", "team:rllib", ], @@ -1398,7 +1332,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_continuous", "team:rllib", ], ) @@ -1417,7 +1350,6 @@ py_test( "exclusive", "gpu", "learning_tests", - 
"learning_tests_continuous", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -1435,7 +1367,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_continuous", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -1454,7 +1385,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_continuous", "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", @@ -1474,7 +1404,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_continuous", "team:rllib", ], ) @@ -1494,7 +1423,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_continuous", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -1513,7 +1441,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_continuous", "learning_tests_pytorch_use_all_core", "team:rllib", ], @@ -1533,7 +1460,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_continuous", "learning_tests_pytorch_use_all_core", "multi_gpu", "team:rllib", @@ -1553,7 +1479,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "team:rllib", ], ) @@ -1572,7 +1497,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_discrete", "team:rllib", ], ) @@ -1589,7 +1513,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "team:rllib", ], ) @@ -1608,7 +1531,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_discrete", "multi_gpu", "team:rllib", ], @@ -1626,7 +1548,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_continuous", "team:rllib", ], ) @@ -1645,7 +1566,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_continuous", "team:rllib", ], ) @@ -1662,7 +1582,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_continuous", "team:rllib", ], ) @@ -1680,7 +1599,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_continuous", "multi_gpu", "team:rllib", ], @@ 
-1700,7 +1618,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_continuous", "team:rllib", ], ) @@ -1721,7 +1638,6 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_continuous", "team:rllib", ], ) @@ -1738,7 +1654,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_continuous", "team:rllib", ], ) @@ -1757,7 +1672,6 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_continuous", "multi_gpu", "team:rllib", ], @@ -2143,7 +2057,7 @@ py_test( size = "large", srcs = ["callbacks/tests/test_callbacks_on_algorithm.py"], tags = [ - "callbacks_dir", + "callbacks", "team:rllib", ], ) @@ -2153,7 +2067,7 @@ py_test( size = "medium", srcs = ["callbacks/tests/test_callbacks_on_env_runner.py"], tags = [ - "callbacks_dir", + "callbacks", "team:rllib", ], ) @@ -2164,7 +2078,7 @@ py_test( size = "medium", srcs = ["callbacks/tests/test_callbacks_old_api_stack.py"], tags = [ - "callbacks_dir", + "callbacks", "team:rllib", ], ) @@ -2174,7 +2088,7 @@ py_test( size = "medium", srcs = ["callbacks/tests/test_multicallback.py"], tags = [ - "callbacks_dir", + "callbacks", "team:rllib", ], ) @@ -2480,6 +2394,7 @@ py_test( "manual", "multi_gpu", "team:rllib", + "core", ], ) @@ -2493,6 +2408,7 @@ py_test( "exclusive", "multi_gpu", "team:rllib", + "core", ], ) @@ -2506,6 +2422,7 @@ py_test( "exclusive", "multi_gpu", "team:rllib", + "core", ], ) @@ -2519,6 +2436,7 @@ py_test( "exclusive", "multi_gpu", "team:rllib", + "core", ], ) @@ -3216,7 +3134,6 @@ py_test( tags = [ "examples", "exclusive", - "old_api_stack", "team:rllib", ], ) @@ -3230,7 +3147,6 @@ py_test( tags = [ "examples", "exclusive", - "old_api_stack", "team:rllib", ], ) From 9bc0cc0202cd42e3f84489041ddc977040418bf4 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Fri, 12 Dec 2025 12:34:04 +0000 Subject: [PATCH 06/12] code-review + pre-commit Signed-off-by: Mark Towers --- .buildkite/rllib.rayci.yml | 4 ++-- rllib/BUILD.bazel | 8 ++++---- 2 files changed, 6 
insertions(+), 6 deletions(-) diff --git a/.buildkite/rllib.rayci.yml b/.buildkite/rllib.rayci.yml index d0e0324e6b1c..2ececc155bde 100644 --- a/.buildkite/rllib.rayci.yml +++ b/.buildkite/rllib.rayci.yml @@ -74,8 +74,8 @@ steps: --python-version 3.10 - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" - --only-tags learning_tests_pytorch_use_all_core,manual - --except-tags gpu,multi_gpu + --only-tags learning_tests_pytorch_use_all_core + --except-tags gpu,multi_gpu,manual --test-arg --framework=torch --skip-ray-installation --build-name rllibbuild-py3.10 diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index 0b5a68772ba1..33fc650c5f35 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -2390,11 +2390,11 @@ py_test( main = "core/learner/tests/test_learner_group.py", # TODO(#50114): mark as manual as it is flaky. tags = [ + "core", "exclusive", "manual", "multi_gpu", "team:rllib", - "core", ], ) @@ -2405,10 +2405,10 @@ py_test( args = ["TestLearnerGroupSyncUpdate"], main = "core/learner/tests/test_learner_group.py", tags = [ + "core", "exclusive", "multi_gpu", "team:rllib", - "core", ], ) @@ -2419,10 +2419,10 @@ py_test( args = ["TestLearnerGroupCheckpointRestore"], main = "core/learner/tests/test_learner_group.py", tags = [ + "core", "exclusive", "multi_gpu", "team:rllib", - "core", ], ) @@ -2433,10 +2433,10 @@ py_test( args = ["TestLearnerGroupSaveAndRestoreState"], main = "core/learner/tests/test_learner_group.py", tags = [ + "core", "exclusive", "multi_gpu", "team:rllib", - "core", ], ) From c8cd9b602901388116423686bc65afc82255c74a Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Thu, 18 Dec 2025 12:14:58 +0000 Subject: [PATCH 07/12] code-review Signed-off-by: Mark Towers --- .buildkite/rllib.rayci.yml | 31 +++++---- rllib/BUILD.bazel | 125 ++++++++++++++++++------------------- 2 files changed, 80 insertions(+), 76 deletions(-) diff --git 
a/.buildkite/rllib.rayci.yml b/.buildkite/rllib.rayci.yml index 2ececc155bde..4b37edf69ef6 100644 --- a/.buildkite/rllib.rayci.yml +++ b/.buildkite/rllib.rayci.yml @@ -29,6 +29,7 @@ steps: parallelism: 4 instance_type: large commands: + # All tests with component tags, without gpu or multi_gpu tags - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3 --only-tags env,evaluation,models,offline,policy,utils,algorithms,callbacks,core @@ -43,6 +44,7 @@ steps: parallelism: 6 instance_type: large commands: + # Tests all examples without gpu, multi_gpu or examples_use_all_core tag - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 2 --only-tags examples @@ -50,6 +52,7 @@ steps: --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --build-name rllibbuild-py3.10 --python-version 3.10 + # Tests all examples_use_all_core examples without gpu or multi_gpu - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --only-tags examples_use_all_core @@ -57,7 +60,7 @@ steps: --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --build-name rllibbuild-py3.10 --python-version 3.10 - --skip-ray-installation + --skip-ray-installation # reuse the same docker image as the previous run depends_on: rllibbuild - label: ":brain: rllib: learning tests" @@ -65,21 +68,23 @@ steps: parallelism: 5 instance_type: large commands: + # learning tests without a gpu - bazel run //ci/ray_ci:test_in_docker -- //rllib/... 
rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3 --only-tags learning_tests - --except-tags gpu,multi_gpu,learning_tests_pytorch_use_all_core,manual + --except-tags gpu,multi_gpu,learning_tests_use_all_core,manual --test-arg --framework=torch --build-name rllibbuild-py3.10 --python-version 3.10 + # learning tests without a gpu but use all cores - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" - --only-tags learning_tests_pytorch_use_all_core + --only-tags learning_tests_use_all_core --except-tags gpu,multi_gpu,manual --test-arg --framework=torch - --skip-ray-installation --build-name rllibbuild-py3.10 --python-version 3.10 + --skip-ray-installation # reuse the same docker image as the previous run depends_on: rllibbuild - label: ":brain: rllib: gpu tests" @@ -90,6 +95,7 @@ steps: parallelism: 5 instance_type: gpu commands: + # All gpu tagged tests - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --only-tags gpu @@ -108,6 +114,7 @@ steps: parallelism: 5 instance_type: gpu-large commands: + # All multi-gpu tagged tests - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 2 @@ -125,7 +132,7 @@ steps: - disabled # Tests of this tag do not exist anymore. instance_type: large commands: - # learning tests pytorch + # learning tests with ray data (currently no tests) - bazel run //ci/ray_ci:test_in_docker -- //rllib/... 
rllib --parallelism-per-worker 3 --only-tags learning_tests_with_ray_data @@ -134,15 +141,14 @@ steps: --build-name rllibbuild-py3.10 --python-version 3.10 - # rllib unittests + # ray data tests (currently tested twice as they are all tagged core or offline and run above in component testing) - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --parallelism-per-worker 3 --only-tags ray_data --except-tags learning_tests_with_ray_data,gpu,multi_gpu,manual - --skip-ray-installation --build-name rllibbuild-py3.10 --python-version 3.10 - # reuse the same docker image as the previous run + --skip-ray-installation # reuse the same docker image as the previous run depends_on: rllibbuild - label: ":brain: rllib: doc tests" @@ -163,16 +169,16 @@ steps: - bazel run //ci/ray_ci:test_in_docker -- //doc/... rllib --except-tags gpu,post_wheel_build,timeseries_libs,doctest --parallelism-per-worker 2 - --skip-ray-installation --build-name rllibbuild-py3.10 --python-version 3.10 + --skip-ray-installation # reuse the same docker image as the previous run # documentation test - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --only-tags documentation --parallelism-per-worker 2 - --skip-ray-installation --build-name rllibbuild-py3.10 --python-version 3.10 + --skip-ray-installation # reuse the same docker image as the previous run depends_on: rllibbuild - label: ":brain: rllib: flaky component & examples tests" @@ -183,7 +189,7 @@ steps: - skip-on-premerge instance_type: large commands: - # components + # flaky components - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3 --only-tags env,evaluation,models,offline,policy,utils,algorithms,callbacks,core --except-tags learning_tests,examples,documentation,gpu,multi_gpu,manual @@ -191,7 +197,7 @@ steps: --build-name rllibbuild-py3.10 --python-version 3.10 - # examples + # flaky examples - bazel run //ci/ray_ci:test_in_docker -- //rllib/... 
rllib --run-flaky-tests --parallelism-per-worker 3 --only-tags examples --except-tags multi_gpu,gpu,manual @@ -200,6 +206,7 @@ steps: --python-version 3.10 --skip-ray-installation # reuse the same docker image as the previous run + # flaky examples use all core - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3 --only-tags examples_use_all_core --except-tags gpu,multi_gpu,manual diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index 33fc650c5f35..f21756983bd9 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -12,6 +12,7 @@ # - Learning tests/regression, tagged: # -- "learning_tests": Tests for an algorithms expected return +# -- "learning_tests_use_all_core": Learning tests that use all cores. # -- "ray_data": Tests that rely on ray_data. # -- "learning_tests_with_ray_data": Learning tests that rely on ray_data. @@ -26,22 +27,18 @@ # - `callbacks` directory tests. # - `core` directory tests -# - Examples directory (everything in rllib/examples/...), tagged: "examples" +# - Examples directory (everything in rllib/examples/...), tagged: "examples" or "examples_use_all_core" # Note: There is a special directory in examples: "documentation" which contains # all code that is linked to from within the RLlib docs. This code is tested # separately via the "documentation" tag. # Additional tags are: -# - "team:rllib": Indicating that all tests in this file are the responsibility of -# the RLlib Team. -# - "gpu": Indicating that a test may (but doesn't have to) be run in the GPU -# pipeline, defined in .buildkite/pipeline.gpu.yml. -# - "multi_gpu": Indicating that a test will definitely be run in the Large GPU -# pipeline, defined in .buildkite/pipeline.gpu.large.yml. +# - "team:rllib": Indicating that all tests in this file are the responsibility of the RLlib Team. +# - "gpu": Test requires a single GPU. 
+# - "multi_gpu": Test requires 2 or more GPUs -# Our .buildkite/pipeline.yml and .buildkite/pipeline.gpu.yml files execute all -# these tests in n different jobs. +# ".buildkite/rllib.rayci.yml" handles running of these tests based on the tags used. load("@rules_python//python:defs.bzl", "py_test") load("//bazel:python.bzl", "doctest", "py_test_module_list") @@ -112,7 +109,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -132,7 +129,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", ], @@ -173,7 +170,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -193,7 +190,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", # Test is failing: https://github.com/ray-project/ray/issues/52270 "manual", @@ -216,7 +213,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", ], @@ -236,7 +233,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -257,7 +254,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -276,7 +273,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -296,7 +293,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", ], @@ -358,7 +355,7 @@ py_test( tags = [ "exclusive", "learning_tests", - 
"learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -380,7 +377,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", # Disabled: https://github.com/ray-project/ray/issues/50532 "manual", @@ -404,7 +401,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -428,7 +425,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", ], @@ -451,7 +448,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", # Disabled: https://github.com/ray-project/ray/issues/43808 "manual", @@ -476,7 +473,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", # Disabled: https://github.com/ray-project/ray/issues/50538 "manual", @@ -496,7 +493,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -515,7 +512,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -532,7 +529,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -550,7 +547,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", # Disabled: https://github.com/ray-project/ray/issues/47216 @@ -572,7 +569,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -593,7 +590,7 @@ 
py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -612,7 +609,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -633,7 +630,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", ], @@ -669,7 +666,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -686,7 +683,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -704,7 +701,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", ], @@ -724,7 +721,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -744,7 +741,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -763,7 +760,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -784,7 +781,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", ], @@ -802,7 +799,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -820,7 +817,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", ], @@ -838,7 +835,7 @@ py_test( tags = [ 
"exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -861,7 +858,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -885,7 +882,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -907,7 +904,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -930,7 +927,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -966,7 +963,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -983,7 +980,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -1001,7 +998,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", ], @@ -1039,7 +1036,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -1057,7 +1054,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -1076,7 +1073,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", ], @@ -1128,7 +1125,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -1145,7 +1142,7 @@ py_test( tags = [ "exclusive", "learning_tests", - 
"learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -1163,7 +1160,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", ], @@ -1201,7 +1198,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -1219,7 +1216,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -1238,7 +1235,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", ], @@ -1350,7 +1347,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -1367,7 +1364,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -1385,7 +1382,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", ], @@ -1423,7 +1420,7 @@ py_test( "exclusive", "gpu", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -1441,7 +1438,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "team:rllib", ], ) @@ -1460,7 +1457,7 @@ py_test( tags = [ "exclusive", "learning_tests", - "learning_tests_pytorch_use_all_core", + "learning_tests_use_all_core", "multi_gpu", "team:rllib", ], From fabf9e2750bb11dbc32b347b5fefebf1a301fd98 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Fri, 19 Dec 2025 11:27:38 +0000 Subject: [PATCH 08/12] remove ray_data Signed-off-by: Mark Towers --- .buildkite/rllib.rayci.yml | 28 
+--------------------------- rllib/BUILD.bazel | 5 ----- 2 files changed, 1 insertion(+), 32 deletions(-) diff --git a/.buildkite/rllib.rayci.yml b/.buildkite/rllib.rayci.yml index 4b37edf69ef6..47da13ab02c5 100644 --- a/.buildkite/rllib.rayci.yml +++ b/.buildkite/rllib.rayci.yml @@ -125,32 +125,6 @@ steps: --python-version 3.10 depends_on: rllibgpubuild - - label: ":brain: rllib: data tests" - tags: - - data - - rllib - - disabled # Tests of this tag do not exist anymore. - instance_type: large - commands: - # learning tests with ray data (currently no tests) - - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib - --parallelism-per-worker 3 - --only-tags learning_tests_with_ray_data - --except-tags gpu,multi_gpu,manual - --test-arg --framework=torch - --build-name rllibbuild-py3.10 - --python-version 3.10 - - # ray data tests (currently tested twice as they are all tagged core or offline and run above in component testing) - - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib - --parallelism-per-worker 3 - --only-tags ray_data - --except-tags learning_tests_with_ray_data,gpu,multi_gpu,manual - --build-name rllibbuild-py3.10 - --python-version 3.10 - --skip-ray-installation # reuse the same docker image as the previous run - depends_on: rllibbuild - - label: ":brain: rllib: doc tests" tags: - rllib_directly @@ -227,7 +201,7 @@ steps: commands: # torch - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests - --only-tags learning_tests,learning_tests_with_ray_data + --only-tags learning_tests --except-tags gpu,multi_gpu,manual --test-arg --framework=torch --build-name rllibbuild-py3.10 diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index f21756983bd9..024b8e5e8b7b 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -13,8 +13,6 @@ # - Learning tests/regression, tagged: # -- "learning_tests": Tests for an algorithms expected return # -- "learning_tests_with_all_core": -# -- "ray_data": Tests that rely on ray_data. 
-# -- "learning_tests_with_ray_data": Learning tests that rely on ray_data. # - Folder-bound tests, tagged with the name of the top-level dir: # - `env` directory tests. @@ -2445,7 +2443,6 @@ py_test( tags = [ "core", "exclusive", - "ray_data", "team:rllib", ], ) @@ -2457,7 +2454,6 @@ py_test( tags = [ "core", "exclusive", - "ray_data", "team:rllib", ], ) @@ -2548,7 +2544,6 @@ py_test( data = ["offline/tests/data/cartpole/small.json"], tags = [ "offline", - "ray_data", "team:rllib", ], ) From aa5ae68987b31b9c4db1c1635b94a15eba306041 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Tue, 30 Dec 2025 16:40:40 +0000 Subject: [PATCH 09/12] Fix BUILD.bazel repeat test Signed-off-by: Mark Towers --- rllib/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index ceaacbb13250..257c582d98b4 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -1776,7 +1776,7 @@ py_test( ) py_test( - name = "test_env_runner_failures", + name = "test_algo_env_runner_failures", size = "large", srcs = ["algorithms/tests/test_env_runner_failures.py"], tags = [ From 7d0927729c5aee73c7f430feec6f5762e2be41c8 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Wed, 31 Dec 2025 12:45:25 +0000 Subject: [PATCH 10/12] Remove test that doesn't exist anymore Signed-off-by: Mark Towers --- rllib/BUILD.bazel | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index 257c582d98b4..7ba90b01c4e7 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -1775,18 +1775,6 @@ py_test( deps = [":conftest"], ) -py_test( - name = "test_algo_env_runner_failures", - size = "large", - srcs = ["algorithms/tests/test_env_runner_failures.py"], - tags = [ - "algorithms", - "exclusive", - "team:rllib", - ], - deps = [":conftest"], -) - py_test( name = "test_node_failures", size = "large", From 20da1f64a94acea46ba5fd2b560d27adc3e14555 Mon Sep 17 00:00:00 2001 From: Mark Towers Date: Mon, 12 Jan 2026 15:05:41 +0000 
Subject: [PATCH 11/12] Change `algorithms_dir` to `algorithms` Signed-off-by: Mark Towers --- rllib/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rllib/BUILD.bazel b/rllib/BUILD.bazel index 6bae3ff65ba9..dea54e03df22 100644 --- a/rllib/BUILD.bazel +++ b/rllib/BUILD.bazel @@ -1984,7 +1984,7 @@ py_test( size = "large", srcs = ["algorithms/tqc/tests/test_tqc.py"], tags = [ - "algorithms_dir", + "algorithms", "team:rllib", ], deps = [":conftest"], From 7db3c3012112ad42a9bfde66b6f76a8511ea19df Mon Sep 17 00:00:00 2001 From: Kamil Kaczmarek Date: Mon, 12 Jan 2026 21:35:57 -0800 Subject: [PATCH 12/12] add examples_use_all_core to the excluded tags Signed-off-by: Kamil Kaczmarek --- .buildkite/rllib.rayci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/rllib.rayci.yml b/.buildkite/rllib.rayci.yml index 47da13ab02c5..5db5aa7ac9d6 100644 --- a/.buildkite/rllib.rayci.yml +++ b/.buildkite/rllib.rayci.yml @@ -174,7 +174,7 @@ steps: # flaky examples - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3 --only-tags examples - --except-tags multi_gpu,gpu,manual + --except-tags multi_gpu,gpu,manual,examples_use_all_core --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --build-name rllibbuild-py3.10 --python-version 3.10