ray-project · ArturNiederfahrenhorst · Jan 13, 2026 · Dec 11, 2025 · Dec 11, 2025 · Dec 11, 2025
diff --git a/.buildkite/rllib.rayci.yml b/.buildkite/rllib.rayci.yml
@@ -24,75 +24,67 @@ steps:
     tags: cibase
 
   # tests
-  - label: ":brain: rllib: algorithm, model and others"
+  - label: ":brain: rllib: component testing"
     tags: rllib_directly
     parallelism: 4
     instance_type: large
     commands:
+      # All tests with a component tag, excluding gpu, multi_gpu and manual tagged tests
       - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
         --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
-        --except-tags learning_tests,memory_leak_tests,examples,tests_dir,documentation,multi_gpu,no_cpu,torch_2.x_only_benchmark,manual
+        --only-tags env,evaluation,models,offline,policy,utils,algorithms,callbacks,core
+        --except-tags gpu,multi_gpu,manual
         --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
         --build-name rllibbuild-py3.10
         --python-version 3.10
     depends_on: rllibbuild
 
-  - label: ":brain: rllib: learning tests pytorch"
-    tags: rllib
-    parallelism: 5
-    instance_type: large
-    commands:
-      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
-        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
-        --only-tags fake_gpus,learning_tests_discrete,crashing_cartpole,stateless_cartpole,learning_tests_continuous
-        --except-tags tf_only,tf2_only,gpu,multi_gpu,learning_tests_pytorch_use_all_core
-        --test-arg --framework=torch
-        --build-name rllibbuild-py3.10
-        --python-version 3.10
-      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
-        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
-        --only-tags learning_tests_pytorch_use_all_core
-        --except-tags tf_only,tf2_only,gpu,multi_gpu
-        --test-arg --framework=torch
-        --skip-ray-installation
-        --build-name rllibbuild-py3.10
-        --python-version 3.10
-    depends_on: rllibbuild
-
   - label: ":brain: rllib: examples"
     tags: rllib
     parallelism: 6
     instance_type: large
     commands:
+      # Tests all examples without the gpu, multi_gpu, manual or examples_use_all_core tag
       - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
         --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 2
         --only-tags examples
-        --except-tags multi_gpu,gpu,examples_use_all_core
+        --except-tags gpu,multi_gpu,manual,examples_use_all_core
         --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
         --build-name rllibbuild-py3.10
         --python-version 3.10
+      # Tests the examples_use_all_core tagged examples without the gpu, multi_gpu or manual tag
       - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
         --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
         --only-tags examples_use_all_core
-        --skip-ray-installation
-        --except-tags multi_gpu,gpu
+        --except-tags gpu,multi_gpu,manual
         --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
         --build-name rllibbuild-py3.10
         --python-version 3.10
+        --skip-ray-installation # reuse the same docker image as the previous run
     depends_on: rllibbuild
 
-  - label: ":brain: rllib: tests dir"
-    tags: rllib_directly
-    parallelism: 2
+  - label: ":brain: rllib: learning tests"
+    tags: rllib
+    parallelism: 5
     instance_type: large
     commands:
+      # learning tests without a gpu
       - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
         --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
-        --only-tags tests_dir
-        --except-tags multi_gpu,manual
-        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
+        --only-tags learning_tests
+        --except-tags gpu,multi_gpu,learning_tests_use_all_core,manual
+        --test-arg --framework=torch
+        --build-name rllibbuild-py3.10
+        --python-version 3.10
+      # learning tests without a gpu that use all cores
+      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
+        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
+        --only-tags learning_tests_use_all_core
+        --except-tags gpu,multi_gpu,manual
+        --test-arg --framework=torch
         --build-name rllibbuild-py3.10
         --python-version 3.10
+        --skip-ray-installation # reuse the same docker image as the previous run
     depends_on: rllibbuild
 
   - label: ":brain: rllib: gpu tests"
@@ -103,60 +95,35 @@ steps:
     parallelism: 5
     instance_type: gpu
     commands:
+      # All gpu tagged tests
       - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
         --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
         --only-tags gpu
+        --except-tags multi_gpu,manual
         --test-env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
         --test-env=RLLIB_NUM_GPUS=1
         --build-name rllibgpubuild-py3.10
         --python-version 3.10
     depends_on: rllibgpubuild
 
-  - label: ":brain: rllib: data tests"
+  - label: ":brain: rllib: multi-gpu tests"
     tags:
-      - data
-      - rllib
-      - disabled  # Tests of this tag do not exist any more.
-    instance_type: large
+      - rllib_gpu
+      - gpu
+      - skip-on-microcheck
+    parallelism: 5
+    instance_type: gpu-large
     commands:
-      # learning tests pytorch
+      # All multi-gpu tagged tests
       - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
-        --parallelism-per-worker 3
-        --only-tags learning_tests_with_ray_data
-        --except-tags multi_gpu,gpu,tf_only,tf2_only
-        --test-arg --framework=torch
-        --build-name rllibbuild-py3.10
-        --python-version 3.10
-
-      # rllib unittests
-      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
-        --parallelism-per-worker 3
-        --only-tags ray_data
-        --except-tags learning_tests_with_ray_data,multi_gpu,gpu
-        --skip-ray-installation
-        --build-name rllibbuild-py3.10
-        --python-version 3.10
-        # reuse the same docker image as the previous run
-    depends_on: rllibbuild
-
-  - label: ":brain: rllib: benchmarks"
-    tags: rllib
-    instance_type: medium
-    commands:
-      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --only-tags torch_2.x_only_benchmark
-        --build-name rllibbuild-py3.10
+        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
+        --parallelism-per-worker 2
+        --gpus 4
+        --only-tags multi_gpu
+        --except-tags manual
+        --build-name rllibgpubuild-py3.10
         --python-version 3.10
-    depends_on: rllibbuild
-
-  # - label: ":brain: rllib: memory leak pytorch tests"
-  #  tags: rllib
-  #  instance_type: medium
-  #  commands:
-  #    - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
-  #      --only-tags memory_leak_tests
-  #      --except-tags flaky
-  #      --test-arg --framework=torch
-  #  depends_on: rllibbuild
+    depends_on: rllibgpubuild
 
   - label: ":brain: rllib: doc tests"
     tags:
@@ -167,59 +134,79 @@ steps:
     commands:
       # doc tests
       - bazel run //ci/ray_ci:test_in_docker -- python/ray/... //doc/... rllib
-        --except-tags gpu
         --only-tags doctest
+        --except-tags gpu,manual
         --parallelism-per-worker 2
         --build-name rllibbuild-py3.10
         --python-version 3.10
       # doc examples
       - bazel run //ci/ray_ci:test_in_docker -- //doc/... rllib
         --except-tags gpu,post_wheel_build,timeseries_libs,doctest
         --parallelism-per-worker 2
-        --skip-ray-installation
         --build-name rllibbuild-py3.10
         --python-version 3.10
+        --skip-ray-installation # reuse the same docker image as the previous run
+      # documentation test
       - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
         --only-tags documentation
         --parallelism-per-worker 2
-        --skip-ray-installation
         --build-name rllibbuild-py3.10
         --python-version 3.10
+        --skip-ray-installation # reuse the same docker image as the previous run
     depends_on: rllibbuild
 
-  - label: ":brain: rllib: multi-gpu tests"
+  - label: ":brain: rllib: flaky component & examples tests"
+    key: rllib_flaky_tests_02
     tags:
-      - rllib_gpu
-      - gpu
-      - skip-on-microcheck
-    parallelism: 5
-    instance_type: gpu-large
+      - rllib
+      - rllib_flaky
+      - skip-on-premerge
+    instance_type: large
     commands:
-      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib
-        --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}"
-        --parallelism-per-worker 2
-        --gpus 4
-        --only-tags multi_gpu
-        --build-name rllibgpubuild-py3.10
+      # flaky components
+      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib  --run-flaky-tests --parallelism-per-worker 3
+        --only-tags env,evaluation,models,offline,policy,utils,algorithms,callbacks,core
+        --except-tags learning_tests,examples,documentation,gpu,multi_gpu,manual
+        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
+        --build-name rllibbuild-py3.10
         --python-version 3.10
-    depends_on: rllibgpubuild
 
-  - label: ":brain: rllib: flaky multi-gpu tests"
-    key: rllib_flaky_multi_gpu_tests
+      # flaky examples
+      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib  --run-flaky-tests --parallelism-per-worker 3
+        --only-tags examples
+        --except-tags multi_gpu,gpu,manual,examples_use_all_core
+        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
+        --build-name rllibbuild-py3.10
+        --python-version 3.10
+        --skip-ray-installation # reuse the same docker image as the previous run
+
+      # flaky examples use all core
+      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib  --run-flaky-tests --parallelism-per-worker 3
+        --only-tags examples_use_all_core
+        --except-tags gpu,multi_gpu,manual
+        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
+        --build-name rllibbuild-py3.10
+        --python-version 3.10
+        --skip-ray-installation # reuse the same docker image as the previous run
+    depends_on: rllibbuild
+    soft_fail: true
+
+  - label: ":brain: rllib: flaky learning tests"
+    key: rllib_flaky_tests_01
     tags:
-      - rllib_gpu
-      - gpu
+      - rllib
       - rllib_flaky
       - skip-on-premerge
-    instance_type: gpu-large
+    instance_type: large
     commands:
+      # torch
       - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests
-        --parallelism-per-worker 2
-        --gpus 4
-        --only-tags multi_gpu
-        --build-name rllibgpubuild-py3.10
+        --only-tags learning_tests
+        --except-tags gpu,multi_gpu,manual
+        --test-arg --framework=torch
+        --build-name rllibbuild-py3.10
         --python-version 3.10
-    depends_on: rllibgpubuild
+    depends_on: rllibbuild
     soft_fail: true
 
   - label: ":brain: rllib: flaky gpu tests"
@@ -233,89 +220,29 @@ steps:
     commands:
       - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests
         --only-tags gpu
+        --except-tags multi_gpu,manual
         --test-env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
         --test-env=RLLIB_NUM_GPUS=1
         --build-name rllibgpubuild-py3.10
         --python-version 3.10
     depends_on: rllibgpubuild
     soft_fail: true
 
-  - label: ":brain: rllib: flaky tests (learning tests)"
-    key: rllib_flaky_tests_01
+  - label: ":brain: rllib: flaky multi-gpu tests"
+    key: rllib_flaky_multi_gpu_tests
     tags:
-      - rllib
+      - rllib_gpu
+      - gpu
       - rllib_flaky
       - skip-on-premerge
-    instance_type: large
+    instance_type: gpu-large
     commands:
-      # torch
-      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests
-        --only-tags fake_gpus,learning_tests_discrete,learning_tests_with_ray_data,crashing_cartpole,stateless_cartpole,learning_tests_continuous
-        --except-tags tf_only,tf2_only,multi_gpu,gpu
-        --test-arg --framework=torch
-        --build-name rllibbuild-py3.10
-        --python-version 3.10
-
-      # tf2-static-graph
       - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests
-        --only-tags tf_only
-        --except-tags torch_only,tf2_only,no_tf_static_graph,multi_gpu,gpu
-        --test-arg --framework=tf
-        --build-name rllibbuild-py3.10
-        --python-version 3.10
-        --skip-ray-installation # reuse the same docker image as the previous run
-      # tf2-eager-tracing
-      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests
-        --only-tags tf2_only
-        --except-tags fake_gpus,torch_only,multi_gpu,no_tf_eager_tracing,gpu
-        --test-arg --framework=tf2
-        --build-name rllibbuild-py3.10
-        --python-version 3.10
-        --skip-ray-installation # reuse the same docker image as the previous run
-    depends_on: rllibbuild
-    soft_fail: true
-
-  - label: ":brain: rllib: flaky tests (examples/rlmodule/models/tests_dir)"
-    key: rllib_flaky_tests_02
-    tags:
-      - rllib
-      - rllib_flaky
-      - skip-on-premerge
-    instance_type: large
-    commands:
-      # examples
-      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib  --run-flaky-tests --parallelism-per-worker 3
-        --only-tags examples
-        --except-tags multi_gpu,gpu
-        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
-        --build-name rllibbuild-py3.10
-        --python-version 3.10
-
-      # rlmodule tests
-      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3
-        --only-tags rlm
-         --except-tags multi_gpu,gpu
-        --test-env RLLIB_ENABLE_RL_MODULE=1
-        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
-        --build-name rllibbuild-py3.10
-        --python-version 3.10
-        --skip-ray-installation # reuse the same docker image as the previous run
-
-      # algorithm, models
-      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib  --run-flaky-tests --parallelism-per-worker 3
-        --except-tags learning_tests,memory_leak_tests,examples,tests_dir,documentation,multi_gpu,gpu,no_cpu,torch_2.x_only_benchmark,manual
-        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
-        --build-name rllibbuild-py3.10
-        --python-version 3.10
-        --skip-ray-installation # reuse the same docker image as the previous run
-
-      # tests/ dir
-      - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3
-        --only-tags tests_dir
-        --except-tags multi_gpu,gpu,manual
-        --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
-        --build-name rllibbuild-py3.10
+        --parallelism-per-worker 2
+        --gpus 4
+        --only-tags multi_gpu
+        --except-tags manual
+        --build-name rllibgpubuild-py3.10
         --python-version 3.10
-        --skip-ray-installation # reuse the same docker image as the previous run
-    depends_on: rllibbuild
+    depends_on: rllibgpubuild
     soft_fail: true