ray-project · pseudo-rnd-thoughts · Nov 21, 2025 · Nov 21, 2025 · Nov 21, 2025 · Nov 21, 2025
diff --git a/release/release_tests.yaml b/release/release_tests.yaml
@@ -2019,8 +2019,20 @@
 
 # --------------------------
 # APPO
+#
+# | APPO (14 total tests)          |         |                Number of Learners (Device)                                                      |
+# | Environment                    | Success | Local (CPU) | Single (CPU)    | Single (GPU) | Multi (GPU) Single Node | Multi (GPU) Multi Node |
+# |--------------------------------|---------|-------------|-----------------|--------------|-------------------------|------------------------|
+# | (SA/D) Cartpole                | 450     | ✅          | ✅              | ❌           | ❌                     | ❌                     |
+# | (SA/D/LSTM) Stateless Cartpole | 350     | ✅          | ❌              | ✅           | ❌                     | ❌                     |
+# | (MA/D) TicTacToe               | 0       | ✅          | ✅              | ❌           | ✅                     | ❌                     |
+# | (SA/D) Atari (Pong)            | 18      | ❌          | ❌              | ❌           | ✅                     | ✅                     |
+# | (SA/C) IsaacLab (Humanoid)     | ??      | ❌          | ✅ (with 1 GPU) | ⚠️           | ❌                     | ❌                     |
+# | (MA/D) Footsies                | ??      | ❌          | ❌              | ✅           | ❌                     | ❌                     |
+#
 # --------------------------
-- name: rllib_learning_tests_pong_appo_torch
+
+- name: rllib_cartpole_local  # APPO table cell: Cartpole x Local (CPU); script runs with --num-learners=0
   python: "3.10"
   group: RLlib tests
   working_dir: rllib_tests
@@ -2030,17 +2042,49 @@
 
   cluster:
     byod:
-      type: gpu
       post_build_script: byod_rllib.sh
-      runtime_env:
-        - RLLIB_TEST_NO_JAX_IMPORT=1
-    cluster_compute: 1gpu_16cpus.yaml
+    cluster_compute: cpu_single_node.yaml  # r7a.2xlarge head node (0 GPUs, 8 CPUs), no worker nodes
 
   run:
     timeout: 1500  # expected 1000 seconds
-    script: python example_algorithms/appo/pong_appo.py --num-learners=1 --num-env-runners=12 --as-release-test
+    script: python example_algorithms/appo/cartpole_appo.py --num-learners=0 --as-release-test
+
+- name: rllib_cartpole_single_cpu  # APPO table cell: Cartpole x Single (CPU); one learner, no GPU flag
+  python: "3.10"
+  group: RLlib tests
+  working_dir: rllib_tests
+
+  frequency: nightly
+  team: rllib
+
+  cluster:
+    byod:
+      post_build_script: byod_rllib.sh
+    cluster_compute: cpu_single_node.yaml  # r7a.2xlarge head node (0 GPUs, 8 CPUs), no worker nodes
 
-- name: rllib_learning_tests_halfcheetah_appo_torch
+  run:
+    timeout: 3000  # expected 2000 seconds
+    script: python example_algorithms/appo/cartpole_appo.py --num-learners=1 --as-release-test
+
+- name: rllib_stateless_cartpole_local  # APPO table cell: Stateless Cartpole (LSTM) x Local (CPU); --num-learners=0
+  python: "3.10"
+  group: RLlib tests
+  working_dir: rllib_tests
+
+  frequency: nightly
+  team: rllib
+
+  cluster:
+    byod:
+      post_build_script: byod_rllib.sh
+    cluster_compute: cpu_single_node.yaml  # r7a.2xlarge head node (0 GPUs, 8 CPUs), no worker nodes
+
+  run:
+    timeout: 3000  # expected 2000 seconds
+    script: python example_algorithms/appo/stateless_cartpole_appo_with_lstm.py --num-learners=0 --as-release-test
+
+
+- name: rllib_stateless_cartpole_gpu  # APPO table cell: Stateless Cartpole (LSTM) x Single (GPU); one learner with one GPU
   python: "3.10"
   group: RLlib tests
   working_dir: rllib_tests
@@ -2052,13 +2096,99 @@
     byod:
       type: gpu
       post_build_script: byod_rllib.sh
-      runtime_env:
-        - RLLIB_TEST_NO_JAX_IMPORT=1
-    cluster_compute: 1gpu_16cpus.yaml
+    cluster_compute: single_gpu_single_node.yaml  # g5.4xlarge head node (1 GPU, 16 CPUs), no worker nodes
+
+  run:
+    timeout: 3000  # expected 2000 seconds
+    script: python example_algorithms/appo/stateless_cartpole_appo_with_lstm.py --num-learners=1 --num-gpus-per-learner=1 --as-release-test
+
+- name: rllib_tictactoe_local  # APPO table cell: TicTacToe x Local (CPU); --num-learners=0
+  python: "3.10"
+  group: RLlib tests
+  working_dir: rllib_tests
+
+  frequency: nightly
+  team: rllib
+
+  cluster:
+    byod:
+      post_build_script: byod_rllib.sh
+    cluster_compute: cpu_single_node.yaml  # r7a.2xlarge head node (0 GPUs, 8 CPUs), no worker nodes
+
+  run:
+    timeout: 3000  # expected 2000 seconds
+    script: python example_algorithms/appo/tictactoe_appo.py --num-learners=0 --as-release-test
+
+- name: rllib_tictactoe_single_cpu  # APPO table cell: TicTacToe x Single (CPU); one learner, no GPU flag
+  python: "3.10"
+  group: RLlib tests
+  working_dir: rllib_tests
+
+  frequency: nightly
+  team: rllib
+
+  cluster:
+    byod:
+      post_build_script: byod_rllib.sh
+    cluster_compute: cpu_single_node.yaml  # r7a.2xlarge head node (0 GPUs, 8 CPUs), no worker nodes
+
+  run:
+    timeout: 3000  # expected 2000 seconds
+    script: python example_algorithms/appo/tictactoe_appo.py --num-learners=1 --as-release-test
+
+- name: rllib_tictactoe_multi_gpu_single_node  # APPO table cell: TicTacToe x Multi (GPU) Single Node; 2 learners x 1 GPU, 10 env runners
+  python: "3.10"
+  group: RLlib tests
+  working_dir: rllib_tests
+
+  frequency: nightly
+  team: rllib
+
+  cluster:
+    byod:
+      type: gpu
+      post_build_script: byod_rllib.sh
+    cluster_compute: multi_gpu_single_node.yaml  # g5.12xlarge head node (4 GPUs, 48 CPUs); the script requests 2 of the GPUs
+
+  run:
+    timeout: 3000  # expected 2000 seconds
+    script: python example_algorithms/appo/tictactoe_appo.py --num-learners=2 --num-gpus-per-learner=1 --num-env-runners=10 --as-release-test
+
+- name: rllib_atari_multi_gpu_single_node  # APPO table cell: Atari x Multi (GPU) Single Node; 2 learners x 1 GPU, 10 env runners
+  python: "3.10"
+  group: RLlib tests
+  working_dir: rllib_tests
+
+  frequency: nightly
+  team: rllib
+
+  cluster:
+    byod:
+      type: gpu
+      post_build_script: byod_rllib.sh
+    cluster_compute: multi_gpu_single_node.yaml  # g5.12xlarge head node (4 GPUs, 48 CPUs); the script requests 2 of the GPUs
+
+  run:
+    timeout: 3000  # expected 2000 seconds
+    script: python example_algorithms/appo/atari_appo.py --num-learners=2 --num-gpus-per-learner=1 --num-env-runners=10 --as-release-test
+
+- name: rllib_atari_multi_gpu_multi_node  # APPO table cell: Atari x Multi (GPU) Multi Node; 2 learners x 1 GPU, 10 env runners
+  python: "3.10"
+  group: RLlib tests
+  working_dir: rllib_tests
+
+  frequency: nightly
+  team: rllib
+
+  cluster:
+    byod:
+      type: gpu
+      post_build_script: byod_rllib.sh
+    cluster_compute: multi_gpu_multi_node.yaml  # r7a.2xlarge head node plus a g5.2xlarge worker group (min/max 2 workers)
 
   run:
     timeout: 3000  # expected 2000 seconds
-    script: python example_algorithms/appo/halfcheetah_appo.py --num-learners=1 --num-env-runners=12 --as-release-test
+    script: python example_algorithms/appo/atari_appo.py --num-learners=2 --num-gpus-per-learner=1 --num-env-runners=10 --as-release-test
 
 ########################
 # Core Nightly Tests

diff --git a/release/rllib_tests/2gpus_32cpus.yaml → release/rllib_tests/cpu_single_node.yaml b/release/rllib_tests/2gpus_32cpus.yaml → release/rllib_tests/cpu_single_node.yaml
@@ -5,7 +5,7 @@ max_workers: 0
 
 head_node_type:
     name: head_node
-    instance_type: g4dn.12xlarge
+    instance_type: r7a.2xlarge  # CPU-only head node: 0 GPUs, 8 CPUs
 
 worker_node_types: []
 

diff --git a/release/rllib_tests/4gpus_64cpus.yaml → ...ase/rllib_tests/multi_gpu_multi_node.yaml b/release/rllib_tests/4gpus_64cpus.yaml → ...ase/rllib_tests/multi_gpu_multi_node.yaml
@@ -1,17 +1,17 @@
 cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
 region: us-west-2
 
-max_workers: 2
+max_workers: 2  # must be >= the worker group's min_workers below (2 GPU workers); 0 would conflict with it
 
 head_node_type:
     name: head_node
-    instance_type: g5.12xlarge
+    instance_type: r7a.2xlarge  # CPU-only head node (0 GPUs, 8 CPUs); GPUs in this cluster come from the worker group
 
 worker_node_types:
     - name: worker_node
-      instance_type: m5.4xlarge
-      min_workers: 1
-      max_workers: 1
+      instance_type: g5.2xlarge  # GPU worker; presumably 1 GPU / 8 CPUs per AWS G5 sizing - TODO confirm
+      min_workers: 2  # min == max: a fixed pool of two workers
+      max_workers: 2
       use_spot: false
 
 advanced_configurations_json:

diff --git a/release/rllib_tests/8gpus_96cpus.yaml → ...se/rllib_tests/multi_gpu_single_node.yaml b/release/rllib_tests/8gpus_96cpus.yaml → ...se/rllib_tests/multi_gpu_single_node.yaml
@@ -1,11 +1,11 @@
 cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
 region: us-west-2
 
-max_workers: 2
+max_workers: 0  # single-node cluster: worker_node_types is empty, so no workers are requested
 
 head_node_type:
     name: head_node
-    instance_type: g5.24xlarge
+    instance_type: g5.12xlarge  # all GPUs sit on this one node: 4 GPUs, 48 CPUs
 
 worker_node_types: []
 

diff --git a/release/rllib_tests/1gpu_16cpus.yaml → ...e/rllib_tests/single_gpu_single_node.yaml b/release/rllib_tests/1gpu_16cpus.yaml → ...e/rllib_tests/single_gpu_single_node.yaml
@@ -5,7 +5,7 @@ max_workers: 0
 
 head_node_type:
     name: head_node
-    instance_type: g5.4xlarge
+    instance_type: g5.4xlarge  # single-GPU head node: 1 GPU, 16 CPUs
 
 worker_node_types: []