Skip to content
Open
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
75bdc1d
[rllib] Merge tuned-examples into examples
Nov 21, 2025
b89a7af
Update BUILD.bazel for tuned-examples new location
Nov 21, 2025
5100f82
Gemini review
Nov 21, 2025
36d242f
Merge branch 'master' into merge-tuned-examples
Nov 21, 2025
a0dd0c3
update offline data path
Nov 24, 2025
5a4d28b
update tuned_example file paths
Nov 24, 2025
0f2d5bd
Fix file paths
Nov 24, 2025
b75ba43
Merge branch 'master' into merge-tuned-examples
Nov 26, 2025
dc3be81
Update rllib release test directory and release test paths
Nov 26, 2025
38bf29b
[rllib] Update APPO premerge
Nov 26, 2025
a8f5bcd
Merge branch 'master' into appo-premerge
Nov 27, 2025
6c0dd98
Clean appo folder
Nov 27, 2025
5e9d21d
Add stateless cartpole
Nov 27, 2025
101a5ee
Merge branch 'master' into appo-premerge
Dec 1, 2025
b946c72
pre-commit
Dec 1, 2025
2234d96
Improve documentation
Dec 2, 2025
206d8fc
Fix training scripts
Dec 3, 2025
8bfede0
Merge branch 'master' into appo-premerge
Dec 10, 2025
d9a559e
Change to TicTacToe from Connect4
Dec 10, 2025
32f0e68
Updated BUILD.bazel to the tictactoe file
Dec 10, 2025
a6270c3
kamil code-review
Dec 11, 2025
f80bfa7
Merge branch 'master' into appo-premerge
pseudo-rnd-thoughts Dec 11, 2025
617ce9f
code-review
Dec 12, 2025
2892342
update tictactoe and stop rewards
Dec 15, 2025
05db432
Merge branch 'master' into appo-premerge
kamil-kaczmarek Dec 16, 2025
f08cfab
Add nightly tests
Dec 16, 2025
350c68e
Add default_iters to atari
Dec 16, 2025
9f1bd4d
Update sac examples
Dec 16, 2025
15b8376
Update tic tac toe implementation
Dec 16, 2025
2b141ac
Rewrite TicTacToe and add stop rewards / iters for premerge
Dec 17, 2025
15d25ef
Merge branch 'master' into appo-premerge
Dec 17, 2025
fd8a835
Fix release tests cluster_compute
Dec 17, 2025
c068518
Added docstrings
Dec 18, 2025
7b122d4
code-review
Dec 18, 2025
828e383
remove type: gpu from non gpu nightly
Dec 18, 2025
43b68f9
code review
Dec 19, 2025
dd8a8ce
Merge branch 'appo-premerge' into sac-premerge-nightly
Dec 19, 2025
b98e625
Update the parameters
Dec 19, 2025
7bb656d
Add note about GPU learners
Dec 19, 2025
d989c5e
Fix run name
Dec 19, 2025
319d6b3
pre-commit + more docstring details
Dec 19, 2025
241fd39
Merge branch 'master' into appo-premerge
Dec 29, 2025
30a7b6a
Reduce the number of env-runners from 5 to 4
Dec 29, 2025
36d7c3f
Update documentation
Dec 31, 2025
866da95
Merge branch 'master' into sac-premerge-nightly
Dec 31, 2025
797a9a8
Merge branch 'appo-premerge' into sac-premerge-nightly
Dec 31, 2025
4f0739a
Update documentation, remove appo changes
Dec 31, 2025
791dbca
Fix the tictactoe implementation
Jan 5, 2026
9a9c59b
fix env-t-to-agent-t
Jan 12, 2026
29288af
debug
Jan 12, 2026
ab0a259
update 2
Jan 14, 2026
a15aebf
debug
Jan 15, 2026
9231ee1
Merge branch 'master' into fix-env-t-to-agent-t
Jan 19, 2026
7230760
update testing
Jan 19, 2026
202eeda
update testing
Jan 20, 2026
75354e7
complete testing
Jan 20, 2026
fc84311
Fix tests
Jan 20, 2026
e62be7f
Fix tests
Jan 20, 2026
605c9f9
Merge branch 'fix-env-t-to-agent-t' into sac-premerge-nightly
Jan 20, 2026
d0630b3
Add explicitly calling configs
Jan 20, 2026
267cbba
Update implementations
Jan 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 141 additions & 11 deletions release/release_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2019,8 +2019,20 @@

# --------------------------
# APPO
#
# | APPO (14 total tests) | | Number of Learners (Device) |
# | Environment | Success | Local (CPU) | Single (CPU) | Single (GPU) | Multi (GPU) Single Node | Multi (GPU) Multi Node |
# |--------------------------------|---------|-------------|-----------------|--------------|-------------------------|------------------------|
# | (SA/D) Cartpole | 450 | ✅ | ✅ | ❌ | ❌ | ❌ |
# | (SA/D/LSTM) Stateless Cartpole | 350 | ✅ | ❌ | ✅ | ❌ | ❌ |
# | (MA/D) TicTacToe | 0 | ✅ | ✅ | ❌ | ✅ | ❌ |
# | (SA/D) Atari (Pong) | 18 | ❌ | ❌ | ❌ | ✅ | ✅ |
# | (SA/C) IsaacLab (Humanoid) | ?? | ❌ | ✅ (with 1 GPU) | ⚠️ | ❌ | ❌ |
# | (MA/D) Footsies | ?? | ❌ | ❌ | ✅ | ❌ | ❌ |
#
# --------------------------
- name: rllib_learning_tests_pong_appo_torch

- name: rllib_cartpole_local
python: "3.10"
group: RLlib tests
working_dir: rllib_tests
Expand All @@ -2030,17 +2042,49 @@

cluster:
byod:
type: gpu
post_build_script: byod_rllib.sh
runtime_env:
- RLLIB_TEST_NO_JAX_IMPORT=1
cluster_compute: 1gpu_16cpus.yaml
cluster_compute: cpu_single_node.yaml

run:
timeout: 1500 # expected 1000 seconds
script: python example_algorithms/appo/pong_appo.py --num-learners=1 --num-env-runners=12 --as-release-test
script: python example_algorithms/appo/cartpole_appo.py --num-learners=0 --as-release-test

# Nightly APPO CartPole release test run with `--num-learners=1` on the
# `cpu_single_node.yaml` compute config (no GPU flags are passed).
- name: rllib_cartpole_single_cpu
  python: "3.10"
  group: RLlib tests
  working_dir: rllib_tests

  frequency: nightly
  team: rllib

  cluster:
    byod:
      post_build_script: byod_rllib.sh
    cluster_compute: cpu_single_node.yaml

  # The stale `- name: rllib_learning_tests_halfcheetah_appo_torch` line that
  # sat between `cluster` and `run` was removed: it opened a second list item
  # and detached this `run` section from the test it belongs to.
  run:
    timeout: 3000  # expected 2000 seconds
    script: python example_algorithms/appo/cartpole_appo.py --num-learners=1 --as-release-test

# Nightly APPO release test for the stateless CartPole LSTM example, run with
# `--num-learners=0` on the `cpu_single_node.yaml` compute config.
- name: rllib_stateless_cartpole_local
  python: "3.10"
  group: RLlib tests
  working_dir: rllib_tests

  frequency: nightly
  team: rllib

  cluster:
    byod:
      post_build_script: byod_rllib.sh
    cluster_compute: cpu_single_node.yaml

  run:
    timeout: 3000  # expected 2000 seconds
    script: python example_algorithms/appo/stateless_cartpole_appo_with_lstm.py --num-learners=0 --as-release-test


- name: rllib_stateless_cartpole_gpu
python: "3.10"
group: RLlib tests
working_dir: rllib_tests
Expand All @@ -2052,13 +2096,99 @@
byod:
type: gpu
post_build_script: byod_rllib.sh
runtime_env:
- RLLIB_TEST_NO_JAX_IMPORT=1
cluster_compute: 1gpu_16cpus.yaml
cluster_compute: single_gpu_single_node.yaml

run:
timeout: 3000 # expected 2000 seconds
script: python example_algorithms/appo/stateless_cartpole_appo_with_lstm.py --num-learners=1 --num-gpus-per-learner=1 --as-release-test

# Nightly APPO TicTacToe release test run with `--num-learners=0` on the
# `cpu_single_node.yaml` compute config.
- name: rllib_tictactoe_local
  python: "3.10"
  group: RLlib tests
  working_dir: rllib_tests

  frequency: nightly
  team: rllib

  cluster:
    byod:
      post_build_script: byod_rllib.sh
    cluster_compute: cpu_single_node.yaml

  run:
    timeout: 3000  # expected 2000 seconds
    script: python example_algorithms/appo/tictactoe_appo.py --num-learners=0 --as-release-test

# Nightly APPO TicTacToe release test run with `--num-learners=1` on the
# `cpu_single_node.yaml` compute config (no GPU flags are passed).
- name: rllib_tictactoe_single_cpu
  python: "3.10"
  group: RLlib tests
  working_dir: rllib_tests

  frequency: nightly
  team: rllib

  cluster:
    byod:
      post_build_script: byod_rllib.sh
    cluster_compute: cpu_single_node.yaml

  run:
    timeout: 3000  # expected 2000 seconds
    script: python example_algorithms/appo/tictactoe_appo.py --num-learners=1 --as-release-test

# Nightly APPO TicTacToe release test with two learners, one GPU per learner
# and ten env runners, on the `multi_gpu_single_node.yaml` compute config.
- name: rllib_tictactoe_multi_gpu_single_node
  python: "3.10"
  group: RLlib tests
  working_dir: rllib_tests

  frequency: nightly
  team: rllib

  cluster:
    byod:
      # GPU BYOD image type; set on every test here that passes
      # `--num-gpus-per-learner`.
      type: gpu
      post_build_script: byod_rllib.sh
    cluster_compute: multi_gpu_single_node.yaml

  run:
    timeout: 3000  # expected 2000 seconds
    script: python example_algorithms/appo/tictactoe_appo.py --num-learners=2 --num-gpus-per-learner=1 --num-env-runners=10 --as-release-test

# Nightly APPO Atari release test with two learners, one GPU per learner and
# ten env runners, on the `multi_gpu_single_node.yaml` compute config.
- name: rllib_atari_multi_gpu_single_node
  python: "3.10"
  group: RLlib tests
  working_dir: rllib_tests

  frequency: nightly
  team: rllib

  cluster:
    byod:
      type: gpu
      post_build_script: byod_rllib.sh
    cluster_compute: multi_gpu_single_node.yaml

  run:
    timeout: 3000  # expected 2000 seconds
    script: python example_algorithms/appo/atari_appo.py --num-learners=2 --num-gpus-per-learner=1 --num-env-runners=10 --as-release-test

# Nightly APPO Atari release test with two learners, one GPU per learner and
# ten env runners, on the `multi_gpu_multi_node.yaml` compute config.
- name: rllib_atari_multi_gpu_multi_node
  python: "3.10"
  group: RLlib tests
  working_dir: rllib_tests

  frequency: nightly
  team: rllib

  cluster:
    byod:
      type: gpu
      post_build_script: byod_rllib.sh
    cluster_compute: multi_gpu_multi_node.yaml

  run:
    timeout: 3000  # expected 2000 seconds
    # Single `script` key: the leftover halfcheetah_appo.py command was a
    # duplicate key that the atari command below already overrode.
    script: python example_algorithms/appo/atari_appo.py --num-learners=2 --num-gpus-per-learner=1 --num-env-runners=10 --as-release-test

########################
# Core Nightly Tests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ max_workers: 0

head_node_type:
name: head_node
instance_type: g4dn.12xlarge
instance_type: r7a.2xlarge # 0 gpus with 8 cpus

worker_node_types: []

Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
# Cluster compute config with a CPU head node and two GPU worker nodes.
# NOTE(review): the file name is not visible here; presumably this is
# multi_gpu_multi_node.yaml, confirm before applying.
cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
region: us-west-2

# Cluster-wide worker cap. Kept at 2 so it agrees with the worker group below,
# which requires min_workers: 2; a cap of 0 would contradict that minimum.
max_workers: 2

head_node_type:
  name: head_node
  instance_type: r7a.2xlarge

worker_node_types:
  - name: worker_node
    instance_type: g5.2xlarge
    min_workers: 2
    max_workers: 2
    use_spot: false

advanced_configurations_json:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Cluster compute config for a single GPU head node with no worker nodes.
# NOTE(review): the file name is not visible here; presumably this is
# multi_gpu_single_node.yaml, confirm before applying.
cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
region: us-west-2

# No workers: everything runs on the head node.
max_workers: 0

head_node_type:
  name: head_node
  instance_type: g5.12xlarge  # 4 gpus with 48 cpus

worker_node_types: []

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ max_workers: 0

head_node_type:
name: head_node
instance_type: g5.4xlarge
instance_type: g5.4xlarge # 1 gpu with 16 cpus

worker_node_types: []

Expand Down
Loading