Draft

Changes from all commits (48 commits)
75bdc1d
[rllib] Merge tuned-examples into examples
Nov 21, 2025
b89a7af
Update BUILD.bazel for tuned-examples new location
Nov 21, 2025
5100f82
Gemini review
Nov 21, 2025
36d242f
Merge branch 'master' into merge-tuned-examples
Nov 21, 2025
a0dd0c3
update offline data path
Nov 24, 2025
5a4d28b
update tuned_example file paths
Nov 24, 2025
0f2d5bd
Fix file paths
Nov 24, 2025
b75ba43
Merge branch 'master' into merge-tuned-examples
Nov 26, 2025
dc3be81
Update rllib release test directory and release test paths
Nov 26, 2025
38bf29b
[rllib] Update APPO premerge
Nov 26, 2025
a8f5bcd
Merge branch 'master' into appo-premerge
Nov 27, 2025
6c0dd98
Clean appo folder
Nov 27, 2025
5e9d21d
Add stateles cartpole
Nov 27, 2025
101a5ee
Merge branch 'master' into appo-premerge
Dec 1, 2025
b946c72
pre-commit
Dec 1, 2025
2234d96
Improve documentation
Dec 2, 2025
206d8fc
Fix training scripts
Dec 3, 2025
8bfede0
Merge branch 'master' into appo-premerge
Dec 10, 2025
d9a559e
Change to TicTacToe from Connect4
Dec 10, 2025
32f0e68
Updated to BUILD.bazel to tictactoe file
Dec 10, 2025
a6270c3
kamil code-review
Dec 11, 2025
f80bfa7
Merge branch 'master' into appo-premerge
pseudo-rnd-thoughts Dec 11, 2025
617ce9f
code-review
Dec 12, 2025
2892342
update tictactoe and stop rewards
Dec 15, 2025
05db432
Merge branch 'master' into appo-premerge
kamil-kaczmarek Dec 16, 2025
f08cfab
Add nightly tests
Dec 16, 2025
350c68e
Add default_iters to atari
Dec 16, 2025
9f1bd4d
Update sac examples
Dec 16, 2025
15b8376
Update tic tac toe implementation
Dec 16, 2025
2b141ac
Rewrite TicTacToe and add stop rewards / iters for premerge
Dec 17, 2025
15d25ef
Merge branch 'master' into appo-premerge
Dec 17, 2025
fd8a835
Fix release tests cluster_compute
Dec 17, 2025
c068518
Added docstrings
Dec 18, 2025
7b122d4
code-review
Dec 18, 2025
828e383
remove type: gpu from non gpu nightly
Dec 18, 2025
43b68f9
code review
Dec 19, 2025
dd8a8ce
Merge branch 'appo-premerge' into sac-premerge-nightly
Dec 19, 2025
b98e625
Update the parameters
Dec 19, 2025
7bb656d
Add note about GPU learners
Dec 19, 2025
d989c5e
Fix run name
Dec 19, 2025
319d6b3
pre-commit + more docstring details
Dec 19, 2025
241fd39
Merge branch 'master' into appo-premerge
Dec 29, 2025
30a7b6a
Reduce the number of env-runners from 5 to 4
Dec 29, 2025
36d7c3f
Update documentation
Dec 31, 2025
866da95
Merge branch 'master' into sac-premerge-nightly
Dec 31, 2025
797a9a8
Merge branch 'appo-premerge' into sac-premerge-nightly
Dec 31, 2025
4f0739a
Update documentation, remove appo changes
Dec 31, 2025
791dbca
Fix the tictactoe implementation
Jan 5, 2026
12 changes: 8 additions & 4 deletions doc/source/rllib/rllib-algorithms.rst
@@ -154,8 +154,10 @@ Soft Actor Critic (SAC)


**Tuned examples:**
-`Pendulum-v1 <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/sac/pendulum-sac.yaml>`__,
-`HalfCheetah-v3 <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/sac/halfcheetah_sac.py>`__,
+`Cartpole-v1 <https://github.com/ray-project/ray/blob/master/rllib/examples/algorithms/sac/cartpole_sac.py>`__,
+`Atari (Pong-v5) with Rainbow <https://github.com/ray-project/ray/blob/master/rllib/examples/algorithms/sac/atari_sac.py>`__,
+`with LSTM <https://github.com/ray-project/ray/blob/master/rllib/examples/algorithms/sac/stateless_cartpole_sac_with_lstm.py>`__,
+`Multi-Agent <https://github.com/ray-project/ray/blob/master/rllib/examples/algorithms/sac/tictactoe_sac.py>`__,
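
For orientation, the following is a minimal sketch of what an SAC-with-LSTM setup such as the stateless-CartPole example typically looks like on RLlib's new API stack. It is not the contents of the linked script; the env name "StatelessCartPole" and the exact config values are assumptions.

    # Hedged sketch only -- not the actual stateless_cartpole_sac_with_lstm.py.
    # Assumes the new API stack and that "StatelessCartPole" is a registered env name.
    from ray.rllib.algorithms.sac import SACConfig
    from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig

    config = (
        SACConfig()
        .environment("StatelessCartPole")  # assumed env registration
        .env_runners(num_env_runners=5)
        .learners(num_learners=0)  # no remote learner; train on the main process
        .rl_module(
            # A recurrent module lets the policy recover the velocity signal that
            # the stateless observation drops.
            model_config=DefaultModelConfig(use_lstm=True, max_seq_len=20),
        )
    )

    algo = config.build()
    result = algo.train()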

**SAC-specific configs** (see also :ref:`generic algorithm settings <rllib-algo-configuration-generic-settings>`):

@@ -195,8 +197,10 @@ Asynchronous Proximal Policy Optimization (APPO)


**Tuned examples:**
-`Pong-v5 <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/appo/pong_appo.py>`__
-`HalfCheetah-v4 <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/appo/halfcheetah_appo.py>`__
+`Atari (Pong-v5) <https://github.com/ray-project/ray/blob/master/rllib/examples/algorithms/appo/atari_appo.py>`__
+`MuJoCo (Humanoid-v4) <https://github.com/ray-project/ray/blob/master/rllib/examples/algorithms/appo/mujoco_appo.py>`__
+`Using an LSTM <https://github.com/ray-project/ray/blob/master/rllib/examples/algorithms/appo/stateless_cartpole_appo_with_lstm.py>`__
+`Multi-Agent <https://github.com/ray-project/ray/blob/master/rllib/examples/algorithms/appo/tictactoe_appo.py>`__
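
Likewise, a rough sketch of a two-player multi-agent APPO setup in the spirit of the TicTacToe example; the env name, agent ids, and policy mapping are illustrative assumptions rather than the actual script contents.

    # Hedged sketch only -- not the actual tictactoe_appo.py.
    # Assumes a registered two-player MultiAgentEnv whose agent ids match the policy ids below.
    from ray.rllib.algorithms.appo import APPOConfig

    config = (
        APPOConfig()
        .environment("TicTacToe")  # assumed registered env name
        .multi_agent(
            policies={"player1", "player2"},
            # Map each agent id to the policy/module of the same name.
            policy_mapping_fn=lambda agent_id, episode, **kwargs: agent_id,
        )
        .env_runners(num_env_runners=5)
    )

    algo = config.build()
    result = algo.train()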

**APPO-specific configs** (see also :ref:`generic algorithm settings <rllib-algo-configuration-generic-settings>`):

236 changes: 53 additions & 183 deletions rllib/BUILD.bazel
@@ -87,12 +87,29 @@ py_library(
)

# --------------------------------------------------------------------
# Algorithms learning regression tests.
# Algorithms learning regression tests (rllib/examples/algorithms/[algo-name]).
#
# Tag: learning_tests
#
# This will test python/yaml config files
# inside rllib/examples/algorithms/[algo-name] for actual learning success.
# These tests check that the algorithm achieves above-random performance within a relatively short period of time,
# not that the algorithm reaches the optimal policy.
#
# When moving from a single-learner to a multi-learner test, the expected outcome should change as well:
# either reduce the maximum iterations or samples, or raise the target return, so that the
# multi-learner setup achieves something the single-learner setup normally couldn't.
#
# Compute Config
# - local (CPU) = 7 CPUs, 0 GPU: 5 Env Runners, 0 Learners on CPU, 2 Aggregator Actors per Learner on CPU
# - single (CPU) = 8 CPUs, 0 GPU: 5 Env Runners, 1 Learner on CPU, 2 Aggregator Actors per Learner on CPU
# - single (GPU) = 8 CPUs, 1 GPU: 5 Env Runners, 1 Learner on GPU, 2 Aggregator Actors per Learner on CPU
# - multi (GPU) = 16 CPUs, 2 GPUs: 10 Env Runners, 2 Learners on GPU, 2 Aggregator Actors per Learner on CPU (4 total CPUs)
#
# Legend
# - SA = Single Agent Environment
# - MA = Multi Agent Environment
# - D = Discrete actions
# - C = Continuous actions
# - LSTM = recurrent policy (via LSTM layers)
# --------------------------------------------------------------------
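
The test definitions below hand these compute configs to the example scripts as CLI flags (--num-cpus, --num-env-runners, --num-learners, --num-gpus-per-learner, --stop-reward). As a hedged sketch of how such a script typically consumes them, assuming RLlib's shared example-script helpers (the helper names and defaults are assumptions and may differ from the scripts in this PR):

    # Hedged sketch of an example script's entry point -- assumed structure,
    # not copied from any file in this PR.
    from ray.rllib.algorithms.sac import SACConfig
    from ray.rllib.utils.test_utils import (
        add_rllib_example_script_args,
        run_rllib_example_script_experiment,
    )

    parser = add_rllib_example_script_args(default_reward=150.0, default_timesteps=500_000)
    args = parser.parse_args()

    base_config = SACConfig().environment("CartPole-v1")

    if __name__ == "__main__":
        # --num-env-runners, --num-learners, --num-gpus-per-learner, and --stop-reward
        # from the BUILD args are applied to `base_config` by the shared runner.
        run_rllib_example_script_experiment(base_config, args)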

# APPO
@@ -1622,238 +1639,91 @@ py_test(
],
)

# SAC
# MountainCar
py_test(
name = "learning_tests_mountaincar_sac",
size = "large",
srcs = ["examples/algorithms/sac/mountaincar_sac.py"],
args = [
"--as-test",
],
main = "examples/algorithms/sac/mountaincar_sac.py",
tags = [
"exclusive",
"learning_tests",
"learning_tests_discrete",
"team:rllib",
"torch_only",
],
)

py_test(
name = "learning_tests_mountaincar_sac_gpu",
size = "large",
srcs = ["examples/algorithms/sac/mountaincar_sac.py"],
args = [
"--as-test",
"--num-learners=1",
"--num-gpus-per-learner=1",
],
main = "examples/algorithms/sac/mountaincar_sac.py",
tags = [
"exclusive",
"gpu",
"learning_tests",
"learning_tests_discrete",
"team:rllib",
"torch_only",
],
)

py_test(
name = "learning_tests_mountaincar_sac_multi_cpu",
size = "large",
srcs = ["examples/algorithms/sac/mountaincar_sac.py"],
args = [
"--as-test",
"--num-learners=2",
],
main = "examples/algorithms/sac/mountaincar_sac.py",
tags = [
"exclusive",
"learning_tests",
"learning_tests_discrete",
"team:rllib",
"torch_only",
],
)

py_test(
name = "learning_tests_mountaincar_sac_multi_gpu",
size = "large",
timeout = "eternal",
srcs = ["examples/algorithms/sac/mountaincar_sac.py"],
args = [
"--as-test",
"--num-learners=2",
"--num-gpus-per-learner=1",
],
main = "examples/algorithms/sac/mountaincar_sac.py",
tags = [
"exclusive",
"learning_tests",
"learning_tests_discrete",
"multi_gpu",
"team:rllib",
"torch_only",
],
)
# | SAC (14 total tests) | | Number of Learners (Device) |
# | Environment | Success | Local (CPU) | Single (CPU) | Single (GPU) | Multi (GPU) |
# |--------------------------------|---------|-------------|-----------------|--------------|-------------|
# | (SA/D/LSTM) Stateless Cartpole | 150 | ✅ | ❌ | ❌ | ❌ |
# | (MA/D) TicTacToe | -2.0 | ❌ | ✅ | ❌ | ❌ |
# | (SA/D) Atari (Pong) | 5 | ❌ | ❌ | ❌ | ✅ |
# | (SA/C) MuJoCo (Humanoid) | 200 | ❌ | ❌ | ✅ | ❌ |

# Pendulum
py_test(
name = "learning_tests_pendulum_sac",
name = "learning_tests_sac_stateless_cartpole_local",
size = "large",
srcs = ["examples/algorithms/sac/pendulum_sac.py"],
srcs = ["examples/algorithms/sac/stateless_cartpole_sac_with_lstm.py"],
args = [
"--as-test",
"--num-cpus=7",
"--num-env-runners=5",
"--num-learners=0",
"--stop-reward=150",
],
main = "examples/algorithms/sac/pendulum_sac.py",
main = "examples/algorithms/sac/stateless_cartpole_sac_with_lstm.py",
tags = [
"exclusive",
"learning_tests",
"learning_tests_continuous",
"team:rllib",
"torch_only",
],
)

py_test(
name = "learning_tests_pendulum_sac_gpu",
name = "learning_tests_sac_tictactoe_single_cpu",
size = "large",
srcs = ["examples/algorithms/sac/pendulum_sac.py"],
srcs = ["examples/algorithms/sac/tictactoe_sac.py"],
args = [
"--as-test",
"--num-cpus=8",
"--num-env-runners=5",
"--num-learners=1",
"--num-gpus-per-learner=1",
"--stop-reward=-2",
],
main = "examples/algorithms/sac/pendulum_sac.py",
main = "examples/algorithms/sac/tictactoe_sac.py",
tags = [
"exclusive",
"gpu",
"learning_tests",
"learning_tests_continuous",
"team:rllib",
"torch_only",
],
)

py_test(
name = "learning_tests_pendulum_sac_multi_cpu",
size = "large",
srcs = ["examples/algorithms/sac/pendulum_sac.py"],
args = [
"--as-test",
"--num-learners=2",
],
main = "examples/algorithms/sac/pendulum_sac.py",
tags = [
"exclusive",
"learning_tests",
"learning_tests_continuous",
"team:rllib",
"torch_only",
],
)

py_test(
name = "learning_tests_pendulum_sac_multi_gpu",
name = "learning_tests_sac_atari_multi_gpu",
size = "large",
srcs = ["examples/algorithms/sac/pendulum_sac.py"],
srcs = ["examples/algorithms/sac/atari_sac.py"],
args = [
"--as-test",
"--num-cpus=16",
"--num-env-runners=10",
"--num-learners=2",
"--num-gpus-per-learner=1",
"--stop-reward=5",
],
main = "examples/algorithms/sac/pendulum_sac.py",
tags = [
"exclusive",
"learning_tests",
"learning_tests_continuous",
"multi_gpu",
"team:rllib",
"torch_only",
],
)

# MultiAgentPendulum
py_test(
name = "learning_tests_multi_agent_pendulum_sac",
size = "large",
srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-cpus=4",
],
main = "examples/algorithms/sac/multi_agent_pendulum_sac.py",
main = "examples/algorithms/sac/atari_sac.py",
tags = [
"exclusive",
"gpu",
"learning_tests",
"learning_tests_continuous",
"team:rllib",
"torch_only",
],
)
Review comment (Contributor) on lines +1689 to 1707, severity: medium
This test is configured for multi-GPU execution (num_learners=2, num_gpus_per_learner=1), but it's tagged with gpu. For consistency with other multi-GPU tests in this file (e.g., learning_test_appo_tictactoe_multi_gpu), this should be multi_gpu.

    tags = [
        "exclusive",
        "multi_gpu",
        "learning_tests",
        "team:rllib",
    ],


py_test(
name = "learning_tests_multi_agent_pendulum_sac_gpu",
name = "learning_tests_sac_mujoco_single_gpu",
size = "large",
srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"],
srcs = ["examples/algorithms/sac/mujoco_sac.py"],
args = [
"--as-test",
"--num-agents=2",
"--num-cpus=4",
"--num-cpus=8",
"--num-env-runners=5",
"--num-learners=1",
"--num-gpus-per-learner=1",
"--stop-reward=200",
],
main = "examples/algorithms/sac/multi_agent_pendulum_sac.py",
tags = [
"exclusive",
"gpu",
"learning_tests",
"learning_tests_continuous",
"team:rllib",
"torch_only",
],
)

py_test(
name = "learning_tests_multi_agent_pendulum_sac_multi_cpu",
size = "large",
srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"],
args = [
"--num-agents=2",
"--num-learners=2",
],
main = "examples/algorithms/sac/multi_agent_pendulum_sac.py",
tags = [
"exclusive",
"learning_tests",
"learning_tests_continuous",
"team:rllib",
"torch_only",
],
)

py_test(
name = "learning_tests_multi_agent_pendulum_sac_multi_gpu",
size = "large",
timeout = "eternal",
srcs = ["examples/algorithms/sac/multi_agent_pendulum_sac.py"],
args = [
"--num-agents=2",
"--num-learners=2",
"--num-gpus-per-learner=1",
],
main = "examples/algorithms/sac/multi_agent_pendulum_sac.py",
main = "examples/algorithms/sac/mountaincar_sac.py",
tags = [
"exclusive",
"learning_tests",
"learning_tests_continuous",
"multi_gpu",
"team:rllib",
"torch_only",
],
)
Review comment (Contributor) on lines 1709 to 1728, severity: critical
There are a couple of issues with this py_test definition:

  1. The main attribute on line 1661 points to mountaincar_sac.py, but it should be mujoco_sac.py to match the srcs.
  2. This is a single-GPU test, but it's tagged with multi_gpu on line 1665. It should be gpu for consistency.


2 changes: 1 addition & 1 deletion rllib/core/rl_module/default_model_config.py
@@ -132,7 +132,7 @@ class DefaultModelConfig:
#: Activation function descriptor for the stack configured by `head_fcnet_hiddens`.
#: Supported values are: 'tanh', 'relu', 'swish' (or 'silu', which is the same),
#: and 'linear' (or None).
-head_fcnet_activation: str = "relu"
+head_fcnet_activation: str | None = "relu"
#: Initializer function or class descriptor for the weight/kernel matrices in the
#: stack configured by `head_fcnet_hiddens`. Supported values are the initializer
#: names (str), classes or functions listed by the frameworks (`torch`). See
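
Since the docstring already lists 'linear' (or None) as supported, the widened annotation makes None an explicitly typed value. A minimal usage sketch; the AlgorithmConfig wiring mentioned in the comment is an assumption:

    # Hedged sketch: passing None yields a linear (no-activation) head stack.
    from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig

    model_config = DefaultModelConfig(
        head_fcnet_hiddens=[256, 256],
        head_fcnet_activation=None,  # same effect as "linear"
    )
    # Typically passed to an AlgorithmConfig via `.rl_module(model_config=model_config)`.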