Adjust configurations for benchmark experiments.

opendilab · Jun 13, 2024 · commit abfd595 · 1 parent d0e68c0
Show file tree

Hide file tree

Showing 48 changed files with 90 additions and 90 deletions.
diff --git a/grl_pipelines/benchmark/gmpg/gvp/halfcheetah_medium.py b/grl_pipelines/benchmark/gmpg/gvp/halfcheetah_medium.py
@@ -114,7 +114,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,

diff --git a/grl_pipelines/benchmark/gmpg/gvp/halfcheetah_medium_expert.py b/grl_pipelines/benchmark/gmpg/gvp/halfcheetah_medium_expert.py
@@ -123,8 +123,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=500,
+                learning_rate=1e-6,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=4.0,

diff --git a/grl_pipelines/benchmark/gmpg/gvp/halfcheetah_medium_replay.py b/grl_pipelines/benchmark/gmpg/gvp/halfcheetah_medium_replay.py
@@ -114,7 +114,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=12000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,

diff --git a/grl_pipelines/benchmark/gmpg/gvp/hopper_medium.py b/grl_pipelines/benchmark/gmpg/gvp/hopper_medium.py
@@ -114,7 +114,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,
@@ -123,8 +123,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=500,
+                learning_rate=3e-5,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=20.0,

diff --git a/grl_pipelines/benchmark/gmpg/gvp/hopper_medium_expert.py b/grl_pipelines/benchmark/gmpg/gvp/hopper_medium_expert.py
@@ -114,7 +114,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,
@@ -123,8 +123,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=500,
+                learning_rate=1e-6,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=4.0,

diff --git a/grl_pipelines/benchmark/gmpg/gvp/hopper_medium_replay.py b/grl_pipelines/benchmark/gmpg/gvp/hopper_medium_replay.py
@@ -114,7 +114,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,
@@ -123,8 +123,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=1000,
+                learning_rate=3e-5,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=8.0,

diff --git a/grl_pipelines/benchmark/gmpg/gvp/walker2d_medium.py b/grl_pipelines/benchmark/gmpg/gvp/walker2d_medium.py
@@ -114,7 +114,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,

diff --git a/grl_pipelines/benchmark/gmpg/gvp/walker2d_medium_expert.py b/grl_pipelines/benchmark/gmpg/gvp/walker2d_medium_expert.py
@@ -123,8 +123,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=500,
+                learning_rate=1e-6,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=4.0,

diff --git a/grl_pipelines/benchmark/gmpg/gvp/walker2d_medium_replay.py b/grl_pipelines/benchmark/gmpg/gvp/walker2d_medium_replay.py
@@ -114,7 +114,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,
@@ -123,8 +123,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=1000,
+                learning_rate=2e-5,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=8.0,

diff --git a/grl_pipelines/benchmark/gmpg/icfm/halfcheetah_medium.py b/grl_pipelines/benchmark/gmpg/icfm/halfcheetah_medium.py
@@ -114,7 +114,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,

diff --git a/grl_pipelines/benchmark/gmpg/icfm/halfcheetah_medium_expert.py b/grl_pipelines/benchmark/gmpg/icfm/halfcheetah_medium_expert.py
@@ -123,8 +123,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=500,
+                learning_rate=1e-6,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=4.0,

diff --git a/grl_pipelines/benchmark/gmpg/icfm/halfcheetah_medium_replay.py b/grl_pipelines/benchmark/gmpg/icfm/halfcheetah_medium_replay.py
@@ -114,7 +114,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=12000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,

diff --git a/grl_pipelines/benchmark/gmpg/icfm/hopper_medium.py b/grl_pipelines/benchmark/gmpg/icfm/hopper_medium.py
@@ -114,7 +114,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,
@@ -123,8 +123,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=500,
+                learning_rate=3e-5,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=20.0,

diff --git a/grl_pipelines/benchmark/gmpg/icfm/hopper_medium_expert.py b/grl_pipelines/benchmark/gmpg/icfm/hopper_medium_expert.py
@@ -114,7 +114,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,
@@ -123,8 +123,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=500,
+                learning_rate=1e-6,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=4.0,

diff --git a/grl_pipelines/benchmark/gmpg/icfm/hopper_medium_replay.py b/grl_pipelines/benchmark/gmpg/icfm/hopper_medium_replay.py
@@ -114,7 +114,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,
@@ -123,8 +123,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=1000,
+                learning_rate=3e-5,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=8.0,

diff --git a/grl_pipelines/benchmark/gmpg/icfm/walker2d_medium.py b/grl_pipelines/benchmark/gmpg/icfm/walker2d_medium.py
@@ -114,7 +114,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,

diff --git a/grl_pipelines/benchmark/gmpg/icfm/walker2d_medium_expert.py b/grl_pipelines/benchmark/gmpg/icfm/walker2d_medium_expert.py
@@ -123,8 +123,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=500,
+                learning_rate=1e-6,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=4.0,

diff --git a/grl_pipelines/benchmark/gmpg/icfm/walker2d_medium_replay.py b/grl_pipelines/benchmark/gmpg/icfm/walker2d_medium_replay.py
@@ -114,7 +114,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,
@@ -123,8 +123,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=1000,
+                learning_rate=2e-5,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=8.0,

diff --git a/grl_pipelines/benchmark/gmpg/vpsde/halfcheetah_medium.py b/grl_pipelines/benchmark/gmpg/vpsde/halfcheetah_medium.py
@@ -118,7 +118,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,

diff --git a/grl_pipelines/benchmark/gmpg/vpsde/halfcheetah_medium_expert.py b/grl_pipelines/benchmark/gmpg/vpsde/halfcheetah_medium_expert.py
@@ -127,8 +127,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=500,
+                learning_rate=1e-6,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=4.0,

diff --git a/grl_pipelines/benchmark/gmpg/vpsde/halfcheetah_medium_replay.py b/grl_pipelines/benchmark/gmpg/vpsde/halfcheetah_medium_replay.py
@@ -118,7 +118,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=12000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,

diff --git a/grl_pipelines/benchmark/gmpg/vpsde/hopper_medium.py b/grl_pipelines/benchmark/gmpg/vpsde/hopper_medium.py
@@ -118,7 +118,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,
@@ -127,8 +127,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=500,
+                learning_rate=3e-5,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=20.0,

diff --git a/grl_pipelines/benchmark/gmpg/vpsde/hopper_medium_expert.py b/grl_pipelines/benchmark/gmpg/vpsde/hopper_medium_expert.py
@@ -118,7 +118,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,
@@ -127,8 +127,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=500,
+                learning_rate=1e-6,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=4.0,

diff --git a/grl_pipelines/benchmark/gmpg/vpsde/hopper_medium_replay.py b/grl_pipelines/benchmark/gmpg/vpsde/hopper_medium_replay.py
@@ -118,7 +118,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,
@@ -127,8 +127,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=1000,
+                learning_rate=3e-5,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=8.0,

diff --git a/grl_pipelines/benchmark/gmpg/vpsde/walker2d_medium.py b/grl_pipelines/benchmark/gmpg/vpsde/walker2d_medium.py
@@ -118,7 +118,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,

diff --git a/grl_pipelines/benchmark/gmpg/vpsde/walker2d_medium_expert.py b/grl_pipelines/benchmark/gmpg/vpsde/walker2d_medium_expert.py
@@ -127,8 +127,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=500,
+                learning_rate=1e-6,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=4.0,

diff --git a/grl_pipelines/benchmark/gmpg/vpsde/walker2d_medium_replay.py b/grl_pipelines/benchmark/gmpg/vpsde/walker2d_medium_replay.py
@@ -118,7 +118,7 @@
             t_span=32,
             critic=dict(
                 batch_size=4096,
-                epochs=2000,
+                epochs=5000,
                 learning_rate=3e-4,
                 discount_factor=0.99,
                 update_momentum=0.005,
@@ -127,8 +127,8 @@
             ),
             guided_policy=dict(
                 batch_size=40960,
-                epochs=50,
-                learning_rate=1e-4,
+                epochs=1000,
+                learning_rate=2e-5,
                 copy_from_basemodel=True,
                 gradtime_step=1000,
                 beta=8.0,