Skip to content

Commit

Permalink
Adjust configurations for benchmark experiments.
Browse files: browse the repository at this point in the history
  • Loading branch information
zjowowen committed Jun 13, 2024
1 parent d0e68c0 commit abfd595
Show file tree
Hide file tree
Showing 48 changed files with 90 additions and 90 deletions.
2 changes: 1 addition & 1 deletion grl_pipelines/benchmark/gmpg/gvp/halfcheetah_medium.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand Down
4 changes: 2 additions & 2 deletions grl_pipelines/benchmark/gmpg/gvp/halfcheetah_medium_expert.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=500,
learning_rate=1e-6,
copy_from_basemodel=True,
gradtime_step=1000,
beta=4.0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=12000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand Down
6 changes: 3 additions & 3 deletions grl_pipelines/benchmark/gmpg/gvp/hopper_medium.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand All @@ -123,8 +123,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=500,
learning_rate=3e-5,
copy_from_basemodel=True,
gradtime_step=1000,
beta=20.0,
Expand Down
6 changes: 3 additions & 3 deletions grl_pipelines/benchmark/gmpg/gvp/hopper_medium_expert.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand All @@ -123,8 +123,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=500,
learning_rate=1e-6,
copy_from_basemodel=True,
gradtime_step=1000,
beta=4.0,
Expand Down
6 changes: 3 additions & 3 deletions grl_pipelines/benchmark/gmpg/gvp/hopper_medium_replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand All @@ -123,8 +123,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=1000,
learning_rate=3e-5,
copy_from_basemodel=True,
gradtime_step=1000,
beta=8.0,
Expand Down
2 changes: 1 addition & 1 deletion grl_pipelines/benchmark/gmpg/gvp/walker2d_medium.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand Down
4 changes: 2 additions & 2 deletions grl_pipelines/benchmark/gmpg/gvp/walker2d_medium_expert.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=500,
learning_rate=1e-6,
copy_from_basemodel=True,
gradtime_step=1000,
beta=4.0,
Expand Down
6 changes: 3 additions & 3 deletions grl_pipelines/benchmark/gmpg/gvp/walker2d_medium_replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand All @@ -123,8 +123,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=1000,
learning_rate=2e-5,
copy_from_basemodel=True,
gradtime_step=1000,
beta=8.0,
Expand Down
2 changes: 1 addition & 1 deletion grl_pipelines/benchmark/gmpg/icfm/halfcheetah_medium.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=500,
learning_rate=1e-6,
copy_from_basemodel=True,
gradtime_step=1000,
beta=4.0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=12000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand Down
6 changes: 3 additions & 3 deletions grl_pipelines/benchmark/gmpg/icfm/hopper_medium.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand All @@ -123,8 +123,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=500,
learning_rate=3e-5,
copy_from_basemodel=True,
gradtime_step=1000,
beta=20.0,
Expand Down
6 changes: 3 additions & 3 deletions grl_pipelines/benchmark/gmpg/icfm/hopper_medium_expert.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand All @@ -123,8 +123,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=500,
learning_rate=1e-6,
copy_from_basemodel=True,
gradtime_step=1000,
beta=4.0,
Expand Down
6 changes: 3 additions & 3 deletions grl_pipelines/benchmark/gmpg/icfm/hopper_medium_replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand All @@ -123,8 +123,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=1000,
learning_rate=3e-5,
copy_from_basemodel=True,
gradtime_step=1000,
beta=8.0,
Expand Down
2 changes: 1 addition & 1 deletion grl_pipelines/benchmark/gmpg/icfm/walker2d_medium.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand Down
4 changes: 2 additions & 2 deletions grl_pipelines/benchmark/gmpg/icfm/walker2d_medium_expert.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=500,
learning_rate=1e-6,
copy_from_basemodel=True,
gradtime_step=1000,
beta=4.0,
Expand Down
6 changes: 3 additions & 3 deletions grl_pipelines/benchmark/gmpg/icfm/walker2d_medium_replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand All @@ -123,8 +123,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=1000,
learning_rate=2e-5,
copy_from_basemodel=True,
gradtime_step=1000,
beta=8.0,
Expand Down
2 changes: 1 addition & 1 deletion grl_pipelines/benchmark/gmpg/vpsde/halfcheetah_medium.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=500,
learning_rate=1e-6,
copy_from_basemodel=True,
gradtime_step=1000,
beta=4.0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=12000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand Down
6 changes: 3 additions & 3 deletions grl_pipelines/benchmark/gmpg/vpsde/hopper_medium.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand All @@ -127,8 +127,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=500,
learning_rate=3e-5,
copy_from_basemodel=True,
gradtime_step=1000,
beta=20.0,
Expand Down
6 changes: 3 additions & 3 deletions grl_pipelines/benchmark/gmpg/vpsde/hopper_medium_expert.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand All @@ -127,8 +127,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=500,
learning_rate=1e-6,
copy_from_basemodel=True,
gradtime_step=1000,
beta=4.0,
Expand Down
6 changes: 3 additions & 3 deletions grl_pipelines/benchmark/gmpg/vpsde/hopper_medium_replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand All @@ -127,8 +127,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=1000,
learning_rate=3e-5,
copy_from_basemodel=True,
gradtime_step=1000,
beta=8.0,
Expand Down
2 changes: 1 addition & 1 deletion grl_pipelines/benchmark/gmpg/vpsde/walker2d_medium.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand Down
4 changes: 2 additions & 2 deletions grl_pipelines/benchmark/gmpg/vpsde/walker2d_medium_expert.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=500,
learning_rate=1e-6,
copy_from_basemodel=True,
gradtime_step=1000,
beta=4.0,
Expand Down
6 changes: 3 additions & 3 deletions grl_pipelines/benchmark/gmpg/vpsde/walker2d_medium_replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@
t_span=32,
critic=dict(
batch_size=4096,
epochs=2000,
epochs=5000,
learning_rate=3e-4,
discount_factor=0.99,
update_momentum=0.005,
Expand All @@ -127,8 +127,8 @@
),
guided_policy=dict(
batch_size=40960,
epochs=50,
learning_rate=1e-4,
epochs=1000,
learning_rate=2e-5,
copy_from_basemodel=True,
gradtime_step=1000,
beta=8.0,
Expand Down
Loading

0 comments on commit abfd595

Please sign in to comment.