Skip to content

Commit 79e8d6d

Browse files
authored
Sooper go-to-goal v2 (#209)
1 parent 4d1c048 commit 79e8d6d

File tree

2 files changed

+75
-0
lines changed

2 files changed

+75
-0
lines changed
Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
1+
# @package _global_
2+
defaults:
3+
- override /environment: go_to_goal
4+
- override /agent: mbpo
5+
- override /agent/data_collection: episodic
6+
- override /agent/cost_robustness: pessimistic_cost_update
7+
- _self_
8+
9+
training:
10+
num_timesteps: 750000
11+
train_domain_randomization: false
12+
eval_domain_randomization: false
13+
safe: true
14+
safety_budget: 25
15+
action_repeat: 4
16+
num_envs: 1
17+
num_evals: 25
18+
wandb_id: 2k0olihe
19+
20+
agent:
21+
activation: swish
22+
policy_hidden_layer_sizes: [256, 256, 256]
23+
value_hidden_layer_sizes: [512, 512]
24+
model_hidden_layer_sizes: [400, 400, 400, 400]
25+
batch_size: 256
26+
min_replay_size: 5000
27+
max_replay_size: 1048576
28+
critic_grad_updates_per_step: 2000
29+
model_grad_updates_per_step: 140000
30+
num_model_rollouts: 100000
31+
learning_rate: 3e-6
32+
critic_learning_rate: 1e-6
33+
model_learning_rate: 1e-4
34+
pessimism: 12
35+
optimism: 0.1
36+
safety_discounting: 0.99
37+
safety_filter: sooper
38+
load_auxiliaries: true
Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
# @package _global_
2+
defaults:
3+
- mujoco_playground_dm_control
4+
- override /environment: go_to_goal
5+
- override /agent/cost_robustness: ucb_cost
6+
- override /agent/propagation: spidr
7+
- override /agent/penalizer: lagrangian
8+
- _self_
9+
10+
training:
11+
num_timesteps: 5000000
12+
train_domain_randomization: true
13+
eval_domain_randomization: false
14+
safe: true
15+
safety_budget: 25
16+
action_repeat: 4
17+
18+
train_params:
19+
damping:
20+
x: [.9, 1.1]
21+
y: [.9, 1.1]
22+
z: [.9, 1.1]
23+
gear:
24+
x: [-0.2, 0.2]
25+
z: [-0.1, 0.1]
26+
mass: [1., 1.]
27+
28+
agent:
29+
activation: swish
30+
safety_discounting: 0.99
31+
penalizer:
32+
lagrange_multiplier: 5.
33+
penalty_multiplier: 2.
34+
penalty_multiplier_factor: 0.25
35+
propagation:
36+
num_envs: 8
37+
lambda_: 0.175

0 commit comments

Comments
 (0)