@@ -9,13 +9,13 @@ def main(env_id, seed):
9
9
# ==============================================================
10
10
# beginning of the most frequently changed config specified by the user
11
11
# ==============================================================
12
- collector_env_num = 8
13
- num_segments = 8
14
- evaluator_env_num = 3
12
+ # collector_env_num = 8
13
+ # num_segments = 8
14
+ # evaluator_env_num = 3
15
15
16
- # collector_env_num = 1
17
- # num_segments = 1
18
- # evaluator_env_num = 1
16
+ collector_env_num = 1
17
+ num_segments = 1
18
+ evaluator_env_num = 1
19
19
20
20
num_simulations = 50
21
21
collect_num_simulations = 25
@@ -25,6 +25,8 @@ def main(env_id, seed):
25
25
max_env_step = int (50e6 )
26
26
batch_size = 256
27
27
# batch_size = 64 # debug
28
+ # batch_size = 4 # debug
29
+
28
30
num_layers = 2
29
31
# replay_ratio = 0.25
30
32
replay_ratio = 0.1
@@ -33,6 +35,10 @@ def main(env_id, seed):
33
35
num_unroll_steps = 10
34
36
infer_context_length = 4
35
37
38
+ # game_segment_length = 40
39
+ # num_unroll_steps = 20
40
+ # infer_context_length = 8
41
+
36
42
# game_segment_length = 200
37
43
# num_unroll_steps = 16
38
44
# infer_context_length = 8
@@ -93,6 +99,8 @@ def main(env_id, seed):
93
99
norm_type = norm_type ,
94
100
num_res_blocks = 2 ,
95
101
num_channels = 128 ,
102
+ # num_res_blocks=1, # TODO
103
+ # num_channels=64,
96
104
support_size = 601 ,
97
105
policy_entropy_weight = 5e-3 ,
98
106
# policy_entropy_weight=5e-2, # TODO(pu)
@@ -125,6 +133,13 @@ def main(env_id, seed):
125
133
# final_norm_option_in_encoder="SimNorm",
126
134
# final_norm_option_in_obs_head="SimNorm",
127
135
# predict_latent_loss_type='group_kl',
136
+
137
+ # weight_decay=1e-2,
138
+ # latent_norm_loss=True,
139
+
140
+ latent_norm_loss = False ,
141
+ weight_decay = 1e-4 , # TODO
142
+
128
143
),
129
144
),
130
145
# gradient_scale=True, #TODO
@@ -160,8 +175,8 @@ def main(env_id, seed):
160
175
grad_clip_value = 5 ,
161
176
replay_buffer_size = int (1e6 ),
162
177
# eval_freq=int(5e3),
163
- # eval_freq=int(1e4),
164
- eval_freq = int (2e4 ),
178
+ eval_freq = int (1e4 ), # TODO
179
+ # eval_freq=int(2e4),
165
180
collector_env_num = collector_env_num ,
166
181
evaluator_env_num = evaluator_env_num ,
167
182
# ============= The key parameters that differ for reanalyze =============
@@ -193,8 +208,10 @@ def main(env_id, seed):
193
208
# ============ use muzero_segment_collector instead of muzero_collector =============
194
209
from lzero .entry import train_unizero_segment
195
210
196
- main_config .exp_name = f'data_unizero_longrun_20250819/{ env_id [:- 14 ]} /{ env_id [:- 14 ]} _uz_fix-init-recur_clear20_mulossweight_spsi20_envnum{ collector_env_num } _encoder-head-ln_soft-target-005_brf{ buffer_reanalyze_freq } -rbs{ reanalyze_batch_size } -rp{ reanalyze_partition } _nlayer{ num_layers } _numsegments-{ num_segments } _gsl{ game_segment_length } _rr{ replay_ratio } _Htrain{ num_unroll_steps } -Hinfer{ infer_context_length } _bs{ batch_size } _c25_seed{ seed } '
211
+ # main_config.exp_name = f'data_unizero_longrun_20250827/{env_id[:-14]}/{env_id[:-14]}_uz_wd1e-2_fix-init-recur_clear{game_segment_length}_originlossweight_spsi{game_segment_length}_envnum{collector_env_num}_encoder-head-ln_soft-target-005_brf{buffer_reanalyze_freq}-rbs{reanalyze_batch_size}-rp{reanalyze_partition}_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_rr{replay_ratio}_Htrain{num_unroll_steps}-Hinfer{infer_context_length}_bs{batch_size}_c25_seed{seed}'
212
+ main_config .exp_name = f'data_unizero_longrun_20250827/{ env_id [:- 14 ]} /{ env_id [:- 14 ]} _uz_lnlw001_fix-init-recur_clear{ game_segment_length } _originlossweight_spsi{ game_segment_length } _envnum{ collector_env_num } _encoder-head-ln_soft-target-005_brf{ buffer_reanalyze_freq } -rbs{ reanalyze_batch_size } -rp{ reanalyze_partition } _nlayer{ num_layers } _numsegments-{ num_segments } _gsl{ game_segment_length } _rr{ replay_ratio } _Htrain{ num_unroll_steps } -Hinfer{ infer_context_length } _bs{ batch_size } _c25_seed{ seed } '
197
213
214
+ # main_config.exp_name = f'data_unizero_longrun_20250819/{env_id[:-14]}/{env_id[:-14]}_uz_fix-init-recur_clear{game_segment_length}_mulossweight_spsi{game_segment_length}_envnum{collector_env_num}_encoder-head-ln_soft-target-005_brf{buffer_reanalyze_freq}-rbs{reanalyze_batch_size}-rp{reanalyze_partition}_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_rr{replay_ratio}_Htrain{num_unroll_steps}-Hinfer{infer_context_length}_bs{batch_size}_c25_seed{seed}'
198
215
199
216
# main_config.exp_name = f'data_unizero_longrun_20250819/{env_id[:-14]}/{env_id[:-14]}_uz_fix-init-recur_clear20_origlossweight_spsi20_envnum{collector_env_num}_encoder-head-l2norm_soft-target-005_brf{buffer_reanalyze_freq}-rbs{reanalyze_batch_size}-rp{reanalyze_partition}_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_rr{replay_ratio}_Htrain{num_unroll_steps}-Hinfer{infer_context_length}_bs{batch_size}_c25_seed{seed}'
200
217
@@ -256,7 +273,7 @@ def main(env_id, seed):
256
273
main (args .env , args .seed )
257
274
258
275
"""
259
- export CUDA_VISIBLE_DEVICES=0
276
+ export CUDA_VISIBLE_DEVICES=6
260
277
cd /fs-computility/niuyazhe/puyuan/code/LightZero
261
278
python /fs-computility/niuyazhe/puyuan/code/LightZero/zoo/atari/config/atari_unizero_segment_config.py
262
279
"""
0 commit comments