@@ -55,11 +55,9 @@ class R2D2GTrXLPolicy(Policy):
55
55
| ``done`` | calculation. | fake termination env
56
56
15 ``collect.n_sample`` int [8, 128] | The number of training samples of a | It varies from
57
57
| call of collector. | different envs
58
- 16 | ``collect.unroll `` int 25 | unroll length of an iteration | unroll_len>1
58
+ 16 | ``collect.seq`` int 20 | Training sequence length | unroll_len>=seq_len>1
59
59
| ``_len``
60
- 17 | ``collect.seq`` int 20 | Training sequence length | unroll_len>=seq_len>1
61
- | ``_len``
62
- 18 | ``learn.init_`` str zero | 'zero' or 'old', how to initialize the |
60
+ 17 | ``learn.init_`` str zero | 'zero' or 'old', how to initialize the |
63
61
| ``memory`` | memory before each training iteration. |
64
62
== ==================== ======== ============== ======================================== =======================
65
63
"""
@@ -81,7 +79,7 @@ class R2D2GTrXLPolicy(Policy):
81
79
discount_factor = 0.99 ,
82
80
# (int) N-step reward for target q_value estimation
83
81
nstep = 5 ,
84
- # how many steps to use as burnin
82
+ # (int) How many steps to use in the burn-in phase
85
83
burnin_step = 1 ,
86
84
# (int) Trajectory (unroll) length
87
85
unroll_len = 25 ,
@@ -158,7 +156,7 @@ def _init_learn(self) -> None:
158
156
self ._seq_len = self ._cfg .seq_len
159
157
self ._value_rescale = self ._cfg .learn .value_rescale
160
158
self ._init_memory = self ._cfg .learn .init_memory
161
- assert self ._init_memory in ['zero' , 'old' ]
159
+ assert self ._init_memory in ['zero' , 'old' ], self . _init_memory
162
160
163
161
self ._target_model = copy .deepcopy (self ._model )
164
162
@@ -352,7 +350,6 @@ def _init_collect(self) -> None:
352
350
Collect mode init method. Called by ``self.__init__``.
353
351
Initialize the unroll length, the sequence length, and the collect model.
354
352
"""
355
- assert 'unroll_len' not in self ._cfg .collect , "Use default unroll_len"
356
353
self ._nstep = self ._cfg .nstep
357
354
self ._gamma = self ._cfg .discount_factor
358
355
self ._unroll_len = self ._cfg .unroll_len
0 commit comments