1
1
from typing import Dict , Any , List
2
2
from functools import partial
3
- import copy
4
3
5
4
import torch
6
5
from torch import Tensor
7
6
from torch import nn
8
- from torch .distributions import Normal , Independent , TransformedDistribution , TanhTransform
9
- from easydict import EasyDict
7
+ from torch .distributions import Normal , Independent
10
8
11
9
from ding .torch_utils import to_device , fold_batch , unfold_batch , unsqueeze_repeat
12
- from ding .utils import POLICY_REGISTRY , deep_merge_dicts
10
+ from ding .utils import POLICY_REGISTRY
13
11
from ding .policy import SACPolicy
14
12
from ding .rl_utils import generalized_lambda_returns
15
13
from ding .policy .common_utils import default_preprocess_learn
@@ -33,11 +31,12 @@ class MBSACPolicy(SACPolicy):
33
31
== ==================== ======== ============= ==================================
34
32
1 ``learn._lambda`` float 0.8 | Lambda for TD-lambda return.
35
33
2 ``learn.grad_clip`` float 100.0 | Max norm of gradients.
36
- 3 ``learn.sample_ `` bool True | Whether to sample states or tra-
37
- ``state `` | nsitions from env buffer.
34
+ 3 | ``learn.sample `` bool True | Whether to sample states or
35
+ | ``_state `` | transitions from env buffer.
38
36
== ==================== ======== ============= ==================================
39
37
40
38
.. note::
39
+
41
40
For other configs, please refer to ding.policy.sac.SACPolicy.
42
41
"""
43
42
0 commit comments