@@ -12,13 +12,13 @@ def get_epsilon_greedy_fn(start: float, end: float, decay: int, type_: str = 'ex
12
12
Overview:
13
13
Generate an epsilon_greedy function with decay, which inputs current timestep and outputs current epsilon.
14
14
Arguments:
15
- - start (:obj:`float`): Epsilon start value. For ' linear' , it should be 1.0.
15
+ - start (:obj:`float`): Epsilon start value. For ``linear``, it should be 1.0.
16
16
- end (:obj:`float`): Epsilon end value.
17
17
- decay (:obj:`int`): Controls the speed that epsilon decreases from ``start`` to ``end``. \
18
18
We recommend epsilon decays according to env step rather than iteration.
19
- - type (:obj:`str`): How epsilon decays, now supports ['linear', 'exp'(exponential)]
19
+ - type_ (:obj:`str`): How epsilon decays, now supports ``['linear', 'exp'(exponential)]``.
20
20
Returns:
21
- - eps_fn (:obj:`function`): The epsilon greedy function with decay
21
+ - eps_fn (:obj:`function`): The epsilon greedy function with decay.
22
22
"""
23
23
assert type_ in ['linear' , 'exp' ], type_
24
24
if type_ == 'exp' :
@@ -48,27 +48,27 @@ class BaseNoise(ABC):
48
48
def __init__ (self ) -> None :
49
49
"""
50
50
Overview:
51
- Initialization method
51
+ Initialization method.
52
52
"""
53
53
super ().__init__ ()
54
54
55
55
@abstractmethod
56
56
def __call__ (self , shape : tuple , device : str ) -> torch .Tensor :
57
57
"""
58
58
Overview:
59
- Generate noise according to action tensor's shape, device
59
+ Generate noise according to action tensor's shape, device.
60
60
Arguments:
61
- - shape (:obj:`tuple`): size of the action tensor, output noise's size should be the same
62
- - device (:obj:`str`): device of the action tensor, output noise's device should be the same as it
61
+ - shape (:obj:`tuple`): size of the action tensor, output noise's size should be the same.
62
+ - device (:obj:`str`): device of the action tensor, output noise's device should be the same as it.
63
63
Returns:
64
64
- noise (:obj:`torch.Tensor`): generated action noise, \
65
- have the same shape and device with the input action tensor
65
+ has the same shape and device as the input action tensor.
66
66
"""
67
67
raise NotImplementedError
68
68
69
69
70
70
class GaussianNoise (BaseNoise ):
71
- r """
71
+ """
72
72
Overview:
73
73
Derived class for generating gaussian noise, which satisfies :math:`X \sim N(\mu, \sigma^2)`
74
74
Interface:
@@ -78,10 +78,10 @@ class GaussianNoise(BaseNoise):
78
78
def __init__ (self , mu : float = 0.0 , sigma : float = 1.0 ) -> None :
79
79
"""
80
80
Overview:
81
- Initialize :math:`\mu` and :math:`\sigma` in Gaussian Distribution
81
+ Initialize :math:`\mu` and :math:`\sigma` in Gaussian Distribution.
82
82
Arguments:
83
- - mu (:obj:`float`): :math:`\mu` , mean value
84
- - sigma (:obj:`float`): :math:`\sigma` , standard deviation, should be positive
83
+ - mu (:obj:`float`): :math:`\mu` , mean value.
84
+ - sigma (:obj:`float`): :math:`\sigma` , standard deviation, should be positive.
85
85
"""
86
86
super (GaussianNoise , self ).__init__ ()
87
87
self ._mu = mu
@@ -125,14 +125,15 @@ def __init__(
125
125
"""
126
126
Overview:
127
127
Initialize ``_alpha`` :math:`= \theta * dt`,
128
- ``beta`` :math:`= \sigma * \sqrt{dt}`, in Ornstein-Uhlenbeck process
128
+ ``beta`` :math:`= \sigma * \sqrt{dt}`, in Ornstein-Uhlenbeck process.
129
129
Arguments:
130
- - mu (:obj:`float`): :math:`\mu` , mean value
131
- - sigma (:obj:`float`): :math:`\sigma` , standard deviation of the perturbation noise
132
- - theta (:obj:`float`): how strongly the noise reacts to perturbations, \
133
- greater value means stronger reaction
134
- - dt (:obj:`float`): derivative of time t
135
- - x0 (:obj:`float` or :obj:`torch.Tensor`): initial action
130
+ - mu (:obj:`float`): :math:`\mu` , mean value.
131
+ - sigma (:obj:`float`): :math:`\sigma` , standard deviation of the perturbation noise.
132
+ - theta (:obj:`float`): How strongly the noise reacts to perturbations, \
133
+ greater value means stronger reaction.
134
+ - dt (:obj:`float`): The time increment :math:`dt` used in the formulas above.
135
+ - x0 (:obj:`Union[float, torch.Tensor]`): The initial state of the noise, \
136
+ should be a scalar or tensor with the same shape as the action tensor.
136
137
"""
137
138
super ().__init__ ()
138
139
self ._mu = mu
@@ -144,21 +145,21 @@ def __init__(
144
145
def reset (self ) -> None :
145
146
"""
146
147
Overview:
147
- Reset ``_x`` to the initial state ``_x0``
148
+ Reset ``_x`` to the initial state ``_x0``.
148
149
"""
149
150
self ._x = deepcopy (self ._x0 )
150
151
151
152
def __call__ (self , shape : tuple , device : str , mu : Optional [float ] = None ) -> torch .Tensor :
152
153
"""
153
154
Overview:
154
- Generate gaussian noise according to action tensor's shape, device
155
+ Generate gaussian noise according to action tensor's shape, device.
155
156
Arguments:
156
- - shape (:obj:`tuple`): size of the action tensor, output noise's size should be the same
157
- - device (:obj:`str`): device of the action tensor, output noise's device should be the same as it
158
- - mu (:obj:`float`): new mean value :math:`\mu`, you can set it to `None` if don't need it
157
+ - shape (:obj:`tuple`): The size of the action tensor, output noise's size should be the same.
158
+ - device (:obj:`str`): The device of the action tensor, output noise's device should be the same as it.
159
+ - mu (:obj:`Optional[float]`): The new mean value :math:`\mu`; set it to ``None`` if you don't need it.
159
160
Returns:
160
161
- noise (:obj:`torch.Tensor`): generated action noise, \
161
- have the same shape and device with the input action tensor
162
+ has the same shape and device as the input action tensor.
162
163
"""
163
164
if self ._x is None or \
164
165
(isinstance (self ._x , torch .Tensor ) and self ._x .shape != shape ):
@@ -174,15 +175,15 @@ def __call__(self, shape: tuple, device: str, mu: Optional[float] = None) -> tor
174
175
def x0 (self ) -> Union [float , torch .Tensor ]:
175
176
"""
176
177
Overview:
177
- Get ``self._x0``
178
+ Get ``self._x0``.
178
179
"""
179
180
return self ._x0
180
181
181
182
@x0 .setter
182
183
def x0 (self , _x0 : Union [float , torch .Tensor ]) -> None :
183
184
"""
184
185
Overview:
185
- Set ``self._x0`` and reset ``self.x`` to ``self._x0`` as well
186
+ Set ``self._x0`` and reset ``self._x`` to ``self._x0`` as well.
186
187
"""
187
188
self ._x0 = _x0
188
189
self .reset ()
@@ -198,10 +199,10 @@ def create_noise_generator(noise_type: str, noise_kwargs: dict) -> BaseNoise:
198
199
or raise a KeyError. In other words, a derived noise generator must first register,
199
200
then call ``create_noise_generator`` to get the instance object.
200
201
Arguments:
201
- - noise_type (:obj:`str`): the type of noise generator to be created
202
+ - noise_type (:obj:`str`): the type of noise generator to be created.
202
203
Returns:
203
204
- noise (:obj:`BaseNoise`): the created new noise generator, should be an instance of one of \
204
- noise_mapping's values
205
+ noise_mapping's values.
205
206
"""
206
207
if noise_type not in noise_mapping .keys ():
207
208
raise KeyError ("not support noise type: {}" .format (noise_type ))
0 commit comments