Commit d88ebe2 (1 parent: 96ccaed)

polish(nyz): polish api doc details

File tree: 12 files changed, +25 -29 lines


ding/bonus/a2c.py (+1 -1)

@@ -70,7 +70,7 @@ def __init__(
     - model (:obj:`torch.nn.Module`): The model of A2C algorithm, which should be an instance of class \
         :class:`ding.model.VAC`. \
         If not specified, a default model will be generated according to the configuration.
-    - cfg (:obj:Union[EasyDict, dict]): The configuration of A2C algorithm, which is a dict. \
+    - cfg (:obj:`Union[EasyDict, dict]`): The configuration of A2C algorithm, which is a dict. \
         Default to None. If not specified, the default configuration will be used. \
         The default configuration can be found in ``ding/config/example/A2C/gym_lunarlander_v2.py``.
     - policy_state_dict (:obj:`str`): The path of policy state dict saved by PyTorch a in local file. \

ding/bonus/c51.py (+2 -3)

@@ -68,9 +68,8 @@ def __init__(
     - exp_name (:obj:`str`): The name of this experiment, which will be used to create the folder to save \
         log data. Default to None. If not specified, the folder name will be ``env_id``-``algorithm``.
     - model (:obj:`torch.nn.Module`): The model of C51 algorithm, which should be an instance of class \
-        :class:`ding.model.C51DQN`. \
-        If not specified, a default model will be generated according to the configuration.
-    - cfg (:obj:Union[EasyDict, dict]): The configuration of C51 algorithm, which is a dict. \
+        :class:`ding.model.C51DQN`. If not specified, a default model will be generated according to the config.
+    - cfg (:obj:`Union[EasyDict, dict]`): The configuration of C51 algorithm, which is a dict. \
         Default to None. If not specified, the default configuration will be used. \
         The default configuration can be found in ``ding/config/example/C51/gym_lunarlander_v2.py``.
     - policy_state_dict (:obj:`str`): The path of policy state dict saved by PyTorch a in local file. \

ding/bonus/ddpg.py (+1 -1)

@@ -70,7 +70,7 @@ def __init__(
     - model (:obj:`torch.nn.Module`): The model of DDPG algorithm, which should be an instance of class \
         :class:`ding.model.ContinuousQAC`. \
         If not specified, a default model will be generated according to the configuration.
-    - cfg (:obj:Union[EasyDict, dict]): The configuration of DDPG algorithm, which is a dict. \
+    - cfg (:obj:`Union[EasyDict, dict]`): The configuration of DDPG algorithm, which is a dict. \
         Default to None. If not specified, the default configuration will be used. \
         The default configuration can be found in ``ding/config/example/DDPG/gym_lunarlander_v2.py``.
     - policy_state_dict (:obj:`str`): The path of policy state dict saved by PyTorch a in local file. \

ding/bonus/dqn.py (+1 -1)

@@ -70,7 +70,7 @@ def __init__(
     - model (:obj:`torch.nn.Module`): The model of DQN algorithm, which should be an instance of class \
         :class:`ding.model.DQN`. \
         If not specified, a default model will be generated according to the configuration.
-    - cfg (:obj:Union[EasyDict, dict]): The configuration of DQN algorithm, which is a dict. \
+    - cfg (:obj:`Union[EasyDict, dict]`): The configuration of DQN algorithm, which is a dict. \
         Default to None. If not specified, the default configuration will be used. \
         The default configuration can be found in ``ding/config/example/DQN/gym_lunarlander_v2.py``.
     - policy_state_dict (:obj:`str`): The path of policy state dict saved by PyTorch a in local file. \

ding/bonus/pg.py (+1 -1)

@@ -68,7 +68,7 @@ def __init__(
     - model (:obj:`torch.nn.Module`): The model of PG algorithm, which should be an instance of class \
         :class:`ding.model.PG`. \
         If not specified, a default model will be generated according to the configuration.
-    - cfg (:obj:Union[EasyDict, dict]): The configuration of PG algorithm, which is a dict. \
+    - cfg (:obj:`Union[EasyDict, dict]`): The configuration of PG algorithm, which is a dict. \
         Default to None. If not specified, the default configuration will be used. \
         The default configuration can be found in ``ding/config/example/PG/gym_lunarlander_v2.py``.
     - policy_state_dict (:obj:`str`): The path of policy state dict saved by PyTorch a in local file. \

ding/bonus/ppo_offpolicy.py (+1 -1)

@@ -70,7 +70,7 @@ def __init__(
     - model (:obj:`torch.nn.Module`): The model of PPO (offpolicy) algorithm, \
         which should be an instance of class :class:`ding.model.VAC`. \
         If not specified, a default model will be generated according to the configuration.
-    - cfg (:obj:Union[EasyDict, dict]): The configuration of PPO (offpolicy) algorithm, which is a dict. \
+    - cfg (:obj:`Union[EasyDict, dict]`): The configuration of PPO (offpolicy) algorithm, which is a dict. \
         Default to None. If not specified, the default configuration will be used. \
         The default configuration can be found in ``ding/config/example/PPO (offpolicy)/gym_lunarlander_v2.py``.
     - policy_state_dict (:obj:`str`): The path of policy state dict saved by PyTorch a in local file. \

ding/bonus/sac.py (+1 -1)

@@ -71,7 +71,7 @@ def __init__(
     - model (:obj:`torch.nn.Module`): The model of SAC algorithm, which should be an instance of class \
         :class:`ding.model.ContinuousQAC`. \
         If not specified, a default model will be generated according to the configuration.
-    - cfg (:obj:Union[EasyDict, dict]): The configuration of SAC algorithm, which is a dict. \
+    - cfg (:obj:`Union[EasyDict, dict]`): The configuration of SAC algorithm, which is a dict. \
         Default to None. If not specified, the default configuration will be used. \
         The default configuration can be found in ``ding/config/example/SAC/gym_lunarlander_v2.py``.
     - policy_state_dict (:obj:`str`): The path of policy state dict saved by PyTorch a in local file. \
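
All seven ding/bonus hunks above touch the same constructor docstring pattern (``exp_name``, ``model``, ``cfg``, ``policy_state_dict``). A minimal usage sketch of how those arguments are typically used through the high-level API follows; the class name DQNAgent and the train(step=...) call are assumptions based on the ding.bonus docstrings, not part of this diff, so verify them against the installed DI-engine version:

    from ding.bonus import DQNAgent

    # exp_name, model, cfg and policy_state_dict are all optional: with only
    # env_id, a default model and the default config for that env are used,
    # and logs go to a folder named "<env_id>-<algorithm>".
    agent = DQNAgent(env_id="LunarLander-v2", exp_name="lunarlander-dqn")
    agent.train(step=100000)  # assumed training entry point of the bonus API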

ding/envs/env_manager/base_env_manager.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def __init__(
124124
125125
.. note::
126126
For more details about how to merge config, please refer to the system document of DI-engine \
127-
(`en link <../03_system/config.html>`_).
127+
(`en link1 <../03_system/config.html>`_).
128128
"""
129129
self._cfg = cfg
130130
self._env_fn = env_fn
@@ -484,7 +484,7 @@ def seed(self, seed: Union[Dict[int, int], List[int], int], dynamic_seed: bool =
484484
485485
.. note::
486486
For more details about ``dynamic_seed``, please refer to the best practice document of DI-engine \
487-
(`en link <../04_best_practice/random_seed.html>`_).
487+
(`en link2 <../04_best_practice/random_seed.html>`_).
488488
"""
489489
if isinstance(seed, numbers.Integral):
490490
seed = [seed + i for i in range(self.env_num)]
@@ -580,7 +580,7 @@ class BaseEnvManagerV2(BaseEnvManager):
580580
581581
.. note::
582582
For more details about new task pipeline, please refer to the system document of DI-engine \
583-
(`system en link <../03_system/index.html>`_).
583+
(`system en link3 <../03_system/index.html>`_).
584584
585585
Interfaces:
586586
reset, step, seed, close, enable_save_replay, launch, default_config, reward_shaping, enable_save_figure
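
The context lines in the second hunk show how a single integer seed is broadcast across sub-environments. A tiny illustration of that rule (plain Python, no env manager needed):

    # One integer seed becomes a per-environment list, as in the context line
    # seed = [seed + i for i in range(self.env_num)] quoted above.
    seed, env_num = 314, 4
    per_env_seeds = [seed + i for i in range(env_num)]
    print(per_env_seeds)  # [314, 315, 316, 317]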

ding/model/template/qgpo.py (+2 -2)

@@ -418,9 +418,9 @@ def q_loss_fn(self, a, s, r, s_, d, fake_a_, discount=0.99):
     - a (:obj:`torch.Tensor`): The input action.
     - s (:obj:`torch.Tensor`): The input state.
     - r (:obj:`torch.Tensor`): The input reward.
-    - s_ (:obj:`torch.Tensor`): The input next state.
+    - s\_ (:obj:`torch.Tensor`): The input next state.
     - d (:obj:`torch.Tensor`): The input done.
-    - fake_a_ (:obj:`torch.Tensor`): The input fake action.
+    - fake_a (:obj:`torch.Tensor`): The input fake action.
     - discount (:obj:`float`): The discount factor.
     """

ding/policy/qgpo.py (+5 -6)

@@ -13,12 +13,11 @@
 @POLICY_REGISTRY.register('qgpo')
 class QGPOPolicy(Policy):
     """
-    Overview:
-        Policy class of QGPO algorithm
-        Contrastive Energy Prediction for Exact Energy-Guided Diffusion Sampling in Offline Reinforcement Learning
-        https://arxiv.org/abs/2304.12824
-    Interfaces:
-        ``__init__``, ``forward``, ``learn``, ``eval``, ``state_dict``, ``load_state_dict``
+    Overview:
+        Policy class of QGPO algorithm (https://arxiv.org/abs/2304.12824).
+        Contrastive Energy Prediction for Exact Energy-Guided Diffusion Sampling in Offline Reinforcement Learning
+    Interfaces:
+        ``__init__``, ``forward``, ``learn``, ``eval``, ``state_dict``, ``load_state_dict``
     """

     config = dict(

ding/rl_utils/value_rescale.py (+4 -4)

@@ -5,11 +5,11 @@ def value_transform(x: torch.Tensor, eps: float = 1e-2) -> torch.Tensor:
     """
     Overview:
         A function to reduce the scale of the action-value function.
-        :math: `h(x) = sign(x)(\sqrt{(abs(x)+1)} - 1) + \eps * x` .
+        :math: `h(x) = sign(x)(\sqrt{(abs(x)+1)} - 1) + \epsilon * x` .
     Arguments:
         - x: (:obj:`torch.Tensor`) The input tensor to be normalized.
         - eps: (:obj:`float`) The coefficient of the additive regularization term \
-            to ensure h^{-1} is Lipschitz continuous
+            to ensure inverse function is Lipschitz continuous
     Returns:
         - (:obj:`torch.Tensor`) Normalized tensor.

@@ -23,11 +23,11 @@ def value_inv_transform(x: torch.Tensor, eps: float = 1e-2) -> torch.Tensor:
     """
     Overview:
         The inverse form of value rescale.
-        :math: `h^{-1}(x) = sign(x)({(\frac{\sqrt{1+4\eps(|x|+1+\eps)}-1}{2\eps})}^2-1)` .
+        :math: `h^{-1}(x) = sign(x)({(\frac{\sqrt{1+4\epsilon(|x|+1+\epsilon)}-1}{2\epsilon})}^2-1)` .
     Arguments:
         - x: (:obj:`torch.Tensor`) The input tensor to be unnormalized.
         - eps: (:obj:`float`) The coefficient of the additive regularization term \
-            to ensure h^{-1} is Lipschitz continuous
+            to ensure inverse function is Lipschitz continuous
     Returns:
         - (:obj:`torch.Tensor`) Unnormalized tensor.
     """

ding/torch_utils/network/gtrxl.py (+3 -5)

@@ -167,8 +167,7 @@ def update(self, hidden_state: List[torch.Tensor]):
     """
     Overview:
         Update the memory given a sequence of hidden states.
-        Example for single layer:
-            memory_len=3, hidden_size_len=2, bs=3
+        Example for single layer: (memory_len=3, hidden_size_len=2, bs=3)

             m00 m01 m02      h00 h01 h02              m20 m21 m22
         m = m10 m11 m12  h = h10 h11 h12  => new_m =  h00 h01 h02

@@ -264,9 +263,8 @@ def _rel_shift(self, x: torch.Tensor, zero_upper: bool = False) -> torch.Tensor:
         4) Mask out the upper triangle (optional)

     .. note::
-        See the following material for better understanding:
-            https://github.com/kimiyoung/transformer-xl/issues/8
-            https://arxiv.org/pdf/1901.02860.pdf (Appendix B)
+        See the following material for better understanding: https://github.com/kimiyoung/transformer-xl/issues/8 \
+            https://arxiv.org/pdf/1901.02860.pdf (Appendix B)
     Arguments:
         - x (:obj:`torch.Tensor`): The input tensor with shape (cur_seq, full_seq, bs, head_num).
        - zero_upper (:obj:`bool`): If True, the upper-right triangle of the matrix is set to zero.
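
The first hunk above compresses the memory-update example onto one line. A short sketch of the update that picture illustrates, i.e. keep the last ``memory_len`` steps of the old memory concatenated with the new hidden states (shapes are illustrative and not the exact update() implementation in gtrxl.py):

    import torch

    memory_len, hs_len, bs, d = 3, 2, 3, 8    # d is an arbitrary embedding size
    m = torch.randn(memory_len, bs, d)        # old memory rows m0, m1, m2
    h = torch.randn(hs_len, bs, d)            # new hidden states h0, h1
    new_m = torch.cat([m, h], dim=0)[-memory_len:]  # rows m2, h0, h1, as in the picture
    print(new_m.shape)  # torch.Size([3, 3, 8])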
