Skip to content

Commit

Permalink
Minor bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
lucadellalib committed Mar 4, 2023
1 parent 2caceb0 commit 758f7e4
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 36 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,9 @@ actorch run REINFORCE_CartPole-v1.py # Run experiment

**NOTE**: training artifacts (e.g. checkpoints, metrics, etc.) are saved in nested subdirectories.
This might cause issues on Windows, since the maximum path length is 260 characters. In that case,
move the configuration file (or set `local_dir`) to an upper level directory (e.g. `Desktop`),
shorten the configuration file name, and/or shorten the algorithm name
(e.g. `DistributedDataParallelREINFORCE.rename("DDPR")`).

Wait for a few minutes until the training ends. The mean cumulative reward over
the last 100 episodes should exceed 475, which means that the environment was
Expand All @@ -186,8 +188,8 @@ successfully solved. You can now plot the performance metrics saved in the auto-
(or [Matplotlib](https://matplotlib.org/)):

```bash
cd experiments/REINFORCE_CartPole-v1/<auto-generated-experiment-name>
pip install actorch[vistool]
actorch vistool plotly tensorboard
```

Expand Down
66 changes: 50 additions & 16 deletions actorch/algorithms/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,23 @@ def __init__(
**kwargs,
)

@classmethod
def rename(cls, name: "str") -> "Type[Algorithm]":
    """Build and return a copy of this class renamed to `name`.

    The copy is a trivial subclass of this class, so it behaves
    identically; only its class name differs.

    Parameters
    ----------
    name:
        The new class name.

    Returns
    -------
        The renamed class.

    """
    # Create a dynamic subclass with an empty namespace: everything
    # is inherited from `cls`, only the name changes
    bases = (cls,)
    namespace = {}
    return type(name, bases, namespace)

# override
def setup(self, config: "Dict[str, Any]") -> "None":
self.config = Algorithm.Config(**self.config)
Expand Down Expand Up @@ -1388,22 +1405,6 @@ def __init__(
**worker_config,
)

# override
def setup(self, config: "Dict[str, Any]") -> "None":
    """Set up the trainable from `config`.

    Builds the full configuration, extracts `log_sys_usage` (managed by
    this class rather than by the base one), forces the reduction mode
    to "sum", and delegates the rest to the base implementation.
    """
    full_config = DistributedDataParallelAlgorithm.Config(**config)
    # `log_sys_usage` is handled here, so hide it from the base setup
    self.log_sys_usage = full_config.pop("log_sys_usage")
    # NOTE(review): "sum" is presumably the only reduction mode compatible
    # with distributed data-parallel training — confirm with the base class
    full_config["reduction_mode"] = "sum"
    super().setup(full_config)
    # Re-expose the stored value so that it still appears in `self.config`
    self.config["log_sys_usage"] = self.log_sys_usage

# override
def reset_config(self, new_config: "Dict[str, Any]") -> "bool":
    """Attempt to reset the trainable in-place with `new_config`.

    Returns False when `log_sys_usage` differs from the current setting
    (changing it requires a full restart); otherwise forces the reduction
    mode to "sum" and defers to the base implementation.
    """
    candidate = DistributedDataParallelAlgorithm.Config(**new_config)
    requested_log_sys_usage = candidate.pop("log_sys_usage")
    if requested_log_sys_usage != self.log_sys_usage:
        # `log_sys_usage` cannot be changed without a restart
        return False
    candidate["reduction_mode"] = "sum"
    return super().reset_config(candidate)

# override
@classmethod
def get_worker_cls(cls) -> "Type[Trainable]":
Expand Down Expand Up @@ -1489,6 +1490,39 @@ def _seed(self) -> "None":

return Worker

@classmethod
def rename(cls, name: "str") -> "Type[DistributedDataParallelAlgorithm]":
    """Build and return a copy of this class renamed to `name`.

    Handy for shortening long auto-generated names (e.g.
    `DistributedDataParallelREINFORCE.rename("DDPR")`), which in turn
    shortens the experiment directory paths derived from them.

    Parameters
    ----------
    name:
        The new class name.

    Returns
    -------
        The renamed class.

    """
    # An empty namespace means the subclass inherits everything from
    # `cls`; only the class name differs
    return type(name, (cls,), {})

# override
def setup(self, config: "Dict[str, Any]") -> "None":
    """Set up the trainable from `config`.

    Pops `log_sys_usage` (owned by this class), pins the reduction mode
    to "sum", then lets the base implementation handle everything else.
    """
    prepared = DistributedDataParallelAlgorithm.Config(**config)
    # Keep `log_sys_usage` out of the base setup: it is managed here
    self.log_sys_usage = prepared.pop("log_sys_usage")
    # NOTE(review): reduction mode is forced to "sum" — presumably required
    # by the distributed data-parallel workers; confirm with the base class
    prepared["reduction_mode"] = "sum"
    super().setup(prepared)
    # Surface the stored value back into `self.config`
    self.config["log_sys_usage"] = self.log_sys_usage

# override
def reset_config(self, new_config: "Dict[str, Any]") -> "bool":
    """Attempt to reset the trainable in-place with `new_config`.

    A change of `log_sys_usage` is not supported in-place, so it vetoes
    the reset; otherwise the reduction mode is pinned to "sum" and the
    decision is delegated to the base implementation.
    """
    prepared = DistributedDataParallelAlgorithm.Config(**new_config)
    if prepared.pop("log_sys_usage") != self.log_sys_usage:
        # Changing `log_sys_usage` requires a full restart
        return False
    prepared["reduction_mode"] = "sum"
    return super().reset_config(prepared)

# override
def _reduce(self, results: "Sequence[Dict[str, Any]]") -> "Dict[str, Any]":
reduced = results[0]
Expand Down
12 changes: 6 additions & 6 deletions examples/DDPG_LunarLanderContinuous-v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ class LayerNormFCNet(FCNet):
# override
def _setup_torso(self, in_shape):
    """Build the torso, inserting a `LayerNorm` after each `Linear` layer.

    Rebuilds `self.torso` into a fresh `nn.Sequential`, appending a
    `LayerNorm` (without learnable affine parameters, as customary in
    DDPG-style networks) right after every `nn.Linear` module.
    """
    super()._setup_torso(in_shape)
    # Rebuild into a new container instead of mutating `self.torso`
    # while iterating over it
    torso = nn.Sequential()
    for module in self.torso:
        torso.append(module)
        if isinstance(module, nn.Linear):
            torso.append(
                nn.LayerNorm(module.out_features, elementwise_affine=False)
            )
    self.torso = torso


experiment_params = ExperimentParams(
Expand Down
12 changes: 6 additions & 6 deletions examples/DistributedDataParallelDDPG_LunarLanderContinuous-v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ class LayerNormFCNet(FCNet):
# override
def _setup_torso(self, in_shape):
    """Build the torso, inserting a `LayerNorm` after each `Linear` layer.

    Rebuilds `self.torso` into a fresh `nn.Sequential`, appending a
    `LayerNorm` (without learnable affine parameters, as customary in
    DDPG-style networks) right after every `nn.Linear` module.
    """
    super()._setup_torso(in_shape)
    # Rebuild into a new container instead of mutating `self.torso`
    # while iterating over it
    torso = nn.Sequential()
    for module in self.torso:
        torso.append(module)
        if isinstance(module, nn.Linear):
            torso.append(
                nn.LayerNorm(module.out_features, elementwise_affine=False)
            )
    self.torso = torso


experiment_params = ExperimentParams(
Expand Down
12 changes: 6 additions & 6 deletions examples/TD3_LunarLanderContinuous-v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ class LayerNormFCNet(FCNet):
# override
def _setup_torso(self, in_shape):
    """Build the torso, inserting a `LayerNorm` after each `Linear` layer.

    Rebuilds `self.torso` into a fresh `nn.Sequential`, appending a
    `LayerNorm` (without learnable affine parameters, as customary in
    TD3-style networks) right after every `nn.Linear` module.
    """
    super()._setup_torso(in_shape)
    # Rebuild into a new container instead of mutating `self.torso`
    # while iterating over it
    torso = nn.Sequential()
    for module in self.torso:
        torso.append(module)
        if isinstance(module, nn.Linear):
            torso.append(
                nn.LayerNorm(module.out_features, elementwise_affine=False)
            )
    self.torso = torso


experiment_params = ExperimentParams(
Expand Down

0 comments on commit 758f7e4

Please sign in to comment.