From 758f7e460da0d567753f6a9b218a9022d8e341c0 Mon Sep 17 00:00:00 2001
From: Luca Della Libera
Date: Sat, 4 Mar 2023 18:15:04 -0500
Subject: [PATCH] Minor bug fixes

---
 README.md                                     |  6 +-
 actorch/algorithms/algorithm.py               | 66 ++++++++++++++-----
 examples/DDPG_LunarLanderContinuous-v2.py     | 12 ++--
 ...taParallelDDPG_LunarLanderContinuous-v2.py | 12 ++--
 examples/TD3_LunarLanderContinuous-v2.py      | 12 ++--
 5 files changed, 72 insertions(+), 36 deletions(-)

diff --git a/README.md b/README.md
index 851c5c0..8c39d88 100644
--- a/README.md
+++ b/README.md
@@ -177,7 +177,9 @@ actorch run REINFORCE_CartPole-v1.py # Run experiment
 **NOTE**: training artifacts (e.g. checkpoints, metrics, etc.) are saved in nested
 subdirectories. This might cause issues on Windows, since the maximum path length is
 260 characters. In that case,
-move your configuration file (or set `local_dir`) to an upper level directory (e.g. `Desktop`).
+move the configuration file (or set `local_dir`) to an upper level directory (e.g. `Desktop`),
+shorten the configuration file name, and/or shorten the algorithm name
+(e.g. `DistributedDataParallelREINFORCE.rename("DDPR")`).
 
 Wait for a few minutes until the training ends. The mean cumulative reward over
 the last 100 episodes should exceed 475, which means that the environment was
@@ -186,8 +188,8 @@ successfully solved.
 You can now plot the performance metrics saved in the auto-
 (or [Matplotlib](https://matplotlib.org/)):
 
 ```bash
+pip install actorch[vistool] # Install dependencies for VisTool
 cd experiments/REINFORCE_CartPole-v1/
-pip install actorch[vistool]
 actorch vistool plotly tensorboard
 ```

diff --git a/actorch/algorithms/algorithm.py b/actorch/algorithms/algorithm.py
index b7e9b45..1fcd34b 100644
--- a/actorch/algorithms/algorithm.py
+++ b/actorch/algorithms/algorithm.py
@@ -455,6 +455,23 @@ def __init__(
             **kwargs,
         )
 
+    @classmethod
+    def rename(cls, name: "str") -> "Type[Algorithm]":
+        """Return a copy of this class with
+        name set to `name`.
+
+        Parameters
+        ----------
+        name:
+            The new name.
+
+        Returns
+        -------
+        The renamed class.
+
+        """
+        return type(name, (cls,), {})
+
     # override
     def setup(self, config: "Dict[str, Any]") -> "None":
         self.config = Algorithm.Config(**self.config)
@@ -1388,22 +1405,6 @@ def __init__(
             **worker_config,
         )
 
-    # override
-    def setup(self, config: "Dict[str, Any]") -> "None":
-        config = DistributedDataParallelAlgorithm.Config(**config)
-        self.log_sys_usage = config.pop("log_sys_usage")
-        config["reduction_mode"] = "sum"
-        super().setup(config)
-        self.config["log_sys_usage"] = self.log_sys_usage
-
-    # override
-    def reset_config(self, new_config: "Dict[str, Any]") -> "bool":
-        new_config = DistributedDataParallelAlgorithm.Config(**new_config)
-        if self.log_sys_usage != new_config.pop("log_sys_usage"):
-            return False
-        new_config["reduction_mode"] = "sum"
-        return super().reset_config(new_config)
-
     # override
     @classmethod
     def get_worker_cls(cls) -> "Type[Trainable]":
@@ -1489,6 +1490,39 @@ def _seed(self) -> "None":
 
         return Worker
 
+    @classmethod
+    def rename(cls, name: "str") -> "Type[DistributedDataParallelAlgorithm]":
+        """Return a copy of this class with
+        name set to `name`.
+
+        Parameters
+        ----------
+        name:
+            The new name.
+
+        Returns
+        -------
+        The renamed class.
+
+        """
+        return type(name, (cls,), {})
+
+    # override
+    def setup(self, config: "Dict[str, Any]") -> "None":
+        config = DistributedDataParallelAlgorithm.Config(**config)
+        self.log_sys_usage = config.pop("log_sys_usage")
+        config["reduction_mode"] = "sum"
+        super().setup(config)
+        self.config["log_sys_usage"] = self.log_sys_usage
+
+    # override
+    def reset_config(self, new_config: "Dict[str, Any]") -> "bool":
+        new_config = DistributedDataParallelAlgorithm.Config(**new_config)
+        if self.log_sys_usage != new_config.pop("log_sys_usage"):
+            return False
+        new_config["reduction_mode"] = "sum"
+        return super().reset_config(new_config)
+
     # override
     def _reduce(self, results: "Sequence[Dict[str, Any]]") -> "Dict[str, Any]":
         reduced = results[0]
diff --git a/examples/DDPG_LunarLanderContinuous-v2.py b/examples/DDPG_LunarLanderContinuous-v2.py
index 16c15ad..de4912a 100644
--- a/examples/DDPG_LunarLanderContinuous-v2.py
+++ b/examples/DDPG_LunarLanderContinuous-v2.py
@@ -31,14 +31,14 @@ class LayerNormFCNet(FCNet):
     # override
     def _setup_torso(self, in_shape):
         super()._setup_torso(in_shape)
-        idx = 0
-        for module in self.torso[:]:
-            idx += 1
+        torso = nn.Sequential()
+        for module in self.torso:
+            torso.append(module)
             if isinstance(module, nn.Linear):
-                self.torso.insert(
-                    idx, nn.LayerNorm(module.out_features, elementwise_affine=False)
+                torso.append(
+                    nn.LayerNorm(module.out_features, elementwise_affine=False)
                 )
-                idx += 1
+        self.torso = torso
 
 
 experiment_params = ExperimentParams(
diff --git a/examples/DistributedDataParallelDDPG_LunarLanderContinuous-v2.py b/examples/DistributedDataParallelDDPG_LunarLanderContinuous-v2.py
index 20603b4..f9319a5 100644
--- a/examples/DistributedDataParallelDDPG_LunarLanderContinuous-v2.py
+++ b/examples/DistributedDataParallelDDPG_LunarLanderContinuous-v2.py
@@ -31,14 +31,14 @@ class LayerNormFCNet(FCNet):
     # override
     def _setup_torso(self, in_shape):
         super()._setup_torso(in_shape)
-        idx = 0
-        for module in self.torso[:]:
-            idx += 1
+        torso = nn.Sequential()
+        for module in self.torso:
+            torso.append(module)
             if isinstance(module, nn.Linear):
-                self.torso.insert(
-                    idx, nn.LayerNorm(module.out_features, elementwise_affine=False)
+                torso.append(
+                    nn.LayerNorm(module.out_features, elementwise_affine=False)
                 )
-                idx += 1
+        self.torso = torso
 
 
 experiment_params = ExperimentParams(
diff --git a/examples/TD3_LunarLanderContinuous-v2.py b/examples/TD3_LunarLanderContinuous-v2.py
index d7675b0..258e7c1 100644
--- a/examples/TD3_LunarLanderContinuous-v2.py
+++ b/examples/TD3_LunarLanderContinuous-v2.py
@@ -31,14 +31,14 @@ class LayerNormFCNet(FCNet):
     # override
     def _setup_torso(self, in_shape):
         super()._setup_torso(in_shape)
-        idx = 0
-        for module in self.torso[:]:
-            idx += 1
+        torso = nn.Sequential()
+        for module in self.torso:
+            torso.append(module)
             if isinstance(module, nn.Linear):
-                self.torso.insert(
-                    idx, nn.LayerNorm(module.out_features, elementwise_affine=False)
+                torso.append(
+                    nn.LayerNorm(module.out_features, elementwise_affine=False)
                 )
-                idx += 1
+        self.torso = torso
 
 
 experiment_params = ExperimentParams(