From 758f7e460da0d567753f6a9b218a9022d8e341c0 Mon Sep 17 00:00:00 2001
From: Luca Della Libera
Date: Sat, 4 Mar 2023 18:15:04 -0500
Subject: [PATCH] Minor bug fixes

---
 README.md                                     |  6 +-
 actorch/algorithms/algorithm.py               | 66 ++++++++++++++-----
 examples/DDPG_LunarLanderContinuous-v2.py     | 12 ++--
 ...taParallelDDPG_LunarLanderContinuous-v2.py | 12 ++--
 examples/TD3_LunarLanderContinuous-v2.py      | 12 ++--
 5 files changed, 72 insertions(+), 36 deletions(-)

diff --git a/README.md b/README.md
index 851c5c0..8c39d88 100644
--- a/README.md
+++ b/README.md
@@ -177,7 +177,9 @@ actorch run REINFORCE_CartPole-v1.py # Run experiment
 **NOTE**: training artifacts (e.g. checkpoints, metrics, etc.) are saved in nested
 subdirectories. This might cause issues on Windows, since the maximum path length is
 260 characters. In that case,
-move your configuration file (or set `local_dir`) to an upper level directory (e.g. `Desktop`).
+move the configuration file (or set `local_dir`) to an upper level directory (e.g. `Desktop`),
+shorten the configuration file name, and/or shorten the algorithm name
+(e.g. `DistributedDataParallelREINFORCE.rename("DDPR")`).
 
 Wait for a few minutes until the training ends. The mean cumulative reward over
 the last 100 episodes should exceed 475, which means that the environment was
@@ -186,8 +188,8 @@ successfully solved.
 You can now plot the performance metrics saved in the auto-
 (or [Matplotlib](https://matplotlib.org/)):
 
 ```bash
+pip install actorch[vistool] # Install dependencies for VisTool
 cd experiments/REINFORCE_CartPole-v1/
-pip install actorch[vistool]
 actorch vistool plotly tensorboard
 ```

diff --git a/actorch/algorithms/algorithm.py b/actorch/algorithms/algorithm.py
index b7e9b45..1fcd34b 100644
--- a/actorch/algorithms/algorithm.py
+++ b/actorch/algorithms/algorithm.py
@@ -455,6 +455,23 @@ def __init__(
             **kwargs,
         )
 
+    @classmethod
+    def rename(cls, name: "str") -> "Type[Algorithm]":
+        """Return a copy of this class with
+        name set to `name`.
+
+        Parameters
+        ----------
+        name:
+            The new name.
+
+        Returns
+        -------
+        The renamed class.
+
+        """
+        return type(name, (cls,), {})
+
     # override
     def setup(self, config: "Dict[str, Any]") -> "None":
         self.config = Algorithm.Config(**self.config)
@@ -1388,22 +1405,6 @@ def __init__(
             **worker_config,
         )
 
-    # override
-    def setup(self, config: "Dict[str, Any]") -> "None":
-        config = DistributedDataParallelAlgorithm.Config(**config)
-        self.log_sys_usage = config.pop("log_sys_usage")
-        config["reduction_mode"] = "sum"
-        super().setup(config)
-        self.config["log_sys_usage"] = self.log_sys_usage
-
-    # override
-    def reset_config(self, new_config: "Dict[str, Any]") -> "bool":
-        new_config = DistributedDataParallelAlgorithm.Config(**new_config)
-        if self.log_sys_usage != new_config.pop("log_sys_usage"):
-            return False
-        new_config["reduction_mode"] = "sum"
-        return super().reset_config(new_config)
-
     # override
     @classmethod
     def get_worker_cls(cls) -> "Type[Trainable]":
@@ -1489,6 +1490,39 @@ def _seed(self) -> "None":
 
         return Worker
 
+    @classmethod
+    def rename(cls, name: "str") -> "Type[DistributedDataParallelAlgorithm]":
+        """Return a copy of this class with
+        name set to `name`.
+
+        Parameters
+        ----------
+        name:
+            The new name.
+
+        Returns
+        -------
+        The renamed class.
+
+        """
+        return type(name, (cls,), {})
+
+    # override
+    def setup(self, config: "Dict[str, Any]") -> "None":
+        config = DistributedDataParallelAlgorithm.Config(**config)
+        self.log_sys_usage = config.pop("log_sys_usage")
+        config["reduction_mode"] = "sum"
+        super().setup(config)
+        self.config["log_sys_usage"] = self.log_sys_usage
+
+    # override
+    def reset_config(self, new_config: "Dict[str, Any]") -> "bool":
+        new_config = DistributedDataParallelAlgorithm.Config(**new_config)
+        if self.log_sys_usage != new_config.pop("log_sys_usage"):
+            return False
+        new_config["reduction_mode"] = "sum"
+        return super().reset_config(new_config)
+
     # override
     def _reduce(self, results: "Sequence[Dict[str, Any]]") -> "Dict[str, Any]":
         reduced = results[0]
diff --git a/examples/DDPG_LunarLanderContinuous-v2.py b/examples/DDPG_LunarLanderContinuous-v2.py
index 16c15ad..de4912a 100644
--- a/examples/DDPG_LunarLanderContinuous-v2.py
+++ b/examples/DDPG_LunarLanderContinuous-v2.py
@@ -31,14 +31,14 @@ class LayerNormFCNet(FCNet):
     # override
     def _setup_torso(self, in_shape):
         super()._setup_torso(in_shape)
-        idx = 0
-        for module in self.torso[:]:
-            idx += 1
+        torso = nn.Sequential()
+        for module in self.torso:
+            torso.append(module)
             if isinstance(module, nn.Linear):
-                self.torso.insert(
-                    idx, nn.LayerNorm(module.out_features, elementwise_affine=False)
+                torso.append(
+                    nn.LayerNorm(module.out_features, elementwise_affine=False)
                 )
-                idx += 1
+        self.torso = torso
 
 
 experiment_params = ExperimentParams(
diff --git a/examples/DistributedDataParallelDDPG_LunarLanderContinuous-v2.py b/examples/DistributedDataParallelDDPG_LunarLanderContinuous-v2.py
index 20603b4..f9319a5 100644
--- a/examples/DistributedDataParallelDDPG_LunarLanderContinuous-v2.py
+++ b/examples/DistributedDataParallelDDPG_LunarLanderContinuous-v2.py
@@ -31,14 +31,14 @@ class LayerNormFCNet(FCNet):
     # override
     def _setup_torso(self, in_shape):
         super()._setup_torso(in_shape)
-        idx = 0
-        for module in self.torso[:]:
-            idx += 1
+        torso = nn.Sequential()
+        for module in self.torso:
+            torso.append(module)
             if isinstance(module, nn.Linear):
-                self.torso.insert(
-                    idx, nn.LayerNorm(module.out_features, elementwise_affine=False)
+                torso.append(
+                    nn.LayerNorm(module.out_features, elementwise_affine=False)
                 )
-                idx += 1
+        self.torso = torso
 
 
 experiment_params = ExperimentParams(
diff --git a/examples/TD3_LunarLanderContinuous-v2.py b/examples/TD3_LunarLanderContinuous-v2.py
index d7675b0..258e7c1 100644
--- a/examples/TD3_LunarLanderContinuous-v2.py
+++ b/examples/TD3_LunarLanderContinuous-v2.py
@@ -31,14 +31,14 @@ class LayerNormFCNet(FCNet):
     # override
     def _setup_torso(self, in_shape):
         super()._setup_torso(in_shape)
-        idx = 0
-        for module in self.torso[:]:
-            idx += 1
+        torso = nn.Sequential()
+        for module in self.torso:
+            torso.append(module)
             if isinstance(module, nn.Linear):
-                self.torso.insert(
-                    idx, nn.LayerNorm(module.out_features, elementwise_affine=False)
+                torso.append(
+                    nn.LayerNorm(module.out_features, elementwise_affine=False)
                 )
-                idx += 1
+        self.torso = torso
 
 
 experiment_params = ExperimentParams(