diff --git a/readme/tuto_library.md b/readme/tuto_library.md
index 90f0629..25e1b05 100644
--- a/readme/tuto_library.md
+++ b/readme/tuto_library.md
@@ -1161,11 +1161,11 @@ Note that `train()` returns a python dictionary in which you can store the metri
                 p_targ.data.mul_(self.polyak)
                 p_targ.data.add_((1 - self.polyak) * p.data)
         ret_dict = dict(
-            loss_actor=loss_pi.detach(),
-            loss_critic=loss_q.detach(),
+            loss_actor=loss_pi.detach().item(),
+            loss_critic=loss_q.detach().item(),
         )
         if self.learn_entropy_coef:
-            ret_dict["loss_entropy_coef"] = loss_alpha.detach()
+            ret_dict["loss_entropy_coef"] = loss_alpha.detach().item()
             ret_dict["entropy_coef"] = alpha_t.item()
         return ret_dict  # dictionary of metrics to be logged
 ```
diff --git a/setup.py b/setup.py
index 1d3320d..c51d8da 100644
--- a/setup.py
+++ b/setup.py
@@ -100,7 +100,7 @@ def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
 
 install_req = [
     'numpy',
-    'torch',
+    'torch>=2.0.0',
     'pandas',
     'gymnasium',
     'rtgym>=0.9',
@@ -111,7 +111,8 @@ def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
     'keyboard',
     'pyautogui',
     'pyinstrument',
-    'tlspyo>=0.2.5'
+    'tlspyo>=0.2.5',
+    'chardet'  # requests dependency
 ]
 
 if platform.system() == "Windows":
@@ -127,13 +128,13 @@ def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
 
 setup(
     name='tmrl',
-    version='0.5.2',
+    version='0.5.3',
     description='Network-based framework for real-time robot learning',
     long_description=README,
     long_description_content_type='text/markdown',
     keywords='reinforcement learning, robot learning, trackmania, self driving, roborace',
     url='https://github.com/trackmania-rl/tmrl',
-    download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.5.2.tar.gz',
+    download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.5.3.tar.gz',
     author='Yann Bouteiller, Edouard Geze',
     author_email='yann.bouteiller@polymtl.ca, edouard.geze@hotmail.fr',
     license='MIT',
diff --git a/tmrl/custom/custom_algorithms.py b/tmrl/custom/custom_algorithms.py
index 85cf250..2d7c362 100644
--- a/tmrl/custom/custom_algorithms.py
+++ b/tmrl/custom/custom_algorithms.py
@@ -157,8 +157,8 @@ def train(self, batch):
 
         if not cfg.DEBUG_MODE:
             ret_dict = dict(
-                loss_actor=loss_pi.detach(),
-                loss_critic=loss_q.detach(),
+                loss_actor=loss_pi.detach().item(),
+                loss_critic=loss_q.detach().item(),
             )
         else:
             q1_o2_a2 = self.model.q1(o2, a2)
@@ -182,8 +182,8 @@ def train(self, batch):
             diff_q2_backup_r = (q2 - backup + r).detach()
 
             ret_dict = dict(
-                loss_actor=loss_pi.detach(),
-                loss_critic=loss_q.detach(),
+                loss_actor=loss_pi.detach().item(),
+                loss_critic=loss_q.detach().item(),
                 # debug:
                 debug_log_pi=logp_pi.detach().mean(),
                 debug_log_pi_std=logp_pi.detach().std(),
@@ -248,7 +248,7 @@ def train(self, batch):
             )
 
         if self.learn_entropy_coef:
-            ret_dict["loss_entropy_coef"] = loss_alpha.detach()
+            ret_dict["loss_entropy_coef"] = loss_alpha.detach().item()
             ret_dict["entropy_coef"] = alpha_t.item()
 
         return ret_dict
@@ -375,12 +375,12 @@ def train(self, batch):
         if update_policy:
             self.loss_pi = loss_pi.detach()
         ret_dict = dict(
-            loss_actor=self.loss_pi,
-            loss_critic=loss_q.detach(),
+            loss_actor=self.loss_pi.detach().item(),
+            loss_critic=loss_q.detach().item(),
         )
 
         if self.learn_entropy_coef:
-            ret_dict["loss_entropy_coef"] = loss_alpha.detach()
+            ret_dict["loss_entropy_coef"] = loss_alpha.detach().item()
             ret_dict["entropy_coef"] = alpha_t.item()
 
         return ret_dict
diff --git a/tmrl/custom/utils/nn.py b/tmrl/custom/utils/nn.py
index 710edcf..7aaf0cd 100644
--- a/tmrl/custom/utils/nn.py
+++ b/tmrl/custom/utils/nn.py
@@ -41,7 +41,7 @@ def copy_shared(model_a):
     for key in sda:
         a, b = sda[key], sdb[key]
         b.data = a.data  # strangely this will not make a.data and b.data the same object but their underlying data_ptr will be the same
-        assert b.storage().data_ptr() == a.storage().data_ptr()
+        assert b.untyped_storage().data_ptr() == a.untyped_storage().data_ptr()
     return model_b
 
 
diff --git a/tmrl/tuto/competition/custom_actor_module.py b/tmrl/tuto/competition/custom_actor_module.py
index 73103af..d3e91e5 100644
--- a/tmrl/tuto/competition/custom_actor_module.py
+++ b/tmrl/tuto/competition/custom_actor_module.py
@@ -781,8 +781,8 @@ def train(self, batch):
 
         # TMRL enables us to log training metrics to wandb:
         ret_dict = dict(
-            loss_actor=loss_pi.detach(),
-            loss_critic=loss_q.detach(),
+            loss_actor=loss_pi.detach().item(),
+            loss_critic=loss_q.detach().item(),
         )
 
         return ret_dict
diff --git a/tmrl/tuto/tuto.py b/tmrl/tuto/tuto.py
index 63c2c01..33ad83b 100644
--- a/tmrl/tuto/tuto.py
+++ b/tmrl/tuto/tuto.py
@@ -551,11 +551,11 @@ def train(self, batch):
                 p_targ.data.mul_(self.polyak)
                 p_targ.data.add_((1 - self.polyak) * p.data)
         ret_dict = dict(
-            loss_actor=loss_pi.detach(),
-            loss_critic=loss_q.detach(),
+            loss_actor=loss_pi.detach().item(),
+            loss_critic=loss_q.detach().item(),
         )
         if self.learn_entropy_coef:
-            ret_dict["loss_entropy_coef"] = loss_alpha.detach()
+            ret_dict["loss_entropy_coef"] = loss_alpha.detach().item()
             ret_dict["entropy_coef"] = alpha_t.item()
         return ret_dict