diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py
index 9f010a782b8..7e7509095f9 100644
--- a/torchrl/objectives/value/advantages.py
+++ b/torchrl/objectives/value/advantages.py
@@ -1081,6 +1081,22 @@ def __init__(
         self.vectorized = vectorized
         self.time_dim = time_dim
 
+    @property
+    def vectorized(self):
+        """Whether the vectorized code path should be used.
+
+        Always ``False`` while ``is_dynamo_compiling()`` reports an active
+        Dynamo trace; otherwise the value stored through the setter.
+        """
+        if is_dynamo_compiling():
+            return False
+        return self._vectorized
+
+    @vectorized.setter
+    def vectorized(self, value):
+        """Store the flag that the getter returns outside of Dynamo tracing."""
+        self._vectorized = value
+
     @_self_set_skip_existing
     @_self_set_grad_enabled
     @dispatch
@@ -1206,6 +1222,10 @@ def value_estimate(
         if steps_to_next_obs is not None:
             gamma = gamma ** steps_to_next_obs.view_as(reward)
 
+        # Move lmbda to the working device once and store it back, so later
+        # calls on the same device skip the transfer.
+        if self.lmbda.device != device:
+            self.lmbda = self.lmbda.to(device)
         lmbda = self.lmbda
         if self.average_rewards:
             reward = reward - reward.mean()