@@ -212,75 +212,75 @@ def get_observations(
212212
213213 return self ._getattr_by_index ("observations" , indices , global_ts )
214214
215- def get_actions (
215+ def get_infos (
216216 self , indices : Union [int , List [int ]] = - 1 , global_ts : bool = True
217217 ) -> MultiAgentDict :
218- """Gets actions for all agents that stepped in the last timesteps.
218+ """Gets infos for all agents that stepped in the last timesteps.
219219
220- Note that actions are only returned for agents that stepped
220+ Note that infos are only returned for agents that stepped
221221 during the given index range.
222222
223223 Args:
224224 indices: Either a single index or a list of indices. The indices
225225 can be reversed (e.g. [-1, -2]) or absolute (e.g. [98, 99]).
226- This defines the time indices for which the actions
226+ This defines the time indices for which the infos
227227 should be returned.
228228 global_ts: Boolean that defines, if the indices should be considered
229229 environment (`True`) or agent (`False`) steps.
230230
231- Returns: A dictionary mapping agent ids to actions (of different
231+ Returns: A dictionary mapping agent ids to infos (of different
232232 timesteps). Only for agents that have stepped (were ready) at a
233- timestep, actions are returned (i.e. not all agent ids are
233+ timestep, infos are returned (i.e. not all agent ids are
234234 necessarily in the keys).
235235 """
236+ return self ._getattr_by_index ("infos" , indices , global_ts )
236237
237- return self ._getattr_by_index ("actions" , indices , global_ts )
238-
239- def get_rewards (
238+ def get_actions (
240239 self , indices : Union [int , List [int ]] = - 1 , global_ts : bool = True
241240 ) -> MultiAgentDict :
242- """Gets rewards for all agents that stepped in the last timesteps.
241+ """Gets actions for all agents that stepped in the last timesteps.
243242
244- Note that rewards are only returned for agents that stepped
243+ Note that actions are only returned for agents that stepped
245244 during the given index range.
246245
247246 Args:
248247 indices: Either a single index or a list of indices. The indices
249248 can be reversed (e.g. [-1, -2]) or absolute (e.g. [98, 99]).
250- This defines the time indices for which the rewards
249+ This defines the time indices for which the actions
251250 should be returned.
252251 global_ts: Boolean that defines, if the indices should be considered
253252 environment (`True`) or agent (`False`) steps.
254253
255- Returns: A dictionary mapping agent ids to rewards (of different
254+ Returns: A dictionary mapping agent ids to actions (of different
256255 timesteps). Only for agents that have stepped (were ready) at a
257- timestep, rewards are returned (i.e. not all agent ids are
256+ timestep, actions are returned (i.e. not all agent ids are
258257 necessarily in the keys).
259258 """
260- return self ._getattr_by_index ("rewards" , indices , global_ts )
261259
262- def get_infos (
260+ return self ._getattr_by_index ("actions" , indices , global_ts )
261+
262+ def get_rewards (
263263 self , indices : Union [int , List [int ]] = - 1 , global_ts : bool = True
264264 ) -> MultiAgentDict :
265- """Gets infos for all agents that stepped in the last timesteps.
265+ """Gets rewards for all agents that stepped in the last timesteps.
266266
267- Note that infos are only returned for agents that stepped
267+ Note that rewards are only returned for agents that stepped
268268 during the given index range.
269269
270270 Args:
271271 indices: Either a single index or a list of indices. The indices
272272 can be reversed (e.g. [-1, -2]) or absolute (e.g. [98, 99]).
273- This defines the time indices for which the infos
273+ This defines the time indices for which the rewards
274274 should be returned.
275275 global_ts: Boolean that defines, if the indices should be considered
276276 environment (`True`) or agent (`False`) steps.
277277
278- Returns: A dictionary mapping agent ids to infos (of different
278+ Returns: A dictionary mapping agent ids to rewards (of different
279279 timesteps). Only for agents that have stepped (were ready) at a
280- timestep, infos are returned (i.e. not all agent ids are
280+ timestep, rewards are returned (i.e. not all agent ids are
281281 necessarily in the keys).
282282 """
283- return self ._getattr_by_index ("infos" , indices , global_ts )
283+ return self ._getattr_by_index ("rewards" , indices , global_ts )
284284
285285 def get_extra_model_outputs (
286286 self , indices : Union [int , List [int ]] = - 1 , global_ts : bool = True
0 commit comments