@@ -332,6 +332,7 @@ def __init__(self, algo_class=None):
332332 self .enable_connectors = True
333333 self ._env_to_module_connector = None
334334 self ._module_to_env_connector = None
335+ self .episode_lookback_horizon = 1
335336 # TODO (sven): Rename into `sample_timesteps` (or `sample_duration`
336337 # and `sample_duration_unit` (replacing batch_mode), like we do it
337338 # in the evaluation config).
@@ -1405,6 +1406,7 @@ def rollouts(
14051406 module_to_env_connector : Optional [
14061407 Callable [[EnvType , "RLModule" ], "ConnectorV2" ]
14071408 ] = NotProvided ,
1409+ episode_lookback_horizon : Optional [int ] = NotProvided ,
14081410 use_worker_filter_stats : Optional [bool ] = NotProvided ,
14091411 update_worker_filter_stats : Optional [bool ] = NotProvided ,
14101412 rollout_fragment_length : Optional [Union [int , str ]] = NotProvided ,
@@ -1455,6 +1457,13 @@ def rollouts(
14551457 module_to_env_connector: A callable taking an Env and an RLModule as input
14561458 args and returning a module-to-env ConnectorV2 (might be a pipeline)
14571459 object.
1460+ episode_lookback_horizon: The amount of data (in timesteps) to keep from the
1461+ preceding episode chunk when a new chunk (for the same episode) is
1462+ generated to continue sampling at a later time. The larger this value,
1463+ the more an env-to-module connector will be able to look back in time
1464+ and compile RLModule input data from this information. For example, if
1465+ your custom env-to-module connector (and your custom RLModule) requires
1466+ the previous 10 rewards as inputs, you must set this to at least 10.
14581467 use_worker_filter_stats: Whether to use the workers in the WorkerSet to
14591468 update the central filters (held by the local worker). If False, stats
14601469 from the workers will not be used and discarded.
@@ -1550,6 +1559,8 @@ def rollouts(
15501559 self ._env_to_module_connector = env_to_module_connector
15511560 if module_to_env_connector is not NotProvided :
15521561 self ._module_to_env_connector = module_to_env_connector
1562+ if episode_lookback_horizon is not NotProvided :
1563+ self .episode_lookback_horizon = episode_lookback_horizon
15531564 if use_worker_filter_stats is not NotProvided :
15541565 self .use_worker_filter_stats = use_worker_filter_stats
15551566 if update_worker_filter_stats is not NotProvided :
0 commit comments