[RLlib] - Add timers to env step, forward pass, and complete connector pipeline runs. #51160

Open · wants to merge 6 commits into base: master
4 changes: 2 additions & 2 deletions rllib/algorithms/algorithm.py
Contributor:
Changes in this file are nice.

@@ -3121,7 +3121,7 @@ def _run_one_evaluation(
"num_healthy_workers"
] = self.eval_env_runner_group.num_healthy_remote_workers()
eval_results[
"num_in_flight_async_reqs"
"actor_manager_num_outstanding_async_reqs"
] = self.eval_env_runner_group.num_in_flight_async_reqs()
eval_results[
"num_remote_worker_restarts"
@@ -3694,7 +3694,7 @@ def _compile_iteration_results_old_api_stack(
"num_healthy_workers"
] = self.env_runner_group.num_healthy_remote_workers()
results[
"num_in_flight_async_sample_reqs"
"actor_manager_num_outstanding_async_reqs"
] = self.env_runner_group.num_in_flight_async_reqs()
results[
"num_remote_worker_restarts"
67 changes: 40 additions & 27 deletions rllib/env/multi_agent_env_runner.py
@@ -30,13 +30,16 @@
from ray.rllib.utils.deprecation import Deprecated
from ray.rllib.utils.framework import get_device, try_import_torch
from ray.rllib.utils.metrics import (
ENV_STEP_TIMER,
ENV_TO_MODULE_TIMER,
EPISODE_DURATION_SEC_MEAN,
EPISODE_LEN_MAX,
EPISODE_LEN_MEAN,
EPISODE_LEN_MIN,
EPISODE_RETURN_MAX,
EPISODE_RETURN_MEAN,
EPISODE_RETURN_MIN,
MODULE_TO_ENV_TIMER,
NUM_AGENT_STEPS_SAMPLED,
NUM_AGENT_STEPS_SAMPLED_LIFETIME,
NUM_ENV_STEPS_SAMPLED,
@@ -45,6 +48,7 @@
NUM_EPISODES_LIFETIME,
NUM_MODULE_STEPS_SAMPLED,
NUM_MODULE_STEPS_SAMPLED_LIFETIME,
RLMODULE_INFERENCE_TIMER,
SAMPLE_TIMER,
TIME_BETWEEN_SAMPLING,
WEIGHTS_SEQ_NO,
@@ -310,21 +314,24 @@ def _sample(
self.metrics.peek(NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0)
+ ts
) * (self.config.num_env_runners or 1)
to_env = self.module.forward_exploration(
to_module, t=global_env_steps_lifetime
)
with self.metrics.log_time(RLMODULE_INFERENCE_TIMER):
to_env = self.module.forward_exploration(
to_module, t=global_env_steps_lifetime
)
else:
to_env = self.module.forward_inference(to_module)
with self.metrics.log_time(RLMODULE_INFERENCE_TIMER):
to_env = self.module.forward_inference(to_module)

# Module-to-env connector.
to_env = self._module_to_env(
rl_module=self.module,
batch=to_env,
episodes=episodes,
explore=explore,
shared_data=shared_data,
metrics=self.metrics,
)
with self.metrics.log_time(MODULE_TO_ENV_TIMER):
to_env = self._module_to_env(
rl_module=self.module,
batch=to_env,
episodes=episodes,
explore=explore,
shared_data=shared_data,
metrics=self.metrics,
)
# In case all environments had been terminated, `to_module` will be
# empty and no actions are needed b/c we reset all environments.
else:
@@ -339,7 +346,8 @@ def _sample(
actions = to_env.pop(Columns.ACTIONS, [{} for _ in episodes])
actions_for_env = to_env.pop(Columns.ACTIONS_FOR_ENV, actions)
# Try stepping the environment.
results = self._try_env_step(actions_for_env)
with self.metrics.log_time(ENV_STEP_TIMER):
results = self._try_env_step(actions_for_env)
if results == ENV_STEP_FAILURE:
return self._sample(
num_timesteps=num_timesteps,
@@ -453,20 +461,24 @@ def _sample(
# Run the env-to-module connector pipeline for all done episodes.
# Note, this is needed to postprocess last-step data, e.g. if the
# user uses a connector that one-hot encodes observations.
# Note, this pipeline run is not timed as the number of episodes
# can differ from `num_envs_per_env_runner` and would bias time
# measurements.
self._env_to_module(
episodes=done_episodes_to_run_env_to_module,
explore=explore,
rl_module=self.module,
shared_data=shared_data,
metrics=self.metrics,
)
self._cached_to_module = self._env_to_module(
episodes=episodes,
explore=explore,
rl_module=self.module,
shared_data=shared_data,
metrics=self.metrics,
)
with self.metrics.log_time(ENV_TO_MODULE_TIMER):
self._cached_to_module = self._env_to_module(
episodes=episodes,
explore=explore,
rl_module=self.module,
shared_data=shared_data,
metrics=self.metrics,
)

# Numpy'ize the done episodes after running the connector pipeline. Note,
# that we need simple `list` objects in the
@@ -538,13 +550,14 @@ def _reset_envs(self, episodes, shared_data, explore):
# properly been processed (if applicable).
self._cached_to_module = None
if self.module:
self._cached_to_module = self._env_to_module(
rl_module=self.module,
episodes=episodes,
explore=explore,
shared_data=shared_data,
metrics=self.metrics,
)
with self.metrics.log_time(ENV_TO_MODULE_TIMER):
self._cached_to_module = self._env_to_module(
rl_module=self.module,
episodes=episodes,
explore=explore,
shared_data=shared_data,
metrics=self.metrics,
)

# Call `on_episode_start()` callbacks (always after reset).
for env_index in range(self.num_envs):
64 changes: 37 additions & 27 deletions rllib/env/single_agent_env_runner.py
@@ -32,13 +32,16 @@
from ray.rllib.utils.deprecation import Deprecated
from ray.rllib.utils.framework import get_device
from ray.rllib.utils.metrics import (
ENV_STEP_TIMER,
ENV_TO_MODULE_TIMER,
EPISODE_DURATION_SEC_MEAN,
EPISODE_LEN_MAX,
EPISODE_LEN_MEAN,
EPISODE_LEN_MIN,
EPISODE_RETURN_MAX,
EPISODE_RETURN_MEAN,
EPISODE_RETURN_MIN,
MODULE_TO_ENV_TIMER,
NUM_AGENT_STEPS_SAMPLED,
NUM_AGENT_STEPS_SAMPLED_LIFETIME,
NUM_ENV_STEPS_SAMPLED,
@@ -47,6 +50,7 @@
NUM_EPISODES_LIFETIME,
NUM_MODULE_STEPS_SAMPLED,
NUM_MODULE_STEPS_SAMPLED_LIFETIME,
RLMODULE_INFERENCE_TIMER,
SAMPLE_TIMER,
TIME_BETWEEN_SAMPLING,
WEIGHTS_SEQ_NO,
@@ -296,21 +300,24 @@ def _sample(
self.metrics.peek(NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0)
+ ts
) * (self.config.num_env_runners or 1)
to_env = self.module.forward_exploration(
to_module, t=global_env_steps_lifetime
)
with self.metrics.log_time(RLMODULE_INFERENCE_TIMER):
to_env = self.module.forward_exploration(
to_module, t=global_env_steps_lifetime
)
else:
to_env = self.module.forward_inference(to_module)
with self.metrics.log_time(RLMODULE_INFERENCE_TIMER):
to_env = self.module.forward_inference(to_module)

# Module-to-env connector.
to_env = self._module_to_env(
rl_module=self.module,
batch=to_env,
episodes=episodes,
explore=explore,
shared_data=shared_data,
metrics=self.metrics,
)
with self.metrics.log_time(MODULE_TO_ENV_TIMER):
Contributor:
We time all connector calls of each ConnectorPipeline; that's why we pass the metrics object.
We should also encapsulate the timing of the overall connector pipeline call in there, so that we don't duplicate this code everywhere (deduplication will make this easier to maintain).
Each ConnectorPipeline can own its name for logging purposes, and we can also take this opportunity to prepend a meaningful prefix to each connector logging entry.
For example, the ClipReward time could be logged as ".....env_to_agent_connector_pipeline.clip_reward" and the overall env-to-agent pipeline could be logged as "....env_to_agent_connector_pipeline" or something like this.
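A minimal sketch of what that encapsulation could look like, assuming a pipeline object that owns its name and receives a metrics logger exposing a `log_time(key)` context manager. `TimedConnectorPipeline` and `_StubMetrics` below are hypothetical illustration classes, not RLlib code:

import time
from contextlib import contextmanager


class _StubMetrics:
    """Minimal stand-in for a metrics logger with a `log_time(key)` context manager."""

    def __init__(self):
        self.timings = {}

    @contextmanager
    def log_time(self, key):
        start = time.perf_counter()
        try:
            yield
        finally:
            self.timings[key] = time.perf_counter() - start


class TimedConnectorPipeline:
    """Hypothetical pipeline that owns its name and times each connector call."""

    def __init__(self, name, connectors):
        self.name = name  # e.g. "env_to_module_connector_pipeline"
        self.connectors = connectors  # callables that take and return a batch

    def __call__(self, batch, *, metrics):
        # Time the whole pipeline under its own name ...
        with metrics.log_time(self.name):
            for connector in self.connectors:
                # ... and each connector under a prefixed key, e.g.
                # "env_to_module_connector_pipeline.clip_reward".
                with metrics.log_time(f"{self.name}.{connector.__name__}"):
                    batch = connector(batch)
        return batch


def clip_reward(batch):
    # Dummy connector, just for this sketch.
    return batch


metrics = _StubMetrics()
pipeline = TimedConnectorPipeline("env_to_module_connector_pipeline", [clip_reward])
pipeline({"obs": [0.0]}, metrics=metrics)
print(metrics.timings)

With something along these lines, the per-connector entries and the overall pipeline time would all land under one self-documenting prefix, and the call sites in the env runners would no longer need their own `log_time` wrappers.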

to_env = self._module_to_env(
rl_module=self.module,
batch=to_env,
episodes=episodes,
explore=explore,
shared_data=shared_data,
metrics=self.metrics,
)

# Extract the (vectorized) actions (to be sent to the env) from the
# module/connector output. Note that these actions are fully ready (e.g.
@@ -320,7 +327,8 @@
actions = to_env.pop(Columns.ACTIONS)
actions_for_env = to_env.pop(Columns.ACTIONS_FOR_ENV, actions)
# Try stepping the environment.
results = self._try_env_step(actions_for_env)
with self.metrics.log_time(ENV_STEP_TIMER):
Contributor:
The metrics context can go inside `_try_env_step` so we don't have to duplicate the call to it for the single- and multi-agent env runners.

Collaborator (author):
That's a good idea. Let's move it to the parent.
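A minimal sketch of that suggestion, assuming a shared parent class. `EnvRunnerBase` and its constructor are simplified stand-ins for illustration, not the actual RLlib classes:

# Hypothetical sketch: hoist the env-step timer into a common parent class so
# SingleAgentEnvRunner and MultiAgentEnvRunner inherit it instead of each
# wrapping the call themselves.
ENV_STEP_TIMER = "env_step_timer"
ENV_STEP_FAILURE = object()  # sentinel, mirroring how the runners signal a failed step


class EnvRunnerBase:
    """Simplified stand-in for a shared EnvRunner parent class."""

    def __init__(self, env, metrics):
        self.env = env          # object with a `step(actions)` method
        self.metrics = metrics  # object with a `log_time(key)` context manager

    def _try_env_step(self, actions):
        # Timing lives here, at the single shared call site.
        with self.metrics.log_time(ENV_STEP_TIMER):
            try:
                return self.env.step(actions)
            except Exception:
                return ENV_STEP_FAILURE

The subclasses would then call `self._try_env_step(actions_for_env)` exactly as in the diff, without their own `with self.metrics.log_time(ENV_STEP_TIMER):` block.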

results = self._try_env_step(actions_for_env)
if results == ENV_STEP_FAILURE:
return self._sample(
num_timesteps=num_timesteps,
@@ -364,13 +372,14 @@
# Env-to-module connector pass (cache results as we will do the RLModule
# forward pass only in the next `while`-iteration).
if self.module is not None:
self._cached_to_module = self._env_to_module(
episodes=episodes,
explore=explore,
rl_module=self.module,
shared_data=shared_data,
metrics=self.metrics,
)
with self.metrics.log_time(ENV_TO_MODULE_TIMER):
self._cached_to_module = self._env_to_module(
episodes=episodes,
explore=explore,
rl_module=self.module,
shared_data=shared_data,
metrics=self.metrics,
)

for env_index in range(self.num_envs):
# Call `on_episode_start()` callback (always after reset).
@@ -732,13 +741,14 @@ def _reset_envs(self, episodes, shared_data, explore):
# properly been processed (if applicable).
self._cached_to_module = None
if self.module:
self._cached_to_module = self._env_to_module(
rl_module=self.module,
episodes=episodes,
explore=explore,
shared_data=shared_data,
metrics=self.metrics,
)
with self.metrics.log_time(ENV_TO_MODULE_TIMER):
self._cached_to_module = self._env_to_module(
rl_module=self.module,
episodes=episodes,
explore=explore,
shared_data=shared_data,
metrics=self.metrics,
)

# Call `on_episode_start()` callbacks (always after reset).
for env_index in range(self.num_envs):
4 changes: 4 additions & 0 deletions rllib/utils/metrics/__init__.py
@@ -160,6 +160,10 @@
GRAD_WAIT_TIMER = "grad_wait"
SAMPLE_TIMER = "sample" # @OldAPIStack
ENV_RUNNER_SAMPLING_TIMER = "env_runner_sampling_timer"
ENV_STEP_TIMER = "env_step_timer"
ENV_TO_MODULE_TIMER = "env_to_module_timer"
RLMODULE_INFERENCE_TIMER = "rlmodule_inference_timer"
MODULE_TO_ENV_TIMER = "module_to_env_timer"
Contributor:
We should take this opportunity to make the names of our metrics more expressive.
Metrics should self-document as much as possible for users who do not have a good overview of RLlib.
"env_step_timer" is good imo. "env_to_module_timer" should be "env_to_module_connector_pipeline_timer" or something similar. Same goes for the others 👍

Collaborator (author):
In general a good idea. In this case, though, "env_to_module" is the name of the EnvToModuleConnectorPipeline in the EnvRunner. The same goes for "module_to_env".
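For illustration, the more expressive names proposed above might look like the following; these are hypothetical alternatives, not what the PR adds:

# Hypothetical, more self-documenting names suggested in the review:
ENV_TO_MODULE_TIMER = "env_to_module_connector_pipeline_timer"
MODULE_TO_ENV_TIMER = "module_to_env_connector_pipeline_timer"

The diff above keeps the shorter names, matching the pipelines' attribute names in the EnvRunner.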

OFFLINE_SAMPLING_TIMER = "offline_sampling_timer"
REPLAY_BUFFER_ADD_DATA_TIMER = "replay_buffer_add_data_timer"
REPLAY_BUFFER_SAMPLE_TIMER = "replay_buffer_sampling_timer"