-
Notifications
You must be signed in to change notification settings - Fork 6.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RLlib] - Add timers to env step, forward pass, and complete connector pipelines runs. #51160
base: master
Are you sure you want to change the base?
Changes from all commits
509b9b4
ea313c9
880b7a0
d99e750
42c867c
93fd59e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,13 +32,16 @@ | |
from ray.rllib.utils.deprecation import Deprecated | ||
from ray.rllib.utils.framework import get_device | ||
from ray.rllib.utils.metrics import ( | ||
ENV_STEP_TIMER, | ||
ENV_TO_MODULE_TIMER, | ||
EPISODE_DURATION_SEC_MEAN, | ||
EPISODE_LEN_MAX, | ||
EPISODE_LEN_MEAN, | ||
EPISODE_LEN_MIN, | ||
EPISODE_RETURN_MAX, | ||
EPISODE_RETURN_MEAN, | ||
EPISODE_RETURN_MIN, | ||
MODULE_TO_ENV_TIMER, | ||
NUM_AGENT_STEPS_SAMPLED, | ||
NUM_AGENT_STEPS_SAMPLED_LIFETIME, | ||
NUM_ENV_STEPS_SAMPLED, | ||
|
@@ -47,6 +50,7 @@ | |
NUM_EPISODES_LIFETIME, | ||
NUM_MODULE_STEPS_SAMPLED, | ||
NUM_MODULE_STEPS_SAMPLED_LIFETIME, | ||
RLMODULE_INFERENCE_TIMER, | ||
SAMPLE_TIMER, | ||
TIME_BETWEEN_SAMPLING, | ||
WEIGHTS_SEQ_NO, | ||
|
@@ -296,21 +300,24 @@ def _sample( | |
self.metrics.peek(NUM_ENV_STEPS_SAMPLED_LIFETIME, default=0) | ||
+ ts | ||
) * (self.config.num_env_runners or 1) | ||
to_env = self.module.forward_exploration( | ||
to_module, t=global_env_steps_lifetime | ||
) | ||
with self.metrics.log_time(RLMODULE_INFERENCE_TIMER): | ||
to_env = self.module.forward_exploration( | ||
to_module, t=global_env_steps_lifetime | ||
) | ||
else: | ||
to_env = self.module.forward_inference(to_module) | ||
with self.metrics.log_time(RLMODULE_INFERENCE_TIMER): | ||
to_env = self.module.forward_inference(to_module) | ||
|
||
# Module-to-env connector. | ||
to_env = self._module_to_env( | ||
rl_module=self.module, | ||
batch=to_env, | ||
episodes=episodes, | ||
explore=explore, | ||
shared_data=shared_data, | ||
metrics=self.metrics, | ||
) | ||
with self.metrics.log_time(MODULE_TO_ENV_TIMER): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We time all connector calls of each ConnectorPipeline. Each connectorpipeline can own its name for logging purposes and we can also take this opportunity to prepend a meaningful prefix to each connector logging entry. For example log the ClipReward time could be logged as ".....env_to_agent_connector_pipeline.clip_reward" and the overall env to agent pipeline could be logged as "....env_to_agent_connector_pipeline" or something like this. |
||
to_env = self._module_to_env( | ||
rl_module=self.module, | ||
batch=to_env, | ||
episodes=episodes, | ||
explore=explore, | ||
shared_data=shared_data, | ||
metrics=self.metrics, | ||
) | ||
|
||
# Extract the (vectorized) actions (to be sent to the env) from the | ||
# module/connector output. Note that these actions are fully ready (e.g. | ||
|
@@ -320,7 +327,8 @@ def _sample( | |
actions = to_env.pop(Columns.ACTIONS) | ||
actions_for_env = to_env.pop(Columns.ACTIONS_FOR_ENV, actions) | ||
# Try stepping the environment. | ||
results = self._try_env_step(actions_for_env) | ||
with self.metrics.log_time(ENV_STEP_TIMER): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The metrics context can go inside There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's a good idea. Let's move it to the parent. |
||
results = self._try_env_step(actions_for_env) | ||
if results == ENV_STEP_FAILURE: | ||
return self._sample( | ||
num_timesteps=num_timesteps, | ||
|
@@ -364,13 +372,14 @@ def _sample( | |
# Env-to-module connector pass (cache results as we will do the RLModule | ||
# forward pass only in the next `while`-iteration. | ||
if self.module is not None: | ||
self._cached_to_module = self._env_to_module( | ||
episodes=episodes, | ||
explore=explore, | ||
rl_module=self.module, | ||
shared_data=shared_data, | ||
metrics=self.metrics, | ||
) | ||
with self.metrics.log_time(ENV_TO_MODULE_TIMER): | ||
self._cached_to_module = self._env_to_module( | ||
episodes=episodes, | ||
explore=explore, | ||
rl_module=self.module, | ||
shared_data=shared_data, | ||
metrics=self.metrics, | ||
) | ||
|
||
for env_index in range(self.num_envs): | ||
# Call `on_episode_start()` callback (always after reset). | ||
|
@@ -732,13 +741,14 @@ def _reset_envs(self, episodes, shared_data, explore): | |
# properly been processed (if applicable). | ||
self._cached_to_module = None | ||
if self.module: | ||
self._cached_to_module = self._env_to_module( | ||
rl_module=self.module, | ||
episodes=episodes, | ||
explore=explore, | ||
shared_data=shared_data, | ||
metrics=self.metrics, | ||
) | ||
with self.metrics.log_time(ENV_TO_MODULE_TIMER): | ||
self._cached_to_module = self._env_to_module( | ||
rl_module=self.module, | ||
episodes=episodes, | ||
explore=explore, | ||
shared_data=shared_data, | ||
metrics=self.metrics, | ||
) | ||
|
||
# Call `on_episode_start()` callbacks (always after reset). | ||
for env_index in range(self.num_envs): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -160,6 +160,10 @@ | |
GRAD_WAIT_TIMER = "grad_wait" | ||
SAMPLE_TIMER = "sample" # @OldAPIStack | ||
ENV_RUNNER_SAMPLING_TIMER = "env_runner_sampling_timer" | ||
ENV_STEP_TIMER = "env_step_timer" | ||
ENV_TO_MODULE_TIMER = "env_to_module_timer" | ||
RLMODULE_INFERENCE_TIMER = "rlmodule_inference_timer" | ||
MODULE_TO_ENV_TIMER = "module_to_env_timer" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should take this opportunity to make the names of our metrics more expressive. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In general a good idea. In this case the |
||
OFFLINE_SAMPLING_TIMER = "offline_sampling_timer" | ||
REPLAY_BUFFER_ADD_DATA_TIMER = "replay_buffer_add_data_timer" | ||
REPLAY_BUFFER_SAMPLE_TIMER = "replay_buffer_sampling_timer" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changes in this file are nice.