-
Notifications
You must be signed in to change notification settings - Fork 7.1k
add queue length based autoscaling #59351
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7279bbe
440fae7
8ae50fd
9a9d637
d6ee531
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ | |
| from ray.serve._private.config import DeploymentConfig | ||
| from ray.serve._private.constants import ( | ||
| DEFAULT_AUTOSCALING_POLICY_NAME, | ||
| DEFAULT_QUEUE_BASED_AUTOSCALING_POLICY, | ||
| DEFAULT_REQUEST_ROUTER_PATH, | ||
| RAY_SERVE_ENABLE_TASK_EVENTS, | ||
| SERVE_LOGGER_NAME, | ||
|
|
@@ -39,6 +40,10 @@ | |
| from ray.serve._private.deployment_state import DeploymentStateManager | ||
| from ray.serve._private.endpoint_state import EndpointState | ||
| from ray.serve._private.logging_utils import configure_component_logger | ||
| from ray.serve._private.queue_monitor import ( | ||
| QueueMonitorConfig, | ||
| create_queue_monitor_actor, | ||
| ) | ||
| from ray.serve._private.storage.kv_store import KVStoreBase | ||
| from ray.serve._private.usage import ServeUsageTag | ||
| from ray.serve._private.utils import ( | ||
|
|
@@ -74,6 +79,87 @@ | |
| CHECKPOINT_KEY = "serve-application-state-checkpoint" | ||
|
|
||
|
|
||
def _is_task_consumer_deployment(deployment_info: DeploymentInfo) -> bool:
    """Check if a deployment is a TaskConsumer.

    A deployment is treated as a TaskConsumer when its deployment definition
    carries a truthy ``_is_task_consumer`` attribute.

    Args:
        deployment_info: Deployment info whose
            ``replica_config.deployment_def`` is inspected.

    Returns:
        True if the marker attribute is present and truthy. False if the
        deployment definition is None, has no marker, or could not be read.
    """
    # NOTE(review): review feedback on this change says that reading
    # `deployment_def` deserializes user code inside the controller, which may
    # not have the deployment's runtime_env -- confirm before relying on it.
    try:
        deployment_def = deployment_info.replica_config.deployment_def
        if deployment_def is None:
            return False
        # Coerce so the declared `bool` return type holds even when the marker
        # attribute is set to some other truthy/falsy value.
        return bool(getattr(deployment_def, "_is_task_consumer", False))
    except Exception as e:
        # Best-effort check: any failure means "not a TaskConsumer". Keep the
        # traceback so the swallowed error can still be diagnosed.
        logger.debug(
            f"Error checking if deployment is TaskConsumer: {e}", exc_info=True
        )

    return False
|
|
||
|
|
||
def _get_queue_monitor_config(
    deployment_info: DeploymentInfo,
) -> Optional[QueueMonitorConfig]:
    """Extract QueueMonitorConfig from a TaskConsumer deployment.

    Calls ``get_queue_monitor_config()`` on the deployment definition when the
    definition exposes that attribute.

    Args:
        deployment_info: Deployment info whose
            ``replica_config.deployment_def`` is queried.

    Returns:
        Whatever ``get_queue_monitor_config()`` returns, or None when the
        definition has no such attribute or when reading/calling it raises.
    """
    try:
        deployment_def = deployment_info.replica_config.deployment_def
        if hasattr(deployment_def, "get_queue_monitor_config"):
            return deployment_def.get_queue_monitor_config()
    except Exception as e:
        # Best-effort: the caller treats None as "no queue monitor available".
        # Attach the traceback so failures inside user code are diagnosable.
        logger.warning(f"Failed to get queue monitor config: {e}", exc_info=True)

    return None
|
|
||
|
|
||
def _configure_queue_based_autoscaling_for_task_consumers(
    deployment_infos: Dict[str, DeploymentInfo]
) -> Dict[str, DeploymentInfo]:
    """Configure queue-based autoscaling for TaskConsumers.

    For TaskConsumer deployments with autoscaling enabled and no custom policy,
    this function creates a QueueMonitor actor and switches the autoscaling
    policy to queue-based autoscaling. The deployment infos are modified in
    place.

    Args:
        deployment_infos: Deployment infos dict, keyed by deployment name.

    Returns:
        The same ``deployment_infos`` dict that was passed in.
    """
    for deployment_name, deployment_info in deployment_infos.items():
        if not _is_task_consumer_deployment(deployment_info):
            continue

        autoscaling_config = deployment_info.deployment_config.autoscaling_config
        if autoscaling_config is None:
            continue

        logger.info(
            f"Deployment '{deployment_name}' is a TaskConsumer with autoscaling enabled"
        )

        # Set queue-based autoscaling policy on TaskConsumer only if user
        # hasn't set a custom policy. This respects user's explicit choice.
        if not autoscaling_config.policy.is_default_policy_function():
            continue

        queue_monitor_config = _get_queue_monitor_config(deployment_info)
        if queue_monitor_config is None:
            # Without a monitor config there is nothing for a queue-based
            # policy to query, so the default policy is left in place.
            continue

        # Create QueueMonitor as a Ray actor (not Serve deployment).
        # This avoids deadlock when autoscaling policy queries it from
        # controller.
        try:
            create_queue_monitor_actor(
                deployment_name=deployment_name,
                config=queue_monitor_config,
            )
        except Exception as e:
            # NOTE(review): a reviewer asked "why fail silently, should we
            # fail deployment here". For now this stays best-effort: the
            # deployment keeps its default policy and the error is logged
            # with its traceback.
            logger.error(
                f"Failed to create QueueMonitor actor for '{deployment_name}': {e}",
                exc_info=True,
            )
            continue

        # Switch to queue-based autoscaling policy
        autoscaling_config.policy = AutoscalingPolicy(
            policy_function=DEFAULT_QUEUE_BASED_AUTOSCALING_POLICY
        )
        logger.info(
            f"Switched TaskConsumer '{deployment_name}' to queue-based autoscaling policy"
        )

    return deployment_infos
|
|
||
|
|
||
| class BuildAppStatus(Enum): | ||
| """Status of the build application task.""" | ||
|
|
||
|
|
@@ -1220,6 +1306,10 @@ def deploy_apps( | |
| ) | ||
| for params in deployment_args | ||
| } | ||
|
|
||
| # Configure queue-based autoscaling for TaskConsumers | ||
| _configure_queue_based_autoscaling_for_task_consumers(deployment_infos) | ||
|
Comment on lines
+1310
to
+1311
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This only takes effect in imperative mode. If a user deploys through a config file, this code path is not executed. |
||
|
|
||
| self._application_states[name].deploy_app( | ||
| deployment_infos, external_scaler_enabled | ||
| ) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This access is unsafe because it deserializes user code in the controller, and the controller does not have the user's runtime env needed to execute that code.
For example, if the user code depends on PyTorch and torch is listed as a pip dependency in the deployment's
runtime_env, then this call will fail in cluster mode.