diff --git a/vllm/__init__.py b/vllm/__init__.py
index 5ca4680227598..ca454efd44b24 100644
--- a/vllm/__init__.py
+++ b/vllm/__init__.py
@@ -3,8 +3,8 @@
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.engine.llm_engine import LLMEngine
-from vllm.engine.ray_utils import initialize_ray_cluster
 from vllm.entrypoints.llm import LLM
+from vllm.executor.ray_utils import initialize_ray_cluster
 from vllm.model_executor.models import ModelRegistry
 from vllm.outputs import CompletionOutput, RequestOutput
 from vllm.sampling_params import SamplingParams
diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index 3a2f7db679358..4b007d71e9cfc 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -10,7 +10,7 @@
 from vllm.config import ModelConfig
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.llm_engine import LLMEngine
-from vllm.engine.ray_utils import initialize_ray_cluster, ray
+from vllm.executor.ray_utils import initialize_ray_cluster, ray
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.outputs import RequestOutput
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 19e58fb1722cf..56c2417d6a6e6 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -15,8 +15,8 @@
     SequenceGroupOutputProcessor)
 from vllm.engine.output_processor.stop_checker import StopChecker
 from vllm.engine.output_processor.util import create_output_by_sequence_group
-from vllm.engine.ray_utils import initialize_ray_cluster
 from vllm.executor.executor_base import ExecutorBase
+from vllm.executor.ray_utils import initialize_ray_cluster
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.outputs import RequestOutput
diff --git a/vllm/executor/ray_gpu_executor.py b/vllm/executor/ray_gpu_executor.py
index e69f104e7d5a4..14b3f803782c6 100644
--- a/vllm/executor/ray_gpu_executor.py
+++ b/vllm/executor/ray_gpu_executor.py
@@ -5,8 +5,8 @@
 from itertools import islice, repeat
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple
 
-from vllm.engine.ray_utils import RayWorkerWrapper, ray
 from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase
+from vllm.executor.ray_utils import RayWorkerWrapper, ray
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.sequence import SamplerOutput, SequenceGroupMetadata
@@ -74,7 +74,7 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
 
         # The driver dummy worker does not actually use any resources.
         # It holds the resource for the driver worker.
-        self.driver_dummy_worker: RayWorkerWrapper = None
+        self.driver_dummy_worker: Optional[RayWorkerWrapper] = None
         # The remaining workers are the actual ray actors.
         self.workers: List[RayWorkerWrapper] = []
 
@@ -318,6 +318,7 @@ def _run_workers(
             driver_worker_output = self.driver_worker.execute_method(
                 method, *driver_args, **driver_kwargs)
         else:
+            assert self.driver_dummy_worker is not None
             driver_worker_output = ray.get(
                 self.driver_dummy_worker.execute_method.remote(
                     method, *driver_args, **driver_kwargs))
@@ -353,8 +354,9 @@ def _compiled_ray_dag(self):
         # a dummy value for now. It will be fixed soon.
         with InputNode() as input_data:
             forward_dag = MultiOutputNode([
-                worker.execute_model_compiled_dag_remote.bind(input_data)
-                for worker in self.workers
+                worker.execute_model_compiled_dag_remote.
+                bind(  # type: ignore[attr-defined]
+                    input_data) for worker in self.workers
             ])
         return forward_dag.experimental_compile()
 
diff --git a/vllm/engine/ray_utils.py b/vllm/executor/ray_utils.py
similarity index 100%
rename from vllm/engine/ray_utils.py
rename to vllm/executor/ray_utils.py
diff --git a/vllm/transformers_utils/tokenizer_group/__init__.py b/vllm/transformers_utils/tokenizer_group/__init__.py
index 69380d67f9b94..0195c40c27f60 100644
--- a/vllm/transformers_utils/tokenizer_group/__init__.py
+++ b/vllm/transformers_utils/tokenizer_group/__init__.py
@@ -1,7 +1,7 @@
 from typing import Optional
 
 from vllm.config import TokenizerPoolConfig
-from vllm.engine.ray_utils import ray
+from vllm.executor.ray_utils import ray
 from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
     BaseTokenizerGroup)
 from vllm.transformers_utils.tokenizer_group.tokenizer_group import (
diff --git a/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py b/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py
index f3cdc00564dbb..7c605416854b8 100644
--- a/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py
+++ b/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py
@@ -6,7 +6,7 @@
 from transformers import PreTrainedTokenizer
 
 from vllm.config import TokenizerPoolConfig
-from vllm.engine.ray_utils import ray
+from vllm.executor.ray_utils import ray
 from vllm.lora.request import LoRARequest
 from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
     BaseTokenizerGroup)