From eef66230225dc27a1c9d6a12a57120a4e102fa27 Mon Sep 17 00:00:00 2001
From: Stephanie Wang
Date: Wed, 12 Jun 2024 16:57:22 -0700
Subject: [PATCH] merge

Signed-off-by: Stephanie Wang
---
 vllm/worker/cpu_model_runner.py | 3 ++-
 vllm/worker/model_runner.py     | 2 +-
 vllm/worker/worker.py           | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/vllm/worker/cpu_model_runner.py b/vllm/worker/cpu_model_runner.py
index 4cd63e54df4d..188eba395df4 100644
--- a/vllm/worker/cpu_model_runner.py
+++ b/vllm/worker/cpu_model_runner.py
@@ -319,7 +319,8 @@ def execute_model(
             "kv_caches": kv_caches,
             "attn_metadata": model_input.attn_metadata,
         }
-        if self.vision_language_config and model_input.multi_modal_input is not None:
+        if (self.vision_language_config
+                and model_input.multi_modal_input is not None):
             execute_model_kwargs.update(model_input.multi_modal_input)
 
         hidden_states = model_executable(**execute_model_kwargs)
diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py
index b552dbb88399..71015297e129 100644
--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -19,8 +19,8 @@
 from vllm.lora.worker_manager import LRUCacheWorkerLoRAManager
 from vllm.model_executor import SamplingMetadata
 from vllm.model_executor.model_loader import get_model
-from vllm.model_input import GPUModelInput
 from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
+from vllm.model_input import GPUModelInput
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import (ModelInputWithSamplingMetadata, SamplerOutput,
diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py
index 8158e3dd80b6..b0d53ccc9ddf 100644
--- a/vllm/worker/worker.py
+++ b/vllm/worker/worker.py
@@ -15,8 +15,8 @@
     set_custom_all_reduce)
 from vllm.lora.request import LoRARequest
 from vllm.model_executor import set_random_seed
-from vllm.model_input import GPUModelInput
 from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
+from vllm.model_input import GPUModelInput
 from vllm.sequence import ExecuteModelRequest, PoolerOutput, SamplerOutput
 from vllm.worker.cache_engine import CacheEngine
 from vllm.worker.embedding_model_runner import EmbeddingModelRunner