-
Notifications
You must be signed in to change notification settings - Fork 8
Description
HW: 4XB60
=================================================
Step 1: Launch a container
docker run --rm -dit \
  --privileged \
  --net=host \
  --device=/dev/dri \
  --name=lsv-container \
  -v /home/shawn/:/llm/shawn \
  -e no_proxy=localhost,127.0.0.1 \
  -e http_proxy=$http_proxy \
  -e https_proxy=$https_proxy \
  --shm-size="32g" \
  --entrypoint /bin/bash \
  intel/llm-scaler-vllm:0.10.0-b1
Step 2: Launching the Qwen2.5-VL-32B-Instruct model with SYM_INT4 quantization fails
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export VLLM_OFFLOAD_WEIGHTS_BEFORE_QUANT=0
MODEL_NAME="Qwen2.5-VL-32B-Instruct"
MODEL_PATH="/llm/shawn/models/Qwen/${MODEL_NAME}"
python3 -m vllm.entrypoints.openai.api_server \
  --model "${MODEL_PATH}" \
  --served-model-name "${MODEL_NAME}" \
  --dtype=float16 \
  --enforce-eager \
  --port 8000 \
  --host 0.0.0.0 \
  --trust-remote-code \
  --gpu-memory-util=0.95 \
  --no-enable-prefix-caching \
  --max-num-batched-tokens=4096 \
  --disable-log-requests \
  --max-model-len=4096 \
  --block-size 64 \
  --quantization sym_int4 \
  -tp=4 \
  --allowed-local-media-path /llm/shawn/
If we change the parameter to --quantization fp8, the model launches successfully, but with --quantization sym_int4 it reports an error.
Below is the error info.
(VllmWorker rank=3 pid=70547) INFO 08-29 00:36:37 [default_loader.py:262] Loading weights took 10.04 seconds
Loading safetensors checkpoint shards: 72% Completed | 13/18 [00:10<00:03, 1.26it/s]
(VllmWorker rank=0 pid=70544)
ERROR 08-29 00:36:39 [core.py:638] EngineCore failed to start.
ERROR 08-29 00:36:39 [core.py:638] Traceback (most recent call last):
ERROR 08-29 00:36:39 [core.py:638] File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/engine/core.py", line 629, in run_engine_core
ERROR 08-29 00:36:39 [core.py:638] engine_core = EngineCoreProc(*args, **kwargs)
ERROR 08-29 00:36:39 [core.py:638] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 08-29 00:36:39 [core.py:638] File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/engine/core.py", line 447, in init
ERROR 08-29 00:36:39 [core.py:638] super().init(vllm_config, executor_class, log_stats,
ERROR 08-29 00:36:39 [core.py:638] File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/engine/core.py", line 77, in init
ERROR 08-29 00:36:39 [core.py:638] self.model_executor = executor_class(vllm_config)
ERROR 08-29 00:36:39 [core.py:638] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 08-29 00:36:39 [core.py:638] File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/executor/executor_base.py", line 53, in init
ERROR 08-29 00:36:39 [core.py:638] self._init_executor()
ERROR 08-29 00:36:39 [core.py:638] File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/executor/multiproc_executor.py", line 94, in _init_executor
ERROR 08-29 00:36:39 [core.py:638] self.workers = WorkerProc.wait_for_ready(unready_workers)
ERROR 08-29 00:36:39 [core.py:638] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 08-29 00:36:39 [core.py:638] File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/executor/multiproc_executor.py", line 446, in wait_for_ready
ERROR 08-29 00:36:39 [core.py:638] raise e from None
ERROR 08-29 00:36:39 [core.py:638] Exception: WorkerProc initialization failed due to an exception in a background process. See stack trace for root cause.
Process EngineCore_0:
Traceback (most recent call last):
File "/usr/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
self.run()
File "/usr/lib/python3.12/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/engine/core.py", line 642, in run_engine_core
raise e
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/engine/core.py", line 629, in run_engine_core
engine_core = EngineCoreProc(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/engine/core.py", line 447, in init
super().init(vllm_config, executor_class, log_stats,
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/engine/core.py", line 77, in init
self.model_executor = executor_class(vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/executor/executor_base.py", line 53, in init
self._init_executor()
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/executor/multiproc_executor.py", line 94, in _init_executor
self.workers = WorkerProc.wait_for_ready(unready_workers)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/executor/multiproc_executor.py", line 446, in wait_for_ready
raise e from None
Exception: WorkerProc initialization failed due to an exception in a background process. See stack trace for root cause.
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/entrypoints/openai/api_server.py", line 1856, in <module>
uvloop.run(run_server(args))
File "/usr/local/lib/python3.12/dist-packages/uvloop/init.py", line 109, in run
return __asyncio.run(
^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/runners.py", line 194, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/usr/local/lib/python3.12/dist-packages/uvloop/init.py", line 61, in wrapper
return await main
^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/entrypoints/openai/api_server.py", line 1791, in run_server
await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/entrypoints/openai/api_server.py", line 1811, in run_server_worker
async with build_async_engine_client(args, client_config) as engine_client:
File "/usr/lib/python3.12/contextlib.py", line 210, in aenter
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/entrypoints/openai/api_server.py", line 158, in build_async_engine_client
async with build_async_engine_client_from_engine_args(
File "/usr/lib/python3.12/contextlib.py", line 210, in aenter
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/entrypoints/openai/api_server.py", line 194, in build_async_engine_client_from_engine_args
async_llm = AsyncLLM.from_vllm_config(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/engine/async_llm.py", line 163, in from_vllm_config
return cls(
^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/engine/async_llm.py", line 117, in init
self.engine_core = EngineCoreClient.make_async_mp_client(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/engine/core_client.py", line 98, in make_async_mp_client
return AsyncMPClient(*client_args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/engine/core_client.py", line 677, in init
super().init(
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/engine/core_client.py", line 408, in init
with launch_core_engines(vllm_config, executor_class,
File "/usr/lib/python3.12/contextlib.py", line 144, in exit
next(self.gen)
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/engine/utils.py", line 697, in launch_core_engines
wait_for_engine_startup(
File "/usr/local/lib/python3.12/dist-packages/vllm-0.10.1.dev0+g6d8d0a24c.d20250827.xpu-py3.12-linux-x86_64.egg/vllm/v1/engine/utils.py", line 750, in wait_for_engine_startup
raise RuntimeError("Engine core initialization failed. "
RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {'EngineCore_0': 1}
root@intelmc4b60-DS-V-P15SWN:/llm/shawn# /usr/lib/python3.12/multiprocessing/resource_tracker.py:254: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d '
/usr/lib/python3.12/multiprocessing/resource_tracker.py:254: UserWarning: resource_tracker: There appear to be 2 leaked shared_memory objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d '