diff --git a/xllm/core/runtime/llm_engine.cpp b/xllm/core/runtime/llm_engine.cpp index a9111b2a..b2eddb5c 100644 --- a/xllm/core/runtime/llm_engine.cpp +++ b/xllm/core/runtime/llm_engine.cpp @@ -89,6 +89,14 @@ LLMEngine::LLMEngine(const runtime::Options& options, // create ThreadPool for link cluster link_threadpool_ = std::make_unique(worker_clients_num_); + process_group_test(); + + // init thread pool + threadpool_ = std::make_unique(16); +} + +void LLMEngine::process_group_test() { +#if !defined(USE_NPU) // In multi-node serving mode, only driver engine // create worker_clients_. if (worker_clients_num_ > 1) { @@ -101,9 +109,7 @@ LLMEngine::LLMEngine(const runtime::Options& options, // wait up to 4 seconds for all futures to complete folly::collectAll(futures).within(std::chrono::seconds(4)).get(); } - - // init thread pool - threadpool_ = std::make_unique(16); +#endif } bool LLMEngine::init() { diff --git a/xllm/core/runtime/llm_engine.h b/xllm/core/runtime/llm_engine.h index b09267b7..8d6c083c 100644 --- a/xllm/core/runtime/llm_engine.h +++ b/xllm/core/runtime/llm_engine.h @@ -108,6 +108,7 @@ class LLMEngine : public Engine { const Engine::KVCacheCapacity& kv_cache_cap); std::vector> prepare_inputs( std::vector& batch); + void process_group_test(); protected: // options diff --git a/xllm/core/runtime/vlm_engine.cpp b/xllm/core/runtime/vlm_engine.cpp index 47374559..3c7431bc 100644 --- a/xllm/core/runtime/vlm_engine.cpp +++ b/xllm/core/runtime/vlm_engine.cpp @@ -61,6 +61,11 @@ VLMEngine::VLMEngine(const runtime::Options& options) : options_(options) { parallel_args, devices[i], options_, worker_type)); } + process_group_test(); +} + +void VLMEngine::process_group_test() { +#if !defined(USE_NPU) if (workers_.size() > 1) { // test process group std::vector> futures; @@ -71,6 +76,7 @@ VLMEngine::VLMEngine(const runtime::Options& options) : options_(options) { // wait up to 4 seconds for all futures to complete folly::collectAll(futures).within(std::chrono::seconds(4)).get(); } +#endif } bool VLMEngine::init() { diff --git a/xllm/core/runtime/vlm_engine.h b/xllm/core/runtime/vlm_engine.h index b4500237..69db8432 100644 --- a/xllm/core/runtime/vlm_engine.h +++ b/xllm/core/runtime/vlm_engine.h @@ -53,6 +53,7 @@ class VLMEngine : public Engine { bool init_model(); Engine::KVCacheCapacity estimate_kv_cache_capacity(); bool allocate_kv_cache(const Engine::KVCacheCapacity& kv_cache_cap); + void process_group_test(); private: // options