jd-opensource · yq33victor · Nov 3, 2025 · Nov 3, 2025
diff --git a/xllm/core/runtime/llm_engine.cpp b/xllm/core/runtime/llm_engine.cpp
@@ -89,6 +89,14 @@ LLMEngine::LLMEngine(const runtime::Options& options,
   // create ThreadPool for link cluster
   link_threadpool_ = std::make_unique<ThreadPool>(worker_clients_num_);
 
+  process_group_test();
+
+  // init thread pool
+  threadpool_ = std::make_unique<ThreadPool>(16);
+}
+
+void LLMEngine::process_group_test() {
+#if !defined(USE_NPU)
   // In multi-node serving mode, only driver engine
   // create worker_clients_.
   if (worker_clients_num_ > 1) {
@@ -101,9 +109,7 @@ LLMEngine::LLMEngine(const runtime::Options& options,
     // wait up to 4 seconds for all futures to complete
     folly::collectAll(futures).within(std::chrono::seconds(4)).get();
   }
-
-  // init thread pool
-  threadpool_ = std::make_unique<ThreadPool>(16);
+#endif
 }
 
 bool LLMEngine::init() {

diff --git a/xllm/core/runtime/llm_engine.h b/xllm/core/runtime/llm_engine.h
@@ -108,6 +108,7 @@ class LLMEngine : public Engine {
       const Engine::KVCacheCapacity& kv_cache_cap);
   std::vector<std::vector<RawForwardInput>> prepare_inputs(
       std::vector<Batch>& batch);
+  void process_group_test();
 
  protected:
   // options

diff --git a/xllm/core/runtime/vlm_engine.cpp b/xllm/core/runtime/vlm_engine.cpp
@@ -61,6 +61,11 @@ VLMEngine::VLMEngine(const runtime::Options& options) : options_(options) {
         parallel_args, devices[i], options_, worker_type));
   }
 
+  process_group_test();
+}
+
+void VLMEngine::process_group_test() {
+#if !defined(USE_NPU)
   if (workers_.size() > 1) {
     // test process group
     std::vector<folly::SemiFuture<folly::Unit>> futures;
@@ -71,6 +76,7 @@ VLMEngine::VLMEngine(const runtime::Options& options) : options_(options) {
     // wait up to 4 seconds for all futures to complete
     folly::collectAll(futures).within(std::chrono::seconds(4)).get();
   }
+#endif
 }
 
 bool VLMEngine::init() {

diff --git a/xllm/core/runtime/vlm_engine.h b/xllm/core/runtime/vlm_engine.h
@@ -53,6 +53,7 @@ class VLMEngine : public Engine {
   bool init_model();
   Engine::KVCacheCapacity estimate_kv_cache_capacity();
   bool allocate_kv_cache(const Engine::KVCacheCapacity& kv_cache_cap);
+  void process_group_test();
 
  private:
   // options