Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions xllm/core/runtime/llm_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,14 @@ LLMEngine::LLMEngine(const runtime::Options& options,
// create ThreadPool for link cluster
link_threadpool_ = std::make_unique<ThreadPool>(worker_clients_num_);

process_group_test();

// init thread pool
threadpool_ = std::make_unique<ThreadPool>(16);
}

void LLMEngine::process_group_test() {
#if !defined(USE_NPU)
// In multi-node serving mode, only the driver engine
// creates worker_clients_.
if (worker_clients_num_ > 1) {
Expand All @@ -101,9 +109,7 @@ LLMEngine::LLMEngine(const runtime::Options& options,
// wait up to 4 seconds for all futures to complete
folly::collectAll(futures).within(std::chrono::seconds(4)).get();
}

// init thread pool
threadpool_ = std::make_unique<ThreadPool>(16);
#endif
}

bool LLMEngine::init() {
Expand Down
1 change: 1 addition & 0 deletions xllm/core/runtime/llm_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ class LLMEngine : public Engine {
const Engine::KVCacheCapacity& kv_cache_cap);
std::vector<std::vector<RawForwardInput>> prepare_inputs(
std::vector<Batch>& batch);
void process_group_test();

protected:
// options
Expand Down
6 changes: 6 additions & 0 deletions xllm/core/runtime/vlm_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ VLMEngine::VLMEngine(const runtime::Options& options) : options_(options) {
parallel_args, devices[i], options_, worker_type));
}

process_group_test();
}

void VLMEngine::process_group_test() {
#if !defined(USE_NPU)
if (workers_.size() > 1) {
// test process group
std::vector<folly::SemiFuture<folly::Unit>> futures;
Expand All @@ -71,6 +76,7 @@ VLMEngine::VLMEngine(const runtime::Options& options) : options_(options) {
// wait up to 4 seconds for all futures to complete
folly::collectAll(futures).within(std::chrono::seconds(4)).get();
}
#endif
}

bool VLMEngine::init() {
Expand Down
1 change: 1 addition & 0 deletions xllm/core/runtime/vlm_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class VLMEngine : public Engine {
bool init_model();
Engine::KVCacheCapacity estimate_kv_cache_capacity();
bool allocate_kv_cache(const Engine::KVCacheCapacity& kv_cache_cap);
void process_group_test();

private:
// options
Expand Down