@@ -101,6 +101,25 @@ def start(self, api_server_pid=None):
101101 Initializes the engine and starts its sub-services.
102102 If `api_server_pid` is defined, will launch a thread
103103 to keep getting requests from zmq_server.
104+
105+ NOTE: To clarify the launch order of the components of the LLM engine:
106+ 1. First, launch splitwise scheduler (if necessary) and expert services (if necessary).
107+ 2. Then, launch the common engine, which includes some background threads that insert tasks and receive outputs.
108+ 3. Most importantly, launch workers and cache services. Their launch order is listed as follows.
109+
110+ | Profile | Mixed | PrefixCache | Cache -> Worker | Worker -> Cache |
111+ |---------|-------|-------------|-----------------|-----------------|
112+ | 1 | 1 | 1 | 0 | 1 |
113+ | 1 | 1 | 0 | 0 | 0 |
114+ | 1 | 0 | 1 | 0 | 1 |
115+ | 1 | 0 | 0 | 0 | 1 |
116+ | 0 | 1 | 1 | 0 | 1 |
117+ | 0 | 1 | 0 | 0 | 0 |
118+ | 0 | 0 | 1 | 1 | 0 |
119+ | 0 | 0 | 0 | 1 | 0 |
120+
121+ 4. Finally, inform the user that the engine has started successfully.
122+
104123 """
105124 assert not self .is_started , "The engine is already started."
106125 start_time = time .time ()
@@ -109,7 +128,6 @@ def start(self, api_server_pid=None):
109128 self .ipc_signal_suffix = self .cfg .parallel_config .engine_worker_queue_port [0 ]
110129 self ._init_worker_signals ()
111130
112- # Launch components: scheduler, cache_manager, expert_service et.al.
113131 self .launch_components ()
114132
115133 self .engine .start ()
@@ -151,7 +169,7 @@ def check_worker_initialize_status_func(res: dict):
151169 # and then start the cache manager
152170 if self .do_profile :
153171 self ._stop_profile ()
154- elif self .cfg .cache_config .enable_prefix_caching :
172+ elif self .cfg .scheduler_config . splitwise_role == "mixed" and self . cfg . cache_config .enable_prefix_caching :
155173 device_ids = self .cfg .parallel_config .device_ids .split ("," )
156174 self .cache_manager_processes = self .engine .start_cache_service (device_ids , self .ipc_signal_suffix )
157175
0 commit comments