Skip to content

Commit ebd0842

Browse files
committed
fix bug and refine code
1 parent 86d5006 commit ebd0842

File tree

2 files changed

+20
-13
lines changed

2 files changed

+20
-13
lines changed

fastdeploy/config.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1391,19 +1391,20 @@ def __init__(
13911391
self.cache_config: CacheConfig = cache_config # type: ignore
13921392
self.plas_attention_config: Optional[PlasAttentionConfig] = plas_attention_config
13931393
self.structured_outputs_config: StructuredOutputsConfig = structured_outputs_config
1394-
# Initialize cuda graph capture list
1395-
if self.graph_opt_config.cudagraph_capture_sizes is None:
1396-
self.graph_opt_config._set_cudagraph_sizes(max_num_seqs=self.scheduler_config.max_num_seqs)
13971394

1398-
if self.graph_opt_config.cudagraph_only_prefill:
1399-
self.graph_opt_config.init_with_cudagrpah_size(max_capture_size=512)
1400-
elif self.speculative_config is not None and self.speculative_config.method == "mtp":
1395+
# Initialize cuda graph capture list
1396+
max_shape = self.scheduler_config.max_num_seqs
1397+
if self.speculative_config is not None and self.speculative_config.method == "mtp":
14011398
max_shape = self.scheduler_config.max_num_seqs * (self.speculative_config.num_speculative_tokens + 1)
1402-
if max_shape % 2 == 1:
1403-
max_shape = max_shape + 1
1404-
self.graph_opt_config.init_with_cudagrpah_size(max_capture_size=min(512, max_shape))
1399+
assert max_shape % 2 == 0, "CUDAGraph only supports capturing even token nums in MTP scenarios."
1400+
if self.graph_opt_config.cudagraph_only_prefill:
1401+
max_shape = 512
14051402
else:
1406-
self.graph_opt_config.init_with_cudagrpah_size(max_capture_size=self.scheduler_config.max_num_seqs)
1403+
max_shape = min(512, max_shape)
1404+
1405+
if self.graph_opt_config.cudagraph_capture_sizes is None:
1406+
self.graph_opt_config._set_cudagraph_sizes(max_num_seqs=max_shape)
1407+
self.graph_opt_config.init_with_cudagrpah_size(max_capture_size=max_shape)
14071408

14081409
self.tokenizer = tokenizer
14091410
self.ips = ips

fastdeploy/model_executor/graph_optimization/cudagraph_piecewise_backend.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def __call__(self, **kwargs) -> List[paddle.Tensor] | paddle.Tensor:
171171
for n in range(entry.num_finished_warmup, self.warm_up_size):
172172
entry.num_finished_warmup += 1
173173
entry.runnable(**kwargs)
174-
logger.debug(
174+
logger.info(
175175
f"[CUDA GRAPH][ID:{id(self)}] Warm up for real shape {padding_real_shape}, "
176176
f"finished ({n + 1}/{entry.num_finished_warmup}) times"
177177
)
@@ -207,7 +207,7 @@ def __call__(self, **kwargs) -> List[paddle.Tensor] | paddle.Tensor:
207207

208208
# For CUDAGraph debug
209209
# self._save_cudagrpah_dot_files(entry)
210-
logger.debug(f"[CUDA GRAPH][ID:{id(self)}] CUDAGraph captured for real shape {padding_real_shape}")
210+
logger.info(f"[CUDA GRAPH][ID:{id(self)}] CUDAGraph captured for real shape {padding_real_shape}")
211211

212212
# Replay
213213
entry.cuda_graph.replay()
@@ -224,7 +224,7 @@ def _create_entry_dict(self):
224224
for shape in self.cudagraph_capture_sizes:
225225
self.concrete_size_entries[shape] = ConcreteSizeEntry(real_shape=shape)
226226

227-
logger.debug(
227+
logger.info(
228228
f"[CUDA GRAPH][ID:{id(self)}] CUDAGraph capture list {self.cudagraph_capture_sizes}, "
229229
"Created all real shape entry."
230230
)
@@ -254,3 +254,9 @@ def _save_cudagrpah_dot_files(self, entry):
254254
f"{log_dir}/GraphDotFiles/backend{id(self)}_shape{entry.real_shape}",
255255
1 << 0,
256256
)
257+
258+
def check_capture_successful(self):
259+
"""Raise ValueError for the first shape in self.concrete_size_entries whose entry is not marked captured; return None if every entry is captured."""
260+
for shape, entry in self.concrete_size_entries.items():
261+
if not entry.captured:
262+
raise ValueError(f"[CUDA GRAPH][ID:{id(self)}] Shape {shape} capture failed.")

0 commit comments

Comments
 (0)