Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions fastdeploy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,9 @@ def __init__(
""" Whether to use shared memory pool for multi capture_size """
self.use_unique_memory_pool: bool = False

""" Whether to use cudagraph for draft model."""
self.draft_model_use_cudagraph: bool = True

self.max_capture_size: int = None
self.real_shape_to_captured_size: dict[int, int] = None
# CINN Config ...
Expand Down
3 changes: 2 additions & 1 deletion fastdeploy/spec_decode/mtp.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def __init__(
self._init_model_inputs()

# CUDA Graph
self.draft_model_use_cudagraph = self.graph_opt_config.draft_model_use_cudagraph
self.cudagraph_capture_sizes = list(reversed(self.graph_opt_config.cudagraph_capture_sizes))
self.sot_warmup_sizes = self.graph_opt_config.sot_warmup_sizes

Expand Down Expand Up @@ -591,7 +592,7 @@ def _initialize_forward_meta(self, step_use_cudagraph: bool = False):
attn_backend.init_attention_metadata(self.forward_meta)

# TODO(gongshaotian): Use CUDAGraph with Draft Model (for now it is only enabled when graph_opt_config.draft_model_use_cudagraph is set)
self.forward_meta.step_use_cudagraph = step_use_cudagraph
self.forward_meta.step_use_cudagraph = step_use_cudagraph and self.draft_model_use_cudagraph

def exist_prefill(self):
"""
Expand Down
Loading