We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 3933f18, commit 933cdea — Copy full SHA for 933cdea
vllm/v1/worker/gpu_model_runner.py
@@ -4149,6 +4149,11 @@ def calculate_reorder_batch_threshold(self) -> None:
4149
group.get_metadata_builder().reorder_batch_threshold
4150
for group in self._attn_group_iterator()
4151
]
4152
+ # If there are no attention groups (attention-free model) or no backend
4153
+ # reports a threshold, leave reordering disabled.
4154
+ if len(reorder_batch_thresholds) == 0:
4155
+ self.reorder_batch_threshold = None
4156
+ return
4157
self.reorder_batch_threshold = reduce(min_none_high, reorder_batch_thresholds)
4158
4159
def _find_compatible_block_sizes(
0 commit comments