[BugFix] Don’t compute reorder threshold when there are no attention groups (vllm-project#27861)

hl475 · web-flow · commit 933cdea44061 · 2025-10-31T11:36:18.000Z
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
@@ -4149,6 +4149,11 @@ def calculate_reorder_batch_threshold(self) -> None:
             group.get_metadata_builder().reorder_batch_threshold
             for group in self._attn_group_iterator()
         ]
+        # If there are no attention groups (e.g. an attention-free model), the
+        # list is empty and reduce() would raise; leave reordering disabled.
+        if len(reorder_batch_thresholds) == 0:
+            self.reorder_batch_threshold = None
+            return
         self.reorder_batch_threshold = reduce(min_none_high, reorder_batch_thresholds)
 
     def _find_compatible_block_sizes(