[Kernel] Raise an exception in MoE kernel if the batch size is larger…

… than 65k (vllm-project#5939)
llmpros · Jun 30, 2024 · e730666 · e730666
1 parent b2805d7
commit e730666
Showing 1 changed file with 5 additions and 0 deletions.
diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -423,6 +423,11 @@ def fused_experts(hidden_states: torch.Tensor,
     M, _ = hidden_states.shape
     E, N, _ = w1.shape
 
+    if M > 65536:
+        # https://github.com/vllm-project/vllm/issues/5938
+        raise ValueError("MoE kernel does not support more than 65536 tokens, "
+                         f"but got {M}")
+
     if override_config:
         config = override_config
     else: