
Commit

bugfix: Choose sm90 kernels only for Hopper GPUs. (#719)
Some kernels use instructions specific to Hopper, which might not be
compatible with future GPUs. Fall back to the non-Hopper kernels on all
other GPUs.
bobboli authored Jan 6, 2025
1 parent 9a00cc2 commit 06309c4
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions flashinfer/utils.py
@@ -262,7 +262,7 @@ def get_cuda_stream(device: torch.device) -> int:

 def determine_gemm_backend(device: torch.device) -> str:
     major, _ = get_compute_capability(device)
-    if major >= 9 and torch.version.cuda >= "12.3":
+    if major == 9 and torch.version.cuda >= "12.3":
         return "sm90"
     else:
         return "sm80"
@@ -349,7 +349,7 @@ def determine_attention_backend(
     major, _ = get_compute_capability(device)

     if (
-        major >= 9
+        major == 9
         and torch.version.cuda >= "12.3"
         and is_fa3_backend_supported(
             pos_encoding_mode,
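
For context, below is a minimal standalone sketch of how the corrected check behaves. It is not part of the commit: get_compute_capability here is a stand-in assumed to wrap torch.cuda.get_device_capability, whereas flashinfer defines its own helper of that name in utils.py.

import torch


def get_compute_capability(device: torch.device) -> "tuple[int, int]":
    # Stand-in helper (assumption): return the (major, minor) CUDA compute
    # capability of the given device.
    return torch.cuda.get_device_capability(device)


def determine_gemm_backend(device: torch.device) -> str:
    major, _ = get_compute_capability(device)
    # With the old "major >= 9" check, a future GPU (e.g. major == 10) would
    # also be routed to the Hopper-specific sm90 kernels, which may rely on
    # instructions that architecture does not support. "major == 9" restricts
    # them to Hopper and falls back to the sm80 kernels everywhere else.
    if major == 9 and torch.version.cuda >= "12.3":
        return "sm90"
    else:
        return "sm80"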
