Skip tests that are incompatible with compute capability 12.

mdavis36 · mdavis36 · commit a929f145e78e · 2025-10-03T19:23:05.000-07:00
diff --git a/tests/python/direct/test_cutlass_nvfp4_gemm.py b/tests/python/direct/test_cutlass_nvfp4_gemm.py
@@ -7,9 +7,10 @@
 import torch
 from nvfuser_direct import nvf_cutlass
 
-if torch.cuda.get_device_capability() < (10, 0):
+compute_cap = torch.cuda.get_device_capability()
+if compute_cap < (10, 0) or compute_cap >= (12, 0):
     pytest.skip(
-        reason="Nvfp4 Requires compute capability of 10 or above.",
+        reason="Nvfp4 Requires compute capability 10.",
         allow_module_level=True,
     )
 
diff --git a/tests/python/direct/test_narrow_precision.py b/tests/python/direct/test_narrow_precision.py
@@ -15,6 +15,7 @@
     FLOAT8_E4M3_MAX,
     pytorch_nvfp4_quantize,
     is_pre_blackwell,
+    is_pre_blackwell_12,
     linear_to_swizzled_128_4,
     round_up,
     activation_scale_to_nvfp4,
@@ -36,6 +37,9 @@ def nvfp4_quantize(x):
 @pytest.mark.skipif(
     is_pre_blackwell(), reason="Only supported on blackwell and newer devices."
 )
+@pytest.mark.skipif(
+    not is_pre_blackwell_12(), reason="Does not support blackwell compute 12.0"
+)
 @pytest.mark.parametrize("config", [[128, 256, 512], [128, 256, 512]])
 @pytest.mark.parametrize("out_dtype", [torch.bfloat16])
 def test_scaled_mm(
@@ -114,6 +118,9 @@ def nvfuser_fusion_id0(fd: FusionDefinition) -> None:
 @pytest.mark.skipif(
     is_pre_blackwell(), reason="Only supported on blackwell and newer devices."
 )
+@pytest.mark.skipif(
+    not is_pre_blackwell_12(), reason="Does not support blackwell compute 12.0"
+)
 @pytest.mark.parametrize("config", [[1024, 128, 256]])
 @pytest.mark.parametrize("tokens_per_expert_neg_one", [[115, 144, 8]])
 @pytest.mark.parametrize("out_dtype", [torch.bfloat16])
diff --git a/tests/python/direct_utils/utils.py b/tests/python/direct_utils/utils.py
@@ -28,6 +28,11 @@ def is_pre_blackwell():
     return prop.major < 10
 
 
+def is_pre_blackwell_12():
+    prop = torch.cuda.get_device_properties(torch.cuda.current_device())
+    return prop.major < 12
+
+
 # Get string representation for FusionDefinition
 # Run captured python definition
 # Check that the result of captured python definition matches original results