Commit 095cb61

Added basic lit test for flex_attention
Signed-off-by: Keshav Vinayak Jha <[email protected]>
Parent: 40c76f2

File tree: 1 file changed, +78 -0 lines changed


test/python/fx_importer/basic_test.py

Lines changed: 78 additions & 0 deletions
@@ -5,6 +5,8 @@
 
 # RUN: %PYTHON %s | FileCheck %s
 
+from torch._tensor import Tensor
+from torch.nn.attention.flex_attention import _mask_mod_signature
 from typing import List
 
 import torch
@@ -251,6 +253,82 @@ def body(i, x):
     print(m)
 
 
+@run
+# CHECK-LABEL: test_flex_attention
+# Check that helper functions are emitted as private functions
+# CHECK: func.func private @sdpa_score{{[0-9]+}}(%arg0: !torch.vtensor<[],f32>, %arg1: !torch.vtensor<[],si32>, %arg2: !torch.vtensor<[],si32>, %arg3: !torch.vtensor<[],si32>, %arg4: !torch.vtensor<[],si32>) -> !torch.vtensor<[],f32>
+# CHECK: func.func private @sdpa_mask{{[0-9]+}}(%arg0: !torch.vtensor<[],si32>, %arg1: !torch.vtensor<[],si32>, %arg2: !torch.vtensor<[],si32>, %arg3: !torch.vtensor<[],si32>) -> !torch.vtensor<[],i1>
+# Check the main function calls flex_attention with both score_mod and mask_mod
+# CHECK: func.func @test_flex_attention(
+# CHECK-SAME: %arg{{[0-9]+}}: !torch.vtensor<[2,4,16,8],f32>, %arg{{[0-9]+}}: !torch.vtensor<[2,4,16,8],f32>, %arg{{[0-9]+}}: !torch.vtensor<[2,4,16,8],f32>)
+# CHECK-SAME: -> (!torch.vtensor<[2,4,16,8],f32>, !torch.vtensor<[2,4,16],f32>)
+# CHECK: %[[SCALE:.*]] = torch.constant.float 1.000000e+00
+# CHECK: %[[TRUE:.*]] = torch.constant.bool true
+# CHECK: %[[OUTPUT:.*]], %[[LSE:.*]] = torch.aten.flex_attention %{{.*}}, %{{.*}}, %{{.*}}, %[[SCALE]], %[[TRUE]] {mask_mod_fn = @sdpa_mask{{[0-9]+}}, score_mod_fn = @sdpa_score{{[0-9]+}}}
+# CHECK: return %[[OUTPUT]], %[[LSE]]
+def test_flex_attention():
+    from torch._higher_order_ops.flex_attention import flex_attention
+    from torch.nn.attention.flex_attention import create_block_mask
+
+    class FlexAttentionWithMaskModule(nn.Module):
+        def __init__(self, B, H, M):
+            super().__init__()
+
+            # Create block mask and register tensors as buffers
+            def causal_mask(b, h, q_idx, kv_idx):
+                return q_idx >= kv_idx
+
+            bm = create_block_mask(causal_mask, B, H, M, M, device="cpu")
+            bm_tuple = bm.as_tuple()
+
+            # Register each tensor component as a buffer so torch.export can track them
+            # as part of the module's state. Without this, the export tracer cannot
+            # properly capture these tensors in the graph since they're created outside
+            # the forward pass and would appear as untracked external references.
+            for idx, tensor in enumerate(bm_tuple):
+                if isinstance(tensor, torch.Tensor):
+                    self.register_buffer(f"bm_tensor_{idx}", tensor, persistent=False)
+                else:
+                    setattr(self, f"bm_scalar_{idx}", tensor)
+
+            self.bm_tuple_length = len(bm_tuple)
+
+        def forward(self, q, k, v):
+            def score_mod(score, b, h, q_idx, kv_idx):
+                return score * 0.5
+
+            # Reconstruct block mask tuple from buffers
+            bm_tuple = tuple(
+                (
+                    getattr(self, f"bm_tensor_{idx}")
+                    if hasattr(self, f"bm_tensor_{idx}")
+                    else getattr(self, f"bm_scalar_{idx}")
+                )
+                for idx in range(self.bm_tuple_length)
+            )
+
+            return flex_attention(
+                q,
+                k,
+                v,
+                score_mod=score_mod,
+                block_mask=bm_tuple,
+                scale=1.0,
+                kernel_options={},
+            )
+
+    # Export -> import to Torch-MLIR
+    B, H, M, K = 2, 4, 16, 8
+    q = torch.randn(B, H, M, K)
+    k = torch.randn(B, H, M, K)
+    v = torch.randn(B, H, M, K)
+
+    m = fx.export_and_import(
+        FlexAttentionWithMaskModule(B, H, M), q, k, v, func_name="test_flex_attention"
+    )
+    print(m)
+
+
 @run
 # CHECK-LABEL: test_stack_trace
 # CHECK: #loc[[LOC1:.+]] = loc(
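
For reference, below is a minimal eager-mode sketch of the pattern this test exercises: the same causal mask_mod and 0.5x score_mod, but driven through the public torch.nn.attention.flex_attention wrapper rather than the internal higher-order op and flattened block-mask tuple used in the test. The wrapper API (create_block_mask, flex_attention with score_mod/block_mask/scale) is assumed from upstream PyTorch and is not part of this commit.

import torch
from torch.nn.attention.flex_attention import create_block_mask, flex_attention

# Same shapes as the lit test: batch 2, 4 heads, 16 queries/keys, head dim 8.
B, H, M, K = 2, 4, 16, 8
q, k, v = (torch.randn(B, H, M, K) for _ in range(3))

# Same causal mask_mod as the test: keep positions where q_idx >= kv_idx.
def causal_mask(b, h, q_idx, kv_idx):
    return q_idx >= kv_idx

# Same score_mod as the test: scale every attention score by 0.5.
def score_mod(score, b, h, q_idx, kv_idx):
    return score * 0.5

block_mask = create_block_mask(causal_mask, B, H, M, M, device="cpu")
out = flex_attention(q, k, v, score_mod=score_mod, block_mask=block_mask, scale=1.0)
print(out.shape)  # torch.Size([2, 4, 16, 8])

The lit test above checks the Torch-MLIR import of this pattern: the exported program should produce a single torch.aten.flex_attention op whose mask_mod_fn and score_mod_fn attributes reference the private @sdpa_mask / @sdpa_score helper functions.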
