Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -6623,6 +6623,36 @@ def Torch_AtenMmOp : Torch_Op<"aten.mm", [
}];
}

// ODS definition for `aten::_scaled_mm`, mirroring the registered PyTorch
// schema shown in the summary:
//   aten::_scaled_mm : (Tensor, Tensor, Tensor, Tensor, Tensor?, Tensor?, int?, bool) -> (Tensor)
// NOTE: this lives in GeneratedTorchOps.td, so it is emitted by
// torch_ods_gen.py — edit the generator, not this definition, to change it.
def Torch_Aten_ScaledMmOp : Torch_Op<"aten._scaled_mm", [
    AllowsTypeRefinement,
    HasValueSemantics,
    ReadOnly
  ]> {
  let summary = "Generated op for `aten::_scaled_mm : (Tensor, Tensor, Tensor, Tensor, Tensor?, Tensor?, int?, bool) -> (Tensor)`";
  let arguments = (ins
    AnyTorchTensorType:$self,
    AnyTorchTensorType:$mat2,
    AnyTorchTensorType:$scale_a,
    AnyTorchTensorType:$scale_b,
    AnyTorchOptionalTensorType:$bias,
    AnyTorchOptionalTensorType:$scale_result,
    AnyTorchOptionalIntType:$out_dtype,
    Torch_BoolType:$use_fast_accum
  );
  let results = (outs
    AnyTorchOptionalTensorType:$result
  );
  let hasCustomAssemblyFormat = 1;
  let extraClassDefinition = [{
    // Default parser/printer: 8 operands, 1 result, per the schema above.
    ParseResult Aten_ScaledMmOp::parse(OpAsmParser &parser, OperationState &result) {
      return parseDefaultTorchOp(parser, result, 8, 1);
    }
    void Aten_ScaledMmOp::print(OpAsmPrinter &printer) {
      printDefaultTorchOp(printer, *this, 8, 1);
    }
  }];
}

def Torch_Aten_IntMmOp : Torch_Op<"aten._int_mm", [
AllowsTypeRefinement,
HasValueSemantics,
Expand Down
16 changes: 8 additions & 8 deletions lib/Conversion/TorchToTosa/TosaLegalizeUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,27 +297,27 @@ std::optional<Value> getConstTensor<float>(PatternRewriter &rewriter,
(src.isF32() && dest.isInteger(8)) ||
(src.isF32() && dest.isBF16()) ||
(src.isF32() && dest.isF16()) ||
(src.isF32() && isa<Float8E4M3Type>(dest)) ||
(src.isF32() && isa<Float8E4M3FNType>(dest)) ||
(src.isF32() && isa<Float8E5M2Type>(dest)) ||
// f16 -> *
(src.isF16() && dest.isInteger(32)) ||
(src.isF16() && dest.isInteger(16)) ||
(src.isF16() && dest.isInteger(8)) ||
(src.isF16() && dest.isBF16()) ||
(src.isF16() && dest.isF32()) ||
(src.isF16() && isa<Float8E4M3Type>(dest)) ||
(src.isF16() && isa<Float8E4M3FNType>(dest)) ||
(src.isF16() && isa<Float8E5M2Type>(dest)) ||
// bf16 -> *
(src.isBF16() && dest.isInteger(32)) ||
(src.isBF16() && dest.isInteger(16)) ||
(src.isBF16() && dest.isInteger(8)) ||
(src.isBF16() && dest.isF32()) ||
(src.isBF16() && isa<Float8E4M3Type>(dest)) ||
(src.isBF16() && isa<Float8E4M3FNType>(dest)) ||
(src.isBF16() && isa<Float8E5M2Type>(dest)) ||
// fp8e4m3 -> *
(isa<Float8E4M3Type>(src) && dest.isBF16()) ||
(isa<Float8E4M3Type>(src) && dest.isF32()) ||
(isa<Float8E4M3Type>(src) && dest.isF16()) ||
(isa<Float8E4M3FNType>(src) && dest.isBF16()) ||
(isa<Float8E4M3FNType>(src) && dest.isF32()) ||
(isa<Float8E4M3FNType>(src) && dest.isF16()) ||
// fp8e5m2 -> *
(isa<Float8E5M2Type>(src) && dest.isBF16()) ||
(isa<Float8E5M2Type>(src) && dest.isF32()) ||
Expand Down Expand Up @@ -514,8 +514,8 @@ LogicalResult getConvOpsAccType(PatternRewriter &rewriter,
} else if (inputElemTy.isInteger(16) && weightElemTy.isInteger(8) &&
outputElemTy.isInteger(48)) {
accType = mlir::TypeAttr::get(rewriter.getIntegerType(48));
} else if ((isa<Float8E4M3Type>(inputElemTy) &&
isa<Float8E4M3Type>(weightElemTy) && outputElemTy.isF16()) ||
} else if ((isa<Float8E4M3FNType>(inputElemTy) &&
isa<Float8E4M3FNType>(weightElemTy) && outputElemTy.isF16()) ||
(isa<Float8E5M2Type>(inputElemTy) &&
isa<Float8E5M2Type>(weightElemTy) && outputElemTy.isF16())) {
accType = mlir::TypeAttr::get(rewriter.getF16Type());
Expand Down
4 changes: 2 additions & 2 deletions lib/Dialect/Torch/IR/TorchTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,8 @@ static bool isValidTorchDtype(Type dtype) {
// Builtin floating point types.
if (isa<Float16Type, BFloat16Type, Float32Type, Float64Type>(dtype))
return true;
if (isa<Float8E5M2Type, Float8E4M3FNType, Float8E5M2FNUZType,
Float8E4M3FNUZType, Float8E4M3B11FNUZType, Float8E8M0FNUType>(dtype))
if (isa<Float8E5M2Type, Float8E4M3FNType, Float8E8M0FNUType,
Float8E5M2FNUZType, Float8E4M3FNUZType, Float8E4M3B11FNUZType>(dtype))
return true;

if (isa<Torch::StringType>(dtype))
Expand Down
16 changes: 16 additions & 0 deletions lib/Dialect/Torch/Transforms/AbstractInterpLibrary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8407,6 +8407,10 @@ StringRef mlir::torch::Torch::getAbstractInterpLibrary() {
" %0 = call @__torch__.torch.jit._shape_functions.mm(%arg0, %arg1) : (!torch.list<int>, !torch.list<int>) -> !torch.list<int>\n"
" return %0 : !torch.list<int>\n"
" }\n"
" func.func @\"__torch_mlir_shape_fn.aten._scaled_mm\"(%arg0: !torch.list<int>, %arg1: !torch.list<int>, %arg2: !torch.list<int>, %arg3: !torch.list<int>, %arg4: !torch.optional<list<int>>, %arg5: !torch.optional<list<int>>, %arg6: !torch.optional<int>, %arg7: !torch.bool) -> !torch.list<int> {\n"
" %0 = call @__torch__.torch.jit._shape_functions.mm(%arg0, %arg1) : (!torch.list<int>, !torch.list<int>) -> !torch.list<int>\n"
" return %0 : !torch.list<int>\n"
" }\n"
" func.func @\"__torch_mlir_shape_fn.aten._int_mm\"(%arg0: !torch.list<int>, %arg1: !torch.list<int>) -> !torch.list<int> {\n"
" %0 = call @__torch__.torch.jit._shape_functions.mm(%arg0, %arg1) : (!torch.list<int>, !torch.list<int>) -> !torch.list<int>\n"
" return %0 : !torch.list<int>\n"
Expand Down Expand Up @@ -15544,6 +15548,18 @@ StringRef mlir::torch::Torch::getAbstractInterpLibrary() {
" }\n"
" return %6 : !torch.int\n"
" }\n"
" func.func @\"__torch_mlir_dtype_fn.aten._scaled_mm\"(%arg0: !torch.tuple<int, int>, %arg1: !torch.tuple<int, int>, %arg2: !torch.tuple<int, int>, %arg3: !torch.tuple<int, int>, %arg4: !torch.optional<tuple<int, int>>, %arg5: !torch.optional<tuple<int, int>>, %arg6: !torch.optional<int>, %arg7: !torch.bool) -> !torch.int {\n"
" %none = torch.constant.none\n"
" %0:2 = torch.prim.TupleUnpack %arg0 : !torch.tuple<int, int> -> !torch.int, !torch.int\n"
" %1 = torch.aten.__isnot__ %arg6, %none : !torch.optional<int>, !torch.none -> !torch.bool\n"
" %2 = torch.prim.If %1 -> (!torch.int) {\n"
" %3 = torch.prim.unchecked_cast %arg6 : !torch.optional<int> -> !torch.int\n"
" torch.prim.If.yield %3 : !torch.int\n"
" } else {\n"
" torch.prim.If.yield %0#1 : !torch.int\n"
" }\n"
" return %2 : !torch.int\n"
" }\n"
" func.func @\"__torch_mlir_dtype_fn.aten._int_mm\"(%arg0: !torch.tuple<int, int>, %arg1: !torch.tuple<int, int>) -> !torch.int {\n"
" %int3 = torch.constant.int 3\n"
" %none = torch.constant.none\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,9 @@ def aten〇mv〡shape(self: List[int], vec: List[int]) -> List[int]:
def aten〇mm〡shape(self: List[int], mat2: List[int]) -> List[int]:
    """Shape transfer function for `aten.mm`: delegate to the upstream
    TorchScript mm shape rule for `self @ mat2`."""
    result_shape = upstream_shape_functions.mm(self, mat2)
    return result_shape

def aten〇_scaled_mm〡shape(self: List[int], mat2: List[int], scale_a: List[int], scale_b: List[int], bias: Optional[List[int]] = None, scale_result: Optional[List[int]] = None, out_dtype: Optional[int] = None, use_fast_accum: bool = False) -> List[int]:
    """Shape transfer function for `aten._scaled_mm`.

    The scale/bias operands are ignored here: they do not influence the
    result shape, which is exactly that of a plain `self @ mat2` matmul.
    """
    out_shape = upstream_shape_functions.mm(self, mat2)
    return out_shape

Comment thread
catcor01 marked this conversation as resolved.
def aten〇_int_mm〡shape(self: List[int], mat2: List[int]) -> List[int]:
    """Shape transfer function for `aten._int_mm`: same shape rule as a
    regular matmul, via the upstream mm helper."""
    mm_shape = upstream_shape_functions.mm(self, mat2)
    return mm_shape

Expand Down Expand Up @@ -4682,6 +4685,13 @@ def aten〇mm〡dtype(self_rank_dtype: Tuple[int, int], mat2_rank_dtype: Tuple[i
dtypes = [self_dtype, mat2_dtype]
return promote_dtypes(ranks, dtypes)

def aten〇_scaled_mm〡dtype(self_rank_dtype: Tuple[int, int], mat2_rank_dtype: Tuple[int, int], scale_a_rank_dtype: Tuple[int, int], scale_b_rank_dtype: Tuple[int, int], bias_rank_dtype: Optional[Tuple[int, int]] = None, scale_result_rank_dtype: Optional[Tuple[int, int]] = None, out_dtype: Optional[int] = None, use_fast_accum: bool = False) -> int:
    """Dtype transfer function for `aten._scaled_mm`.

    The result dtype is the explicitly requested `out_dtype` when one is
    provided; otherwise the result keeps the dtype of the first operand.
    """
    _, self_dtype = self_rank_dtype
    return self_dtype if out_dtype is None else out_dtype

def aten〇_int_mm〡dtype(self_rank_dtype: Tuple[int, int], mat2_rank_dtype: Tuple[int, int]) -> int:
self_rank, self_dtype = self_rank_dtype
mat2_rank, mat2_dtype = mat2_rank_dtype
Expand Down Expand Up @@ -6369,4 +6379,3 @@ def _create_argparse() -> argparse.ArgumentParser:

if __name__ == "__main__":
main(_create_argparse().parse_args())

Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,9 @@ def emit_with_mutating_variants(key, **kwargs):
# Non-elementwise tensor compute ops
emit("aten::linear : (Tensor, Tensor, Tensor?) -> (Tensor)")
emit("aten::mm : (Tensor, Tensor) -> (Tensor)")
emit(
"aten::_scaled_mm : (Tensor, Tensor, Tensor, Tensor, Tensor?, Tensor?, int?, bool) -> (Tensor)"
)
emit("aten::_int_mm : (Tensor, Tensor) -> (Tensor)")
emit("aten::addmm : (Tensor, Tensor, Tensor, Scalar, Scalar) -> (Tensor)")
emit("aten::matmul : (Tensor, Tensor) -> (Tensor)")
Expand Down
Loading
Loading