diff --git a/include/NeuraDialect/NeuraPasses.h b/include/NeuraDialect/NeuraPasses.h index 0e35a4b3..6627feab 100644 --- a/include/NeuraDialect/NeuraPasses.h +++ b/include/NeuraDialect/NeuraPasses.h @@ -28,6 +28,7 @@ std::unique_ptr createMapToAcceleratorPass(); std::unique_ptr createGenerateCodePass(); std::unique_ptr createFuseControlFlowPass(); std::unique_ptr createCanonicalizeLiveInPass(); +std::unique_ptr createCanonicalizeCastPass(); #define GEN_PASS_REGISTRATION #include "NeuraDialect/NeuraPasses.h.inc" diff --git a/include/NeuraDialect/NeuraPasses.td b/include/NeuraDialect/NeuraPasses.td index 5c19bf94..9a89e957 100644 --- a/include/NeuraDialect/NeuraPasses.td +++ b/include/NeuraDialect/NeuraPasses.td @@ -86,4 +86,15 @@ def CanonicalizeLiveIn : Pass<"canonicalize-live-in", "ModuleOp"> { let constructor = "neura::createCanonicalizeLiveInPass()"; } +def CanonicalizeCast : Pass<"canonicalize-cast", "ModuleOp"> { + let summary = "Canonicalizes cast operations in the Neura dialect"; + let description = [{ + This pass applies canonicalization transformations to neura::cast operations. + The canonicalization includes: + 1. Removing redundant casts. + 2. Converting index (i64) types to i64 (index). 
+ }]; + let constructor = "neura::createCanonicalizeCastPass()"; +} + #endif // NEURA_PASSES_TD \ No newline at end of file diff --git a/lib/NeuraDialect/Transforms/CMakeLists.txt b/lib/NeuraDialect/Transforms/CMakeLists.txt index 15ba146f..d419bf9e 100644 --- a/lib/NeuraDialect/Transforms/CMakeLists.txt +++ b/lib/NeuraDialect/Transforms/CMakeLists.txt @@ -12,6 +12,7 @@ add_mlir_library( GenerateCodePass.cpp FuseControlFlowPass.cpp CanonicalizeLiveInPass.cpp + CanonicalizeCastPass.cpp DEPENDS MLIRNeuraTransformsIncGen diff --git a/lib/NeuraDialect/Transforms/CanonicalizeCastPass.cpp b/lib/NeuraDialect/Transforms/CanonicalizeCastPass.cpp new file mode 100644 index 00000000..5e4c1ebc --- /dev/null +++ b/lib/NeuraDialect/Transforms/CanonicalizeCastPass.cpp @@ -0,0 +1,144 @@ +#include "NeuraDialect/NeuraOps.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Block.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/Region.h" +#include "mlir/IR/Value.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" + +using namespace mlir; + +namespace { + +LogicalResult canonicalizeCast(Region ®ion) { + // Handles block arguments. + for (Block &block : region.getBlocks()) { + for (BlockArgument arg : block.getArguments()) { + if (arg.getType().isIndex()) { + // Replaces index type with i64. + arg.setType(IntegerType::get(arg.getContext(), 64)); + } + } + } + + region.walk([&](Operation *op) { + // Handles the value attributes in neura::ConstantOp. 
+ if (isa(op)) { + Attribute value_attr = op->getAttr("value"); + if (!value_attr) { + return; + } + if (IntegerAttr int_attr = dyn_cast(value_attr)) { + if (isa(op->getResult(0).getType())) { + return; + } + if (isa(op->getResult(0).getType())) { + IntegerAttr new_attr = IntegerAttr::get( + IntegerType::get(op->getContext(), 64), int_attr.getInt()); + op->setAttr("value", new_attr); + } + } + } + + // Replaces all index types with i64. + for (OpResult result : op->getOpResults()) { + auto type = result.getType(); + if (isa(type)) { + result.setType(mlir::IntegerType::get(op->getContext(), 64)); + } + } + + if (neura::CastOp cast_op = dyn_cast(op)) { + StringAttr cast_type_attr = + cast_op->getAttrOfType("cast_type"); + if (!cast_type_attr) + return; + StringRef cast_type = cast_type_attr.getValue(); + + Type src_type = cast_op->getOperand(0).getType(); + Type dst_type = cast_op->getResult(0).getType(); + + // Removes the index->i64 or i64->index cast operations. + if ((cast_type == "index_to_int" && isa(src_type) && + isa(dst_type) && + dyn_cast(src_type).getWidth() == 64 && + dyn_cast(dst_type).getWidth() == 64) || + (cast_type == "int_to_index" && isa(src_type) && + isa(dst_type) && + dyn_cast(src_type).getWidth() == 64 && + dyn_cast(dst_type).getWidth() == 64)) { + cast_op->getResult(0).replaceAllUsesWith(cast_op->getOperand(0)); + cast_op->erase(); + return; + } + + // Changes index->i32 or i32->index casts to i64->i32 or i32->i64. + if (cast_type == "index_to_int" && isa(dst_type) && + dyn_cast(dst_type).getWidth() == 32) { + cast_op->setAttr("cast_type", + StringAttr::get(op->getContext(), "i64_to_i32")); + return; + } + if (cast_type == "int_to_index" && isa(src_type) && + dyn_cast(src_type).getWidth() == 32) { + cast_op->setAttr("cast_type", + StringAttr::get(op->getContext(), "i32_to_i64")); + return; + } + // TODO: Handles other cast types if needed. 
+ } + }); + return success(); +} + +struct CanonicalizeCastPass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CanonicalizeCastPass) + StringRef getArgument() const override { return "canonicalize-cast"; } + StringRef getDescription() const override { + return "Canonicalizes cast operations in the Neura dialect, specifically " + "removing unnecessary index to i64 casts and vice versa."; + } + + void runOnOperation() override { + auto module_op = getOperation(); + + module_op.walk([&](Operation *op) { + Region *region = nullptr; + if (auto func_op = dyn_cast(op)) { + auto accel_attr = func_op->getAttrOfType("accelerator"); + if (!accel_attr || accel_attr.getValue() != "neura") { + return; + } + region = &func_op.getBody(); + } else if (auto llvm_func = dyn_cast(op)) { + auto accel_attr = llvm_func->getAttrOfType("accelerator"); + if (!accel_attr || accel_attr.getValue() != "neura") { + return; + } + region = &llvm_func.getBody(); + } else { + return; + } + + if (!region || region->empty()) { + return; + } + + if (failed(canonicalizeCast(*region))) { + signalPassFailure(); + return; + } + }); + } +}; +} // namespace + +namespace mlir::neura { +std::unique_ptr createCanonicalizeCastPass() { + return std::make_unique(); +} +} // namespace mlir::neura \ No newline at end of file diff --git a/test/controflow_fuse/perfect_nested/perfect_nested.mlir b/test/controflow_fuse/perfect_nested/perfect_nested.mlir index a54aaaa1..4f115df6 100644 --- a/test/controflow_fuse/perfect_nested/perfect_nested.mlir +++ b/test/controflow_fuse/perfect_nested/perfect_nested.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir // RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura 
--lower-builtin-to-neura --lower-llvm-to-neura --canonicalize-cast | FileCheck %s --check-prefix=CAST // RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --leverage-predicated-value --transform-ctrl-to-data-flow | FileCheck %s -check-prefix=CTRL2DATA module attributes {} { @@ -45,6 +46,31 @@ module attributes {} { // CHECK-NEXT: "neura.return"() : () -> () // CHECK-NEXT: } +// CAST: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CAST-NEXT: %0 = "neura.constant"() <{predicate = true, value = 1 : i64}> : () -> i64 +// CAST-NEXT: %1 = "neura.constant"() <{predicate = true, value = 128 : i64}> : () -> i64 +// CAST-NEXT: %2 = "neura.constant"() <{predicate = true, value = 0 : i64}> : () -> i64 +// CAST-NEXT: neura.br %2 : i64 to ^bb1 +// CAST-NEXT: ^bb1(%3: i64): // 2 preds: ^bb0, ^bb5 +// CAST-NEXT: %4 = "neura.icmp"(%3, %1) <{cmpType = "slt"}> : (i64, i64) -> i1 +// CAST-NEXT: neura.cond_br %4 : i1 then to ^bb2 else to ^bb6 +// CAST-NEXT: ^bb2: // pred: ^bb1 +// CAST-NEXT: neura.br %2 : i64 to ^bb3 +// CAST-NEXT: ^bb3(%5: i64): // 2 preds: ^bb2, ^bb4 +// CAST-NEXT: %6 = "neura.icmp"(%5, %1) <{cmpType = "slt"}> : (i64, i64) -> i1 +// CAST-NEXT: neura.cond_br %6 : i1 then to ^bb4 else to ^bb5 +// CAST-NEXT: ^bb4: // pred: ^bb3 +// CAST-NEXT: %7 = neura.load_indexed %arg0[%2, %2, %2, %2, %2, %5 : i64, i64, i64, i64, i64, i64] memref : i8 +// CAST-NEXT: neura.store_indexed %7 to %arg1[%2, %2, %3, %2, %2, %5 : i64, i64, i64, i64, i64, i64] memref : i8 +// CAST-NEXT: %8 = "neura.add"(%5, %0) : (i64, i64) -> i64 +// CAST-NEXT: neura.br %8 : i64 to ^bb3 +// CAST-NEXT: ^bb5: // pred: ^bb3 +// CAST-NEXT: %9 = "neura.add"(%3, %0) : (i64, i64) -> i64 +// CAST-NEXT: neura.br %9 : i64 to ^bb1 +// CAST-NEXT: ^bb6: // pred: ^bb1 +// CAST-NEXT: "neura.return"() : () -> () +// 
CAST-NEXT: } + // CTRL2DATA: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{predicate = true, value = 1 : index}> : () -> !neura.data // CTRL2DATA-NEXT: %1 = "neura.grant_always"(%0) : (!neura.data) -> !neura.data diff --git a/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir b/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir index d5e6f446..d77f57f1 100644 --- a/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir +++ b/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir // RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --canonicalize-cast | FileCheck %s --check-prefix=CAST // RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --leverage-predicated-value --transform-ctrl-to-data-flow | FileCheck %s -check-prefix=CTRL2DATA module attributes {} { @@ -50,6 +51,32 @@ module attributes {} { // CHECK-NEXT: "neura.return"(%6) : (i32) -> () // CHECK-NEXT: } +// CAST: func.func @_Z27perfect_nested_reduction_2dPA128_i(%arg0: memref) -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CAST-NEXT: %0 = "neura.constant"() <{predicate = true, value = 1 : i64}> : () -> i64 +// CAST-NEXT: %1 = "neura.constant"() <{predicate = true, value = 128 : i64}> : () -> i64 +// CAST-NEXT: %2 = "neura.constant"() <{predicate = true, value = 0 : i32}> : () -> i32 +// CAST-NEXT: %3 = "neura.constant"() 
<{predicate = true, value = 0 : i64}> : () -> i64 +// CAST-NEXT: neura.br %3, %2 : i64, i32 to ^bb1 +// CAST-NEXT: ^bb1(%4: i64, %5: i32): // 2 preds: ^bb0, ^bb5 +// CAST-NEXT: %6 = "neura.icmp"(%4, %1) <{cmpType = "slt"}> : (i64, i64) -> i1 +// CAST-NEXT: neura.cond_br %6 : i1 then to ^bb2 else to ^bb6 +// CAST-NEXT: ^bb2: // pred: ^bb1 +// CAST-NEXT: neura.br %3, %5 : i64, i32 to ^bb3 +// CAST-NEXT: ^bb3(%7: i64, %8: i32): // 2 preds: ^bb2, ^bb4 +// CAST-NEXT: %9 = "neura.icmp"(%7, %1) <{cmpType = "slt"}> : (i64, i64) -> i1 +// CAST-NEXT: neura.cond_br %9 : i1 then to ^bb4 else to ^bb5 +// CAST-NEXT: ^bb4: // pred: ^bb3 +// CAST-NEXT: %10 = neura.load_indexed %arg0[%4, %7 : i64, i64] memref : i32 +// CAST-NEXT: %11 = "neura.add"(%8, %10) : (i32, i32) -> i32 +// CAST-NEXT: %12 = "neura.add"(%7, %0) : (i64, i64) -> i64 +// CAST-NEXT: neura.br %12, %11 : i64, i32 to ^bb3 +// CAST-NEXT: ^bb5: // pred: ^bb3 +// CAST-NEXT: %13 = "neura.add"(%4, %0) : (i64, i64) -> i64 +// CAST-NEXT: neura.br %13, %8 : i64, i32 to ^bb1 +// CAST-NEXT: ^bb6: // pred: ^bb1 +// CAST-NEXT: "neura.return"(%5) : (i32) -> () +// CAST-NEXT: } + // CTRL2DATA: func.func @_Z27perfect_nested_reduction_2dPA128_i(%arg0: memref) -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{predicate = true, value = 1 : index}> : () -> !neura.data // CTRL2DATA-NEXT: %1 = "neura.grant_always"(%0) : (!neura.data) -> !neura.data diff --git a/test/controflow_fuse/simpleloop/simpleloop.mlir b/test/controflow_fuse/simpleloop/simpleloop.mlir index 53ff98fe..41cdad3a 100644 --- a/test/controflow_fuse/simpleloop/simpleloop.mlir +++ b/test/controflow_fuse/simpleloop/simpleloop.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir // RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s +// 
RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --canonicalize-cast | FileCheck %s --check-prefix=CAST // RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --leverage-predicated-value --transform-ctrl-to-data-flow | FileCheck %s -check-prefix=CTRL2DATA module attributes {} { @@ -35,6 +36,23 @@ module attributes {} { // CHECK-NEXT: "neura.return"(%6) : (i32) -> () // CHECK-NEXT: } +// CAST: func.func @_Z10simpleloopv() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CAST-NEXT: %0 = "neura.constant"() <{predicate = true, value = 1 : i64}> : () -> i64 +// CAST-NEXT: %1 = "neura.constant"() <{predicate = true, value = 128 : i64}> : () -> i64 +// CAST-NEXT: %2 = "neura.constant"() <{predicate = true, value = 0 : i32}> : () -> i32 +// CAST-NEXT: %3 = "neura.constant"() <{predicate = true, value = 0 : i64}> : () -> i64 +// CAST-NEXT: neura.br %3, %2 : i64, i32 to ^bb1 +// CAST-NEXT: ^bb1(%4: i64, %5: i32): // 2 preds: ^bb0, ^bb2 +// CAST-NEXT: %6 = "neura.icmp"(%4, %1) <{cmpType = "slt"}> : (i64, i64) -> i1 +// CAST-NEXT: neura.cond_br %6 : i1 then to ^bb2 else to ^bb3 +// CAST-NEXT: ^bb2: // pred: ^bb1 +// CAST-NEXT: %7 = "neura.cast"(%4) <{cast_type = "i64_to_i32"}> : (i64) -> i32 +// CAST-NEXT: %8 = "neura.add"(%5, %7) : (i32, i32) -> i32 +// CAST-NEXT: %9 = "neura.add"(%4, %0) : (i64, i64) -> i64 +// CAST-NEXT: neura.br %9, %8 : i64, i32 to ^bb1 +// CAST-NEXT: ^bb3: // pred: ^bb1 +// CAST-NEXT: "neura.return"(%5) : (i32) -> () +// CAST-NEXT: } // CTRL2DATA: func.func @_Z10simpleloopv() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{predicate = true, value = 1 : index}> : () -> !neura.data