Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/NeuraDialect/NeuraPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ std::unique_ptr<mlir::Pass> createMapToAcceleratorPass();
std::unique_ptr<mlir::Pass> createGenerateCodePass();
std::unique_ptr<mlir::Pass> createFuseControlFlowPass();
std::unique_ptr<mlir::Pass> createCanonicalizeLiveInPass();
std::unique_ptr<mlir::Pass> createCanonicalizeCastPass();

#define GEN_PASS_REGISTRATION
#include "NeuraDialect/NeuraPasses.h.inc"
Expand Down
11 changes: 11 additions & 0 deletions include/NeuraDialect/NeuraPasses.td
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,15 @@ def CanonicalizeLiveIn : Pass<"canonicalize-live-in", "ModuleOp"> {
let constructor = "neura::createCanonicalizeLiveInPass()";
}

def CanonicalizeCast : Pass<"canonicalize-cast", "ModuleOp"> {
  let summary = "Canonicalizes cast operations in the Neura dialect";
  let description = [{
    This pass applies canonicalization transformations to neura::cast operations.
    The canonicalization includes:
      1. Rewriting index-typed values (block arguments, op results, and
         integer constant attributes) to i64.
      2. Removing index<->i64 casts that become i64->i64 no-ops after the
         rewrite, and retyping index<->i32 casts to i64<->i32 casts.
  }];
  let constructor = "neura::createCanonicalizeCastPass()";
}

#endif // NEURA_PASSES_TD
1 change: 1 addition & 0 deletions lib/NeuraDialect/Transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ add_mlir_library(
GenerateCodePass.cpp
FuseControlFlowPass.cpp
CanonicalizeLiveInPass.cpp
CanonicalizeCastPass.cpp

DEPENDS
MLIRNeuraTransformsIncGen
Expand Down
144 changes: 144 additions & 0 deletions lib/NeuraDialect/Transforms/CanonicalizeCastPass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#include "NeuraDialect/NeuraOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Block.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/Region.h"
#include "mlir/IR/Value.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"

using namespace mlir;

namespace {

// Canonicalizes casts in `region` by rewriting every index-typed value to
// i64 and then simplifying the neura::cast operations that the rewrite
// makes redundant. Always returns success(); the LogicalResult is kept so
// callers can propagate failures if future rewrites become fallible.
//
// NOTE(review): only the block arguments of `region`'s own blocks are
// retyped; block arguments of blocks nested inside sub-regions are not
// touched — confirm whether nested regions can occur here after lowering.
LogicalResult canonicalizeCast(Region &region) {
  // Retypes index block arguments to i64.
  for (Block &block : region.getBlocks()) {
    for (BlockArgument arg : block.getArguments()) {
      if (arg.getType().isIndex()) {
        arg.setType(IntegerType::get(arg.getContext(), 64));
      }
    }
  }

  region.walk([&](Operation *op) {
    // Rewrites index-typed integer `value` attributes on neura::ConstantOp
    // to i64 so the attribute type matches the retyped result below.
    // (Unlike before, a constant without a `value` attribute no longer
    // skips the generic result-type rewrite.)
    if (isa<neura::ConstantOp>(op)) {
      if (Attribute value_attr = op->getAttr("value")) {
        if (IntegerAttr int_attr = dyn_cast<IntegerAttr>(value_attr)) {
          if (isa<IndexType>(op->getResult(0).getType())) {
            IntegerAttr new_attr = IntegerAttr::get(
                IntegerType::get(op->getContext(), 64), int_attr.getInt());
            op->setAttr("value", new_attr);
          }
        }
      }
    }

    // Replaces all index-typed results with i64. Ops precede their users
    // within a block, so producers are retyped before casts are inspected.
    for (OpResult result : op->getOpResults()) {
      if (isa<IndexType>(result.getType())) {
        result.setType(IntegerType::get(op->getContext(), 64));
      }
    }

    neura::CastOp cast_op = dyn_cast<neura::CastOp>(op);
    if (!cast_op) {
      return;
    }
    StringAttr cast_type_attr = cast_op->getAttrOfType<StringAttr>("cast_type");
    if (!cast_type_attr) {
      return;
    }
    StringRef cast_type = cast_type_attr.getValue();

    IntegerType src_int =
        dyn_cast<IntegerType>(cast_op->getOperand(0).getType());
    IntegerType dst_int =
        dyn_cast<IntegerType>(cast_op->getResult(0).getType());

    // Removes index<->i64 casts: after the rewrite above both sides are
    // i64, so the cast is a no-op. Erasing the op being visited is safe in
    // a post-order walk.
    bool is_index_cast =
        cast_type == "index_to_int" || cast_type == "int_to_index";
    if (is_index_cast && src_int && dst_int && src_int.getWidth() == 64 &&
        dst_int.getWidth() == 64) {
      cast_op->getResult(0).replaceAllUsesWith(cast_op->getOperand(0));
      cast_op->erase();
      return;
    }

    // Retypes index->i32 casts to i64->i32 (index is now i64).
    if (cast_type == "index_to_int" && dst_int && dst_int.getWidth() == 32) {
      cast_op->setAttr("cast_type",
                       StringAttr::get(op->getContext(), "i64_to_i32"));
      return;
    }
    // Retypes i32->index casts to i32->i64.
    if (cast_type == "int_to_index" && src_int && src_int.getWidth() == 32) {
      cast_op->setAttr("cast_type",
                       StringAttr::get(op->getContext(), "i32_to_i64"));
      return;
    }
    // TODO: Handles other cast types if needed.
  });
  return success();
}

// Module pass that runs cast canonicalization on every function body that
// is tagged with the "neura" accelerator attribute.
struct CanonicalizeCastPass
    : public PassWrapper<CanonicalizeCastPass, OperationPass<ModuleOp>> {
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(CanonicalizeCastPass)
  StringRef getArgument() const override { return "canonicalize-cast"; }
  StringRef getDescription() const override {
    return "Canonicalizes cast operations in the Neura dialect, specifically "
           "removing unnecessary index to i64 casts and vice versa.";
  }

  void runOnOperation() override {
    ModuleOp module_op = getOperation();

    // True when the op carries accelerator = "neura".
    auto targets_neura = [](Operation *op) {
      auto accel_attr = op->getAttrOfType<StringAttr>("accelerator");
      return accel_attr && accel_attr.getValue() == "neura";
    };

    module_op.walk([&](Operation *op) {
      // Only func.func and llvm.func bodies are canonicalized.
      Region *body = nullptr;
      if (auto func_op = dyn_cast<func::FuncOp>(op)) {
        if (targets_neura(op)) {
          body = &func_op.getBody();
        }
      } else if (auto llvm_func = dyn_cast<LLVM::LLVMFuncOp>(op)) {
        if (targets_neura(op)) {
          body = &llvm_func.getBody();
        }
      }
      // Skips non-function ops, untagged functions, and declarations.
      if (!body || body->empty()) {
        return;
      }
      if (failed(canonicalizeCast(*body))) {
        signalPassFailure();
      }
    });
  }
};
} // namespace

namespace mlir::neura {
/// Creates the --canonicalize-cast pass; declared in NeuraPasses.h and
/// referenced by the generated pass registration.
std::unique_ptr<mlir::Pass> createCanonicalizeCastPass() {
return std::make_unique<CanonicalizeCastPass>();
}
} // namespace mlir::neura
26 changes: 26 additions & 0 deletions test/controflow_fuse/perfect_nested/perfect_nested.mlir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir
// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s
// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --canonicalize-cast | FileCheck %s --check-prefix=CAST
// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --leverage-predicated-value --transform-ctrl-to-data-flow | FileCheck %s -check-prefix=CTRL2DATA

module attributes {} {
Expand Down Expand Up @@ -45,6 +46,31 @@ module attributes {} {
// CHECK-NEXT: "neura.return"() : () -> ()
// CHECK-NEXT: }

// CAST: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref<?x1x1x1x1x128xi8>, %arg1: memref<?x1x128x1x1x128xi8>) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage<external>} {
// CAST-NEXT: %0 = "neura.constant"() <{predicate = true, value = 1 : i64}> : () -> i64
// CAST-NEXT: %1 = "neura.constant"() <{predicate = true, value = 128 : i64}> : () -> i64
// CAST-NEXT: %2 = "neura.constant"() <{predicate = true, value = 0 : i64}> : () -> i64
// CAST-NEXT: neura.br %2 : i64 to ^bb1
// CAST-NEXT: ^bb1(%3: i64): // 2 preds: ^bb0, ^bb5
// CAST-NEXT: %4 = "neura.icmp"(%3, %1) <{cmpType = "slt"}> : (i64, i64) -> i1
// CAST-NEXT: neura.cond_br %4 : i1 then to ^bb2 else to ^bb6
// CAST-NEXT: ^bb2: // pred: ^bb1
// CAST-NEXT: neura.br %2 : i64 to ^bb3
// CAST-NEXT: ^bb3(%5: i64): // 2 preds: ^bb2, ^bb4
// CAST-NEXT: %6 = "neura.icmp"(%5, %1) <{cmpType = "slt"}> : (i64, i64) -> i1
// CAST-NEXT: neura.cond_br %6 : i1 then to ^bb4 else to ^bb5
// CAST-NEXT: ^bb4: // pred: ^bb3
// CAST-NEXT: %7 = neura.load_indexed %arg0[%2, %2, %2, %2, %2, %5 : i64, i64, i64, i64, i64, i64] memref<?x1x1x1x1x128xi8> : i8
// CAST-NEXT: neura.store_indexed %7 to %arg1[%2, %2, %3, %2, %2, %5 : i64, i64, i64, i64, i64, i64] memref<?x1x128x1x1x128xi8> : i8
// CAST-NEXT: %8 = "neura.add"(%5, %0) : (i64, i64) -> i64
// CAST-NEXT: neura.br %8 : i64 to ^bb3
// CAST-NEXT: ^bb5: // pred: ^bb3
// CAST-NEXT: %9 = "neura.add"(%3, %0) : (i64, i64) -> i64
// CAST-NEXT: neura.br %9 : i64 to ^bb1
// CAST-NEXT: ^bb6: // pred: ^bb1
// CAST-NEXT: "neura.return"() : () -> ()
// CAST-NEXT: }

// CTRL2DATA: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref<?x1x1x1x1x128xi8>, %arg1: memref<?x1x128x1x1x128xi8>) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage<external>} {
// CTRL2DATA-NEXT: %0 = "neura.constant"() <{predicate = true, value = 1 : index}> : () -> !neura.data<index, i1>
// CTRL2DATA-NEXT: %1 = "neura.grant_always"(%0) : (!neura.data<index, i1>) -> !neura.data<index, i1>
Expand Down
27 changes: 27 additions & 0 deletions test/controflow_fuse/perfect_reduction/perfect_reduction.mlir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir
// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s
// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --canonicalize-cast | FileCheck %s --check-prefix=CAST
// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --leverage-predicated-value --transform-ctrl-to-data-flow | FileCheck %s -check-prefix=CTRL2DATA

module attributes {} {
Expand Down Expand Up @@ -50,6 +51,32 @@ module attributes {} {
// CHECK-NEXT: "neura.return"(%6) : (i32) -> ()
// CHECK-NEXT: }

// CAST: func.func @_Z27perfect_nested_reduction_2dPA128_i(%arg0: memref<?x128xi32>) -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage<external>} {
// CAST-NEXT: %0 = "neura.constant"() <{predicate = true, value = 1 : i64}> : () -> i64
// CAST-NEXT: %1 = "neura.constant"() <{predicate = true, value = 128 : i64}> : () -> i64
// CAST-NEXT: %2 = "neura.constant"() <{predicate = true, value = 0 : i32}> : () -> i32
// CAST-NEXT: %3 = "neura.constant"() <{predicate = true, value = 0 : i64}> : () -> i64
// CAST-NEXT: neura.br %3, %2 : i64, i32 to ^bb1
// CAST-NEXT: ^bb1(%4: i64, %5: i32): // 2 preds: ^bb0, ^bb5
// CAST-NEXT: %6 = "neura.icmp"(%4, %1) <{cmpType = "slt"}> : (i64, i64) -> i1
// CAST-NEXT: neura.cond_br %6 : i1 then to ^bb2 else to ^bb6
// CAST-NEXT: ^bb2: // pred: ^bb1
// CAST-NEXT: neura.br %3, %5 : i64, i32 to ^bb3
// CAST-NEXT: ^bb3(%7: i64, %8: i32): // 2 preds: ^bb2, ^bb4
// CAST-NEXT: %9 = "neura.icmp"(%7, %1) <{cmpType = "slt"}> : (i64, i64) -> i1
// CAST-NEXT: neura.cond_br %9 : i1 then to ^bb4 else to ^bb5
// CAST-NEXT: ^bb4: // pred: ^bb3
// CAST-NEXT: %10 = neura.load_indexed %arg0[%4, %7 : i64, i64] memref<?x128xi32> : i32
// CAST-NEXT: %11 = "neura.add"(%8, %10) : (i32, i32) -> i32
// CAST-NEXT: %12 = "neura.add"(%7, %0) : (i64, i64) -> i64
// CAST-NEXT: neura.br %12, %11 : i64, i32 to ^bb3
// CAST-NEXT: ^bb5: // pred: ^bb3
// CAST-NEXT: %13 = "neura.add"(%4, %0) : (i64, i64) -> i64
// CAST-NEXT: neura.br %13, %8 : i64, i32 to ^bb1
// CAST-NEXT: ^bb6: // pred: ^bb1
// CAST-NEXT: "neura.return"(%5) : (i32) -> ()
// CAST-NEXT: }

// CTRL2DATA: func.func @_Z27perfect_nested_reduction_2dPA128_i(%arg0: memref<?x128xi32>) -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage<external>} {
// CTRL2DATA-NEXT: %0 = "neura.constant"() <{predicate = true, value = 1 : index}> : () -> !neura.data<index, i1>
// CTRL2DATA-NEXT: %1 = "neura.grant_always"(%0) : (!neura.data<index, i1>) -> !neura.data<index, i1>
Expand Down
18 changes: 18 additions & 0 deletions test/controflow_fuse/simpleloop/simpleloop.mlir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir
// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s
// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --canonicalize-cast | FileCheck %s --check-prefix=CAST
// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --leverage-predicated-value --transform-ctrl-to-data-flow | FileCheck %s -check-prefix=CTRL2DATA

module attributes {} {
Expand Down Expand Up @@ -35,6 +36,23 @@ module attributes {} {
// CHECK-NEXT: "neura.return"(%6) : (i32) -> ()
// CHECK-NEXT: }

// CAST: func.func @_Z10simpleloopv() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage<external>} {
// CAST-NEXT: %0 = "neura.constant"() <{predicate = true, value = 1 : i64}> : () -> i64
// CAST-NEXT: %1 = "neura.constant"() <{predicate = true, value = 128 : i64}> : () -> i64
// CAST-NEXT: %2 = "neura.constant"() <{predicate = true, value = 0 : i32}> : () -> i32
// CAST-NEXT: %3 = "neura.constant"() <{predicate = true, value = 0 : i64}> : () -> i64
// CAST-NEXT: neura.br %3, %2 : i64, i32 to ^bb1
// CAST-NEXT: ^bb1(%4: i64, %5: i32): // 2 preds: ^bb0, ^bb2
// CAST-NEXT: %6 = "neura.icmp"(%4, %1) <{cmpType = "slt"}> : (i64, i64) -> i1
// CAST-NEXT: neura.cond_br %6 : i1 then to ^bb2 else to ^bb3
// CAST-NEXT: ^bb2: // pred: ^bb1
// CAST-NEXT: %7 = "neura.cast"(%4) <{cast_type = "i64_to_i32"}> : (i64) -> i32
// CAST-NEXT: %8 = "neura.add"(%5, %7) : (i32, i32) -> i32
// CAST-NEXT: %9 = "neura.add"(%4, %0) : (i64, i64) -> i64
// CAST-NEXT: neura.br %9, %8 : i64, i32 to ^bb1
// CAST-NEXT: ^bb3: // pred: ^bb1
// CAST-NEXT: "neura.return"(%5) : (i32) -> ()
// CAST-NEXT: }

// CTRL2DATA: func.func @_Z10simpleloopv() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage<external>} {
// CTRL2DATA-NEXT: %0 = "neura.constant"() <{predicate = true, value = 1 : index}> : () -> !neura.data<index, i1>
Expand Down