diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td index 910da7c5..628190c4 100644 --- a/include/NeuraDialect/NeuraOps.td +++ b/include/NeuraDialect/NeuraOps.td @@ -18,8 +18,8 @@ def Neura_ConstantOp : Op { def Neura_AddOp : Op { let summary = "Integer addition operation"; let opName = "add"; - let arguments = (ins AnyInteger:$lhs, AnyInteger:$rhs, Optional:$predicate); - let results = (outs AnyInteger:$result); + let arguments = (ins AnyType:$lhs, AnyType:$rhs, Optional:$predicate); + let results = (outs AnyType:$result); // let assemblyFormat = "$lhs `,` $rhs `,` $predicate attr-dict `:` type($result)"; let traits = [SameOperandsAndResultElementType]; } diff --git a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp index ab0335e6..3af34324 100644 --- a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp +++ b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp @@ -28,7 +28,8 @@ void getBlocksInPostOrder(Block *startBlock, SmallVectorImpl &postOrder // Creates phi nodes for all live-in values in the given block. void createPhiNodesForBlock(Block *block, OpBuilder &builder, - DenseMap &value_map) { + DenseMap &value_map, + SmallVectorImpl> &deferred_ctrl_movs) { if (block->hasNoPredecessors()) { // Skips phi insertion for entry block. return; @@ -41,17 +42,9 @@ void createPhiNodesForBlock(Block *block, OpBuilder &builder, // Identifies operands defined in other blocks. if (operand.getDefiningOp() && operand.getDefiningOp()->getBlock() != block) { - // Checks if the live-in is a block argument. SSA form forces this rule. - bool found_in_block_argument = false; - for (BlockArgument arg : block->getArguments()) { - if (arg == operand) { - found_in_block_argument = true; - break; - } - } live_ins.push_back(operand); + continue; } - // Collects all block arguments. 
if (auto blockArg = llvm::dyn_cast(operand)) { live_ins.push_back(operand); @@ -107,7 +100,19 @@ void createPhiNodesForBlock(Block *block, OpBuilder &builder, llvm::errs() << "Unknown branch terminator in block: " << *pred << "\n"; continue; } - phi_operands.push_back(incoming); + + // If the incoming value is defined in the same block, inserts a `neura.reserve` + // and defers a backward ctrl move. + if (incoming.getDefiningOp() && incoming.getDefiningOp()->getBlock() == block) { + builder.setInsertionPointToStart(block); + auto placeholder = builder.create(loc, incoming.getType()); + phi_operands.push_back(placeholder.getResult()); + // Defers the backward ctrl move operation to be inserted after all phi operands + // are defined. Inserted: (real_defined_value, just_created_reserve, current_block). + deferred_ctrl_movs.emplace_back(incoming, placeholder.getResult(), block); + } else { + phi_operands.push_back(incoming); + } } } @@ -181,6 +186,13 @@ struct TransformCtrlToDataFlowPass void runOnOperation() override { ModuleOp module = getOperation(); + // Declares a vector to hold deferred backward ctrl move operations. + // This is useful when a live-in value is defined within the same block. + // The tuple contains: + // - real value (the one that is defined in the same block, after the placeholder) + // - placeholder value (the one that will be used in the phi node) + // - block where the backward ctrl move should be inserted + SmallVector, 4> deferred_ctrl_movs; module.walk([&](func::FuncOp func) { // Get blocks in post-order SmallVector postOrder; @@ -194,7 +206,7 @@ struct TransformCtrlToDataFlowPass // Process blocks bottom-up for (Block *block : postOrder) { // Creates phi nodes for live-ins. - createPhiNodesForBlock(block, builder, value_map); + createPhiNodesForBlock(block, builder, value_map, deferred_ctrl_movs); } // Flattens blocks into the entry block. 
@@ -234,6 +246,15 @@ struct TransformCtrlToDataFlowPass block->erase(); } }); + + // Inserts the deferred backward ctrl move operations after phi operands + // are defined. + for (auto &[realVal, placeholder, block] : deferred_ctrl_movs) { + Operation *defOp = realVal.getDefiningOp(); + assert(defOp && "Backward ctrl move's source must be an op result"); + OpBuilder movBuilder(defOp->getBlock(), ++Block::iterator(defOp)); + movBuilder.create(defOp->getLoc(), realVal, placeholder); + } } }; } // namespace diff --git a/test/neura/ctrl/branch_for.mlir b/test/neura/ctrl/branch_for.mlir new file mode 100644 index 00000000..d6207104 --- /dev/null +++ b/test/neura/ctrl/branch_for.mlir @@ -0,0 +1,65 @@ +// RUN: mlir-neura-opt %s \ +// RUN: --assign-accelerator \ +// RUN: --lower-llvm-to-neura \ +// RUN: --leverage-predicated-value \ +// RUN: | FileCheck %s + +// RUN: mlir-neura-opt %s \ +// RUN: --assign-accelerator \ +// RUN: --lower-llvm-to-neura \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: | FileCheck %s -check-prefix=CTRL2DATA + +func.func @loop_test() -> f32 { + %n = llvm.mlir.constant(10 : i64) : i64 + %c0 = llvm.mlir.constant(0 : i64) : i64 + %c1 = llvm.mlir.constant(1 : i64) : i64 + %c1f = llvm.mlir.constant(3.0 : f32) : f32 + %acc_init = llvm.mlir.constant(0.0 : f32) : f32 + + llvm.br ^bb1(%c0, %acc_init : i64, f32) + +^bb1(%i: i64, %acc: f32): // loop body + check + increment + %next_acc = llvm.fadd %acc, %c1f : f32 + %i_next = llvm.add %i, %c1 : i64 + %cmp = llvm.icmp "slt" %i_next, %n : i64 + llvm.cond_br %cmp, ^bb1(%i_next, %next_acc : i64, f32), ^exit(%next_acc : f32) + +^exit(%result: f32): + return %result : f32 +} + +// CHECK: func.func @loop_test() -> f32 attributes {accelerator = "neura"} { +// CHECK-NEXT: %0 = "neura.constant"() <{predicate = true, value = 10 : i64}> : () -> !neura.data +// CHECK-NEXT: %1 = "neura.constant"() <{predicate = true, value = 0 : i64}> : () -> !neura.data +// CHECK-NEXT: %2 = 
"neura.constant"() <{predicate = true, value = 1 : i64}> : () -> !neura.data +// CHECK-NEXT: %3 = "neura.constant"() <{predicate = true, value = 3.000000e+00 : f32}> : () -> !neura.data +// CHECK-NEXT: %4 = "neura.constant"() <{predicate = true, value = 0.000000e+00 : f32}> : () -> !neura.data +// CHECK-NEXT: neura.br %1, %4 : !neura.data, !neura.data to ^bb1 +// CHECK-NEXT: ^bb1(%5: !neura.data, %6: !neura.data): // 2 preds: ^bb0, ^bb1 +// CHECK-NEXT: %7 = "neura.fadd"(%6, %3) : (!neura.data, !neura.data) -> !neura.data +// CHECK-NEXT: %8 = "neura.add"(%5, %2) : (!neura.data, !neura.data) -> !neura.data +// CHECK-NEXT: %9 = "neura.icmp"(%8, %0) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CHECK-NEXT: neura.cond_br %9 : !neura.data then %8, %7 : !neura.data, !neura.data to ^bb1 else %7 : !neura.data to ^bb2 +// CHECK-NEXT: ^bb2(%10: !neura.data): // pred: ^bb1 +// CHECK-NEXT: "neura.return"(%10) : (!neura.data) -> () +// CHECK-NEXT: } + +// CTRL2DATA: func.func @loop_test() -> f32 attributes {accelerator = "neura"} { +// CTRL2DATA-NEXT: %0 = "neura.constant"() <{predicate = true, value = 10 : i64}> : () -> !neura.data +// CTRL2DATA-NEXT: %1 = "neura.constant"() <{predicate = true, value = 0 : i64}> : () -> !neura.data +// CTRL2DATA-NEXT: %2 = "neura.constant"() <{predicate = true, value = 1 : i64}> : () -> !neura.data +// CTRL2DATA-NEXT: %3 = "neura.constant"() <{predicate = true, value = 3.000000e+00 : f32}> : () -> !neura.data +// CTRL2DATA-NEXT: %4 = "neura.constant"() <{predicate = true, value = 0.000000e+00 : f32}> : () -> !neura.data +// CTRL2DATA-NEXT: %5 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %6 = "neura.phi"(%1, %5) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %7 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %8 = "neura.phi"(%4, %7) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %9 = "neura.fadd"(%8, %3) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov 
%9 -> %7 : !neura.data !neura.data +// CTRL2DATA-NEXT: %10 = "neura.add"(%6, %2) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %10 -> %5 : !neura.data !neura.data +// CTRL2DATA-NEXT: %11 = "neura.icmp"(%10, %0) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: "neura.return"(%9) : (!neura.data) -> () +// CTRL2DATA-NEXT: }