diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 81f8dda4..00000000 --- a/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -.cache/ -build/ -test/lit.cfg \ No newline at end of file diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td index 25028b57..537e3b41 100644 --- a/include/NeuraDialect/NeuraOps.td +++ b/include/NeuraDialect/NeuraOps.td @@ -32,6 +32,23 @@ def Neura_SubOp : Op { let traits = [SameOperandsAndResultElementType]; } +def Neura_MulOp : Op { + let summary = "Integer multiplication operation"; + let opName = "mul"; + let arguments = (ins AnyType:$lhs, AnyType:$rhs, Optional:$predicate); + let results = (outs AnyType:$result); + // let assemblyFormat = "$lhs `,` $rhs `,` $predicate attr-dict `:` type($result)"; + let traits = [SameOperandsAndResultElementType]; +} + +def Neura_DivOp : Op { + let summary = "Integer division operation"; + let arguments = (ins AnyType:$lhs, AnyType:$rhs, Optional:$predicate); + let results = (outs AnyType:$result); + // let assemblyFormat = "$lhs `,` $rhs `,` $predicate attr-dict `:` type($result)"; + let traits = [SameOperandsAndResultElementType]; +} + // Defines a floating-point addition operation. def Neura_FAddOp : Op { let summary = "Floating addition operation"; @@ -147,7 +164,7 @@ def Neura_StoreIndexedOp: Op { let summary = "Pointer computation using offset indices"; - let arguments = (ins AnyType:$base, Variadic:$indicesAndPredicate); + let arguments = (ins AnyType:$base, Variadic:$indicesAndPredicate); let results = (outs AnyType:$result); // let assemblyFormat = "$base `[` $indicesAndPredicate `]` `,` $predicate attr-dict"; } @@ -170,7 +187,7 @@ def Neura_Br : Op { } def Neura_SelOp : Op { - let arguments = (ins AnyType:$ifTrue, AnyType:$ifFalse, I1:$cond); + let arguments = (ins AnyType:$ifTrue, AnyType:$ifFalse, AnyType:$cond); let results = (outs AnyType:$result); // let assemblyFormat = "$ifTrue `,` $ifFalse `,` $cond attr-dict `:` type($ifTrue)"; } @@ -351,4 +368,36 @@ def Neura_GrantAlwaysOp : Op { let results = (outs AnyType:$result); // let assemblyFormat = "$value attr-dict `:` type($value) `->` type($result)"; +} + +// ---------------------------------------------------- +// Defines fused control flow operations. + +def Neura_LoopControllerOp : Op{ + let summary = "Generates loop indicies and valid predicates."; + let description = [{ + Manages a single level of loop execution based on cycle counting. + Each loop_controller outputs a current index value and a valid predicate. + + The loop_controller uses dynamic loop bounds (start, end, step), + allowing for variable-length loops and runtime-determined bounds. + + The execution is conditioned on the parent_valid input, creating an + efficient hierarchical structure for nested loops. + }]; + + let arguments = (ins + AnyType:$parent_valid, // Valid predicate from the parent loop + AnyType:$start, // Start index of the loop + AnyType:$end, // End index of the loop + AnyType:$step // Step size for the loop + ); + + let results = (outs + AnyType:$index, // Current loop index + AnyType:$valid // Valid predicate for the current index + ); + + let assemblyFormat = + "$parent_valid `(` $start `,` $end `,` $step `)` attr-dict `:` type($parent_valid) `,` type($start) `,` type($end) `,` type($step) `->` type($index) `,` type($valid)"; } \ No newline at end of file diff --git a/include/NeuraDialect/NeuraPasses.h b/include/NeuraDialect/NeuraPasses.h index 8b444603..0c8c252a 100644 --- a/include/NeuraDialect/NeuraPasses.h +++ b/include/NeuraDialect/NeuraPasses.h @@ -26,6 +26,7 @@ std::unique_ptr createTransformCtrlToDataFlowPass(); std::unique_ptr createLeveragePredicatedValuePass(); std::unique_ptr createMapToAcceleratorPass(); std::unique_ptr createGenerateCodePass(); +std::unique_ptr createFuseControlFlowPass(); #define GEN_PASS_REGISTRATION #include "NeuraDialect/NeuraPasses.h.inc" diff --git a/include/NeuraDialect/NeuraPasses.td b/include/NeuraDialect/NeuraPasses.td index 823a4b55..98a0402b 100644 --- a/include/NeuraDialect/NeuraPasses.td +++ b/include/NeuraDialect/NeuraPasses.td @@ -67,4 +67,12 @@ def GenerateCode : Pass<"generate-code", "ModuleOp"> { let constructor = "neura::createGenerateCodePass()"; } +def FuseControlFlow: Pass<"fuse-control-flow", "ModuleOp">{ + let summary = "Fuses control flow operations in the Neura dialect"; + let description = [{ + This pass fuses control flow operations. + }]; + let constructor = "neura::createFuseControlFlowPass()"; +} + #endif // NEURA_PASSES_TD \ No newline at end of file diff --git a/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp b/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp index e1960b66..e7ddf1a5 100644 --- a/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp +++ b/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp @@ -108,6 +108,22 @@ struct ArithSubFToNeuraFSub : public OpRewritePattern { } }; +struct ArithMulIToNeuraMul : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(arith::MulIOp op, + PatternRewriter &rewriter) const override { + Value lhs = op.getLhs(); + Value rhs = op.getRhs(); + Type result_type = op.getType(); + + // Optional predicate: default to null. + rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, + nullptr); + return success(); + } +}; + struct ArithMulFToNeuraFMul : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -124,6 +140,21 @@ struct ArithMulFToNeuraFMul : public OpRewritePattern { } }; +struct ArithDivSIToNeuraDiv : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(arith::DivSIOp op, + PatternRewriter &rewriter) const override { + Value lhs = op.getLhs(); + Value rhs = op.getRhs(); + Type result_type = op.getType(); + // Converts arith DivSIOp to Neura DivOp. + // Optional predicate: default to null. + rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, + nullptr); + return success(); + } +}; + struct ArithFDivToNeuraFDiv : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -139,6 +170,30 @@ struct ArithFDivToNeuraFDiv : public OpRewritePattern { return success(); } }; + +struct ArithRemSIToNeuraOp : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(arith::RemSIOp op, + PatternRewriter &rewriter) const override { + Value lhs = op.getLhs(); + Value rhs = op.getRhs(); + Type result_type = op.getType(); + Location loc = op.getLoc(); + // Converts arith RemSIOp to basic Neura Op. + // Optional predicate: default to null. + Value div = + rewriter.create(loc, result_type, lhs, rhs, nullptr); + Value mul = + rewriter.create(loc, result_type, rhs, div, nullptr); + Value rem = + rewriter.create(loc, result_type, lhs, mul, nullptr); + + rewriter.replaceOp(op, rem); + return success(); + } +}; + struct ArithCmpiToNeuraICmp : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -252,8 +307,8 @@ struct ArithIndexCastToNeuraCast Type in_type = input.getType(); StringRef cast_string; - // The isa check is generic and handles any integer bit width. - // (e.g., i32, i64). + // The isa check is generic and handles any integer bit + // width (e.g., i32, i64). if (in_type.isIndex() && isa(result_type)) { cast_string = "index_to_int"; } else if (isa(in_type) && result_type.isIndex()) { @@ -294,12 +349,13 @@ struct LowerArithToNeuraPass if (target && target.getValue() == mlir::accel::kNeuraTarget) { RewritePatternSet patterns(&getContext()); mlir::neura::arith2neura::populateWithGenerated(patterns); - patterns.add(context); + patterns.add< + ArithFAddToNeuraFAdd, ArithConstantToNeuraConstant, + ArithAddIToNeuraAdd, ArithCmpiToNeuraICmp, ArithSelectToNeuraSel, + ArithExtUIToNeuraCast, ArithIndexCastToNeuraCast, + ArithFDivToNeuraFDiv, ArithExtfToNeuraCast, ArithMulFToNeuraFMul, + ArithSubIToNeuraSub, ArithSubFToNeuraFSub, ArithMulIToNeuraMul, + ArithDivSIToNeuraDiv, ArithRemSIToNeuraOp>(context); if (failed( applyPatternsGreedily(getOperation(), std::move(patterns)))) { signalPassFailure(); diff --git a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp index 758c3fca..ce91d470 100644 --- a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp +++ b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp @@ -1,14 +1,14 @@ #include "Common/AcceleratorAttrs.h" +#include "Conversion/ConversionPasses.h" #include "NeuraDialect/NeuraDialect.h" #include "NeuraDialect/NeuraOps.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "Conversion/ConversionPasses.h" #include "llvm/Support/raw_ostream.h" namespace mlir { @@ -35,7 +35,8 @@ struct LlvmAddToNeuraAdd : public OpRewritePattern { LogicalResult matchAndRewrite(mlir::LLVM::AddOp op, PatternRewriter &rewriter) const override { - rewriter.replaceOpWithNewOp(op, op.getType(), op.getLhs(), op.getRhs(), Value()); + rewriter.replaceOpWithNewOp(op, op.getType(), op.getLhs(), + op.getRhs(), Value()); return success(); } }; @@ -54,7 +55,8 @@ struct LlvmFAddToNeuraFAdd : public OpRewritePattern { return failure(); // Optional predicate: default to 'none' - rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, Value()); + rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, + Value()); return success(); } }; @@ -69,12 +71,13 @@ struct LlvmFSubToNeuraFSub : public OpRewritePattern { Type result_type = op->getResult(0).getType(); // Only matches scalar float. - if (!mlir::isa(result_type)){ + if (!mlir::isa(result_type)) { return failure(); } - // Optional predicate: default to 'none' - rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, Value()); + // Optional predicate: default to 'none'. + rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, + Value()); return success(); } }; @@ -84,7 +87,8 @@ struct LlvmOrToNeuraOr : public OpRewritePattern { LogicalResult matchAndRewrite(mlir::LLVM::OrOp op, PatternRewriter &rewriter) const override { - rewriter.replaceOpWithNewOp(op, op.getType(), op.getLhs(), op.getRhs(), Value()); + rewriter.replaceOpWithNewOp(op, op.getType(), op.getLhs(), + op.getRhs(), Value()); return success(); } }; @@ -102,12 +106,13 @@ struct LlvmFMulToNeuraFMul : public OpRewritePattern { if (!mlir::isa(result_type)) return failure(); - rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, Value()); + rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, + Value()); return success(); } }; -struct LlvmVFMulToNeuraVFMul: public OpRewritePattern { +struct LlvmVFMulToNeuraVFMul : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(mlir::LLVM::FMulOp op, @@ -121,7 +126,8 @@ struct LlvmVFMulToNeuraVFMul: public OpRewritePattern { if (!vecTy || !mlir::isa(vecTy.getElementType())) return failure(); - rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, Value()); + rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, + Value()); return success(); } }; @@ -172,8 +178,9 @@ struct LlvmGEPToNeuraGEP : public OpRewritePattern { if (auto val = gepIndex.dyn_cast()) { indexValues.push_back(val); } else if (auto intAttr = gepIndex.dyn_cast()) { - // Create constant operation state manually - OperationState state(op.getLoc(), neura::ConstantOp::getOperationName()); + // Creates constant operation state manually. + OperationState state(op.getLoc(), + neura::ConstantOp::getOperationName()); state.addAttribute("value", intAttr); state.addAttribute("predicate", rewriter.getBoolAttr(true)); state.addTypes(rewriter.getIndexType()); @@ -184,7 +191,8 @@ struct LlvmGEPToNeuraGEP : public OpRewritePattern { } } - rewriter.replaceOpWithNewOp(op, op.getType(), base, indexValues); + rewriter.replaceOpWithNewOp(op, op.getType(), base, + indexValues); return success(); } }; @@ -194,7 +202,7 @@ struct LlvmLoadToNeuraLoad : public OpRewritePattern { LogicalResult matchAndRewrite(mlir::LLVM::LoadOp op, PatternRewriter &rewriter) const override { - Value ptr = op.getAddr(); // getPointer() is deprecated + Value ptr = op.getAddr(); // getPointer() is deprecated. Type resultType = op.getResult().getType(); rewriter.replaceOpWithNewOp(op, resultType, ptr, Value()); return success(); @@ -207,7 +215,7 @@ struct LlvmStoreToNeuraStore : public OpRewritePattern { LogicalResult matchAndRewrite(mlir::LLVM::StoreOp op, PatternRewriter &rewriter) const override { Value value = op.getValue(); - Value addr = op.getAddr(); // getPointer() is deprecated + Value addr = op.getAddr(); // getPointer() is deprecated rewriter.replaceOpWithNewOp(op, value, addr, Value()); return success(); } @@ -217,15 +225,15 @@ struct LlvmCondBrToNeuraCondBr : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(LLVM::CondBrOp op, PatternRewriter &rewriter) const override { - // Get the source operation's successors (basic blocks) + // Gets the source operation's successors (basic blocks). Block *trueDest = op.getTrueDest(); Block *falseDest = op.getFalseDest(); - // Get the operands for each destination + // Gets the operands for each destination. ValueRange trueOperands = op.getTrueDestOperands(); ValueRange falseOperands = op.getFalseDestOperands(); - // Create the new operation with proper successors + // Creates the new operation with proper successors. auto newOp = rewriter.create( op.getLoc(), // Location op.getCondition(), // Condition @@ -236,7 +244,7 @@ struct LlvmCondBrToNeuraCondBr : public OpRewritePattern { falseDest // False destination block ); - // Replace the old op with the new one + // Replaces the old op with the new one. rewriter.replaceOp(op, newOp->getResults()); return success(); @@ -248,13 +256,12 @@ struct LlvmBrToNeuraBr : public OpRewritePattern { LogicalResult matchAndRewrite(mlir::LLVM::BrOp op, PatternRewriter &rewriter) const override { - // Get the destination block and its operands + // Gets the destination block and its operands. Block *dest = op.getDest(); ValueRange destOperands = op.getDestOperands(); - // Create the new Neura_Br operation - rewriter.replaceOpWithNewOp( - op, destOperands, dest); + // Creates the new Neura_Br operation. + rewriter.replaceOpWithNewOp(op, destOperands, dest); return success(); } @@ -284,16 +291,16 @@ struct LlvmConstantToNeuraConstant : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(LLVM::ConstantOp op, - PatternRewriter &rewriter) const override { + PatternRewriter &rewriter) const override { auto attr = op.getValue(); - - // Create operation state manually + + // Creates operation state manually OperationState state(op.getLoc(), neura::ConstantOp::getOperationName()); state.addAttribute("value", attr); state.addAttribute("predicate", rewriter.getBoolAttr(true)); state.addTypes(op.getType()); - - // Create the operation and replace + + // Creates the operation and replace Operation *newOp = rewriter.create(state); rewriter.replaceOp(op, newOp->getResults()); return success(); @@ -343,7 +350,8 @@ struct LowerLlvmToNeuraPass // e.g., mlir func or llvm func). module_op.walk([&](FunctionOpInterface func) { if (func->hasAttr(mlir::accel::kAcceleratorAttr)) { - auto target = func->getAttrOfType(mlir::accel::kAcceleratorAttr); + auto target = + func->getAttrOfType(mlir::accel::kAcceleratorAttr); if (target && target.getValue() == mlir::accel::kNeuraTarget) { for (Region ®ion : func->getRegions()) { if (failed(applyPatternsGreedily(region, frozen))) { diff --git a/lib/NeuraDialect/Transforms/CMakeLists.txt b/lib/NeuraDialect/Transforms/CMakeLists.txt index 7d944fbb..0979725f 100644 --- a/lib/NeuraDialect/Transforms/CMakeLists.txt +++ b/lib/NeuraDialect/Transforms/CMakeLists.txt @@ -10,6 +10,7 @@ add_mlir_library( LeveragePredicatedValuePass.cpp MapToAcceleratorPass.cpp GenerateCodePass.cpp + FuseControlFlowPass.cpp DEPENDS MLIRNeuraTransformsIncGen diff --git a/lib/NeuraDialect/Transforms/FuseControlFlowPass.cpp b/lib/NeuraDialect/Transforms/FuseControlFlowPass.cpp new file mode 100644 index 00000000..e3b45f94 --- /dev/null +++ b/lib/NeuraDialect/Transforms/FuseControlFlowPass.cpp @@ -0,0 +1,80 @@ +#include "NeuraDialect/NeuraOps.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/MapVector.h" + +using namespace mlir; + +#define GEN_PASS_DEF_FUSECONTROLFLOW +#include "NeuraDialect/NeuraPasses.h.inc" + +namespace { +// A class to hold loop information for the control flow fusion pass. +class LoopInfo { +public: + // Key operations in a loop. + Value reserve_val; + Value phi_val; + Value index_val; + Value condition_val; + Value not_condition_val; + + // Loop iteration parameters. + Value start_val; + Value end_val; + Value step_val; + + // Backward edge information. + Operation *ctrl_mov = nullptr; // Initialized to nullptr. + + // Used for replace and update operations. + llvm::SetVector ops_to_remove; + llvm::MapVector>> + users_to_update; + + // Adds operations to remove. + void addOpToRemove(Operation *op) { + if (op) { + ops_to_remove.insert(op); + } + } + + // Checks if the loop info is complete. + // There is no not_condition_val because it is derived from condition_val. + bool isComplete() const { + return reserve_val && phi_val && index_val && condition_val && start_val && + end_val && step_val && ctrl_mov; + } + + // Records the users that use the loop index and (not-)condition values. + void recordUsersToUpdate() { + // TODO: Implements the logic to record users of loop index and condition + // values. + } +}; + +struct FuseControlFlowPass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(FuseControlFlowPass) + + StringRef getArgument() const override { return "fuse-control-flow"; } + StringRef getDescription() const override { + return "Fuses control flow operations into optimized neura dialect " + "operations"; + } + + void runOnOperation() override { + ModuleOp module_op = getOperation(); + // TODO: Adds the logic to fuse determined control flow operations. + } +}; +} // namespace + +namespace mlir::neura { +std::unique_ptr createFuseControlFlowPass() { + return std::make_unique(); +} +} // namespace mlir::neura \ No newline at end of file diff --git a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp index 69ff0832..db569bdf 100644 --- a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp +++ b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp @@ -13,8 +13,8 @@ #include "mlir/IR/Value.h" #include "mlir/Pass/Pass.h" #include "mlir/Support/LLVM.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" @@ -139,6 +139,7 @@ void assertLiveOutValuesDominatedByBlockArgs(func::FuncOp &func) { if (&block == &func.getBody().front()) continue; + llvm::errs() << "[ctrl2data] Checking block: " << block << "\n"; DenseSet live_out_values; for (Operation &op : block) { for (Value result : op.getResults()) { @@ -155,14 +156,15 @@ void assertLiveOutValuesDominatedByBlockArgs(func::FuncOp &func) { if (live_out_values.empty()) continue; - DenseSet dominated_by_block_args; - - for (BlockArgument arg : block.getArguments()) { - dominated_by_block_args.insert(arg); - } + DenseSet dominated_values; - if (block.getNumArguments() == 0 && !live_out_values.empty()) { - assert(false && "Block without arguments has live-out values"); + for (Operation &op : block) { + for (Value operand : op.getOperands()) { + if (!operand.getDefiningOp() || + operand.getDefiningOp()->getBlock() != &block) { + dominated_values.insert(operand); + } + } } bool changed = true; @@ -170,12 +172,12 @@ void assertLiveOutValuesDominatedByBlockArgs(func::FuncOp &func) { changed = false; for (Operation &op : block) { for (Value result : op.getResults()) { - if (dominated_by_block_args.count(result)) + if (dominated_values.count(result)) continue; for (Value operand : op.getOperands()) { - if (dominated_by_block_args.count(operand)) { - dominated_by_block_args.insert(result); + if (dominated_values.count(operand)) { + dominated_values.insert(result); changed = true; break; } @@ -184,13 +186,16 @@ void assertLiveOutValuesDominatedByBlockArgs(func::FuncOp &func) { } } for (Value live_out : live_out_values) { - if (!dominated_by_block_args.count(live_out)) - assert(false && "Live-out value not dominated by block arguments"); + llvm::errs() << "[ctrl2data] Live-out value: " << live_out << "\n"; + if (!dominated_values.count(live_out)) { + assert(false && "Live-out value not dominated by block arguments or " + "live-in values"); + } } } - llvm::errs() - << "[ctrl2data] All live-out values are dominated by block arguments.\n"; + llvm::errs() << "[ctrl2data] All live-out values are dominated by block " + "arguments or live-in values.\n"; } // Builds control flow info for the given function. @@ -300,16 +305,17 @@ Value getPrecessedCondition(Value condition, bool is_not_condition, return not_condition; } -void createReserveAndPhiOps(func::FuncOp &func, ControlFlowInfo &ctrl_info, - llvm::MapVector &arg_to_reserve, - llvm::MapVector &arg_to_phi_result, - OpBuilder &builder) { +void createReserveAndPhiOps( + func::FuncOp &func, ControlFlowInfo &ctrl_info, + llvm::MapVector &arg_to_reserve, + llvm::MapVector &arg_to_phi_result, + OpBuilder &builder) { DominanceInfo dom_info(func); // ================================================ // Step 1: Categorizes edges into six types. // ================================================ - // Type 1: Backward cond_br edges with values. + // Type 1: Backward cond_br edges with arguments. // Type 2: Backward br edges with values. // Type 3: Forward cond_br edges with values. // Type 4: Forward br edges with values. @@ -329,6 +335,9 @@ void createReserveAndPhiOps(func::FuncOp &func, ControlFlowInfo &ctrl_info, llvm::MapVector> arg_to_phi_operands; + // Tracks the mapping of live-out values. + llvm::MapVector value_to_predicated_value; + for (auto &edge : ctrl_info.all_edges) { Block *target = edge->target; @@ -368,7 +377,102 @@ void createReserveAndPhiOps(func::FuncOp &func, ControlFlowInfo &ctrl_info, } // ================================================ - // Step 2: Creates reserve and ctrl_mov operations for needed blockarguments. + // Step 2: Handles Forward cond_br edges without values. + // ================================================ + // Handles Type 5 edges. + for (auto &condition_pair : block_conditional_edges) { + Block *target = condition_pair.first; + auto &edges = condition_pair.second; + + if (edges.empty()) { + continue; + } + + // Collects all conditions for the target block. + SmallVector conditions; + for (ControlFlowInfo::Edge *edge : edges) { + Value condition = getPrecessedCondition( + edge->condition, edge->is_not_condition, condition_cache, builder); + conditions.push_back(condition); + } + + // Unsupported case: multiple conditions for a single block. + // TODO: Adds support if needed. + if (conditions.size() > 1) { + llvm::errs() << "[ctrl2data] Unsupported case: multiple conditions for a " + "single block: " + << *target << "\n"; + assert(false); + } + + if (target->getArguments().empty()) { + // Grants predicate for all the live-in values in the target block. + // Uses SetVector instead of DenseSet to maintain insertion order. + SetVector live_in_values; + for (Operation &op : target->getOperations()) { + for (Value operand : op.getOperands()) { + if (operand.getDefiningOp() && + operand.getDefiningOp()->getBlock() != target && + !isa(operand.getDefiningOp())) { + live_in_values.insert(operand); + } + } + } + + // Applies grant_predicate for each live-in value. + for (Value live_in_value : live_in_values) { + // Finds the earliest use of the live-in value. + Operation *earliest_use = nullptr; + for (Operation &op : target->getOperations()) { + for (Value operand : op.getOperands()) { + if (operand == live_in_value) { + earliest_use = &op; + break; + } + } + if (earliest_use) { + break; + } + } + + if (earliest_use) { + builder.setInsertionPoint(earliest_use); + } else { + builder.setInsertionPointToStart(target); + } + + // Creates predicated version of the live-in value. + Value predicated_value = builder.create( + live_in_value.getLoc(), live_in_value.getType(), live_in_value, + conditions[0]); + + value_to_predicated_value[live_in_value] = predicated_value; + + // Replace uses of the live-in value within this block only. + for (OpOperand &use : + llvm::make_early_inc_range(live_in_value.getUses())) { + if (use.getOwner()->getBlock() == target && + use.getOwner() != predicated_value.getDefiningOp()) { + use.set(predicated_value); + } + } + } + } + } + + // Updates the passed values in edges with predicated values. + for (auto &edge_ptr : ctrl_info.all_edges) { + ControlFlowInfo::Edge *edge = edge_ptr.get(); + for (size_t i = 0; i < edge->passed_values.size(); ++i) { + Value val = edge->passed_values[i]; + if (value_to_predicated_value.count(val)) { + edge->passed_values[i] = value_to_predicated_value[val]; + } + } + } + + // ================================================ + // Step 3: Creates reserve and ctrl_mov operations for needed blockarguments. // ================================================ // Handles Type 1 & 2 edges. for (auto &backward_pair : backward_value_edges) { @@ -402,7 +506,7 @@ void createReserveAndPhiOps(func::FuncOp &func, ControlFlowInfo &ctrl_info, } // ================================================ - // Step 3: Prepares for creating phi operations. + // Step 4: Prepares for creating phi operations. // ================================================ // Handles Type 3 & 4 edges. @@ -429,7 +533,7 @@ void createReserveAndPhiOps(func::FuncOp &func, ControlFlowInfo &ctrl_info, } // ================================================ - // Step 4: Creates phi operations for each block argument. + // Step 5: Creates phi operations for each block argument. // ================================================ for (auto &arg_to_phi_pair : arg_to_phi_operands) { BlockArgument arg = arg_to_phi_pair.first; @@ -467,88 +571,6 @@ void createReserveAndPhiOps(func::FuncOp &func, ControlFlowInfo &ctrl_info, arg_to_phi_result[arg] = phi; } } - - // ================================================ - // Step 5: Handles Forward cond_br edges without values. - // ================================================ - // Handles Type 5 edges. - for (auto &condition_pair : block_conditional_edges) { - Block *target = condition_pair.first; - auto &edges = condition_pair.second; - - if (edges.empty()) { - continue; - } - - // Collects all conditions for the target block. - SmallVector conditions; - for (ControlFlowInfo::Edge *edge : edges) { - Value condition = getPrecessedCondition( - edge->condition, edge->is_not_condition, condition_cache, builder); - conditions.push_back(condition); - } - - // Unsupported case: multiple conditions for a single block. - // TODO: Adds support if needed. - if (conditions.size() > 1) { - llvm::errs() << "[ctrl2data] Unsupported case: multiple conditions for a " - "single block: " - << *target << "\n"; - assert(false); - } - - if (target->getArguments().empty()) { - // Grants predicate for all the live-in values in the target block. - // Uses SetVector instead of DenseSet to maintain insertion order. - SetVector live_in_values; - for (Operation &op : target->getOperations()) { - for (Value operand : op.getOperands()) { - if (operand.getDefiningOp() && - operand.getDefiningOp()->getBlock() != target && - !isa(operand.getDefiningOp())) { - live_in_values.insert(operand); - } - } - } - - // Applies grant_predicate for each live-in value. - for (Value live_in_value : live_in_values) { - // Finds the earliest use of the live-in value. - Operation *earliest_use = nullptr; - for (Operation &op : target->getOperations()) { - for (Value operand : op.getOperands()) { - if (operand == live_in_value) { - earliest_use = &op; - break; - } - } - if (earliest_use) { - break; - } - } - - if (earliest_use) { - builder.setInsertionPoint(earliest_use); - } else { - builder.setInsertionPointToStart(target); - } - - // Creates predicated version of the live-in value - Value predicated_value = builder.create( - live_in_value.getLoc(), live_in_value.getType(), live_in_value, - conditions[0]); - - // Replace uses of the live-in value within this block only. - for (OpOperand &use : - llvm::make_early_inc_range(live_in_value.getUses())) { - if (use.getOwner()->getBlock() == target && - use.getOwner() != predicated_value.getDefiningOp()) { - use.set(predicated_value); - } - } - } - } - } } // Transforms control flow into data flow. diff --git a/test/controflow_fuse/complex_nested/complex_nested.cpp b/test/controflow_fuse/complex_nested/complex_nested.cpp new file mode 100644 index 00000000..eef62a91 --- /dev/null +++ b/test/controflow_fuse/complex_nested/complex_nested.cpp @@ -0,0 +1,35 @@ +// This function is used in image processing. +void complex_nested(int cube[32][32][32], int result[32][32]) { + for (int i = 0; i < 32; i++) { + int plane_sum = 0; + + for (int j = 0; j < 32; j++) { + result[i][j] = 0; + for (int k = 0; k < 32; k++) { + result[i][j] += cube[i][j][k]; + } + } + + int avg_value = 0; + for (int j = 0; j < 32; j++) { + plane_sum += result[i][j]; + } + avg_value = plane_sum / 32; + + for (int j = 0; j < 32; j++) { + int column_max = -128; + for (int k = 0; k < 32; k++) { + if (cube[k][j][i] > column_max) { + column_max = cube[k][j][i]; + } + } + result[i][j] = (result[i][j] * column_max) / 128; + } + + for (int j = 0; j < 32; j++) { + if (result[i][j] > avg_value) { + result[i][j] = avg_value; + } + } + } +} \ No newline at end of file diff --git a/test/controflow_fuse/complex_nested/complex_nested.mlir b/test/controflow_fuse/complex_nested/complex_nested.mlir new file mode 100644 index 00000000..c01e44bf --- /dev/null +++ b/test/controflow_fuse/complex_nested/complex_nested.mlir @@ -0,0 +1,307 @@ +// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --leverage-predicated-value --transform-ctrl-to-data-flow | FileCheck %s -check-prefix=CTRL2DATA + +module attributes {} { + func.func @_Z14complex_nestedPA32_A32_iPS_(%arg0: memref, %arg1: memref) attributes {llvm.linkage = #llvm.linkage} { + %c128_i32 = arith.constant 128 : i32 + %c-128_i32 = arith.constant -128 : i32 + %c32_i32 = arith.constant 32 : i32 + %c0_i32 = arith.constant 0 : i32 + affine.for %arg2 = 0 to 32 { + affine.for %arg3 = 0 to 32 { + affine.store %c0_i32, %arg1[%arg2, %arg3] : memref + affine.for %arg4 = 0 to 32 { + %2 = affine.load %arg0[%arg2, %arg3, %arg4] : memref + %3 = affine.load %arg1[%arg2, %arg3] : memref + %4 = arith.addi %3, %2 : i32 + affine.store %4, %arg1[%arg2, %arg3] : memref + } + } + %0 = affine.for %arg3 = 0 to 32 iter_args(%arg4 = %c0_i32) -> (i32) { + %2 = affine.load %arg1[%arg2, %arg3] : memref + %3 = arith.addi %arg4, %2 : i32 + affine.yield %3 : i32 + } + %1 = arith.divsi %0, %c32_i32 : i32 + affine.for %arg3 = 0 to 32 { + %2 = affine.for %arg4 = 0 to 32 iter_args(%arg5 = %c-128_i32) -> (i32) { + %6 = affine.load %arg0[%arg4, %arg3, %arg2] : memref + %7 = arith.cmpi sgt, %6, %arg5 : i32 + %8 = arith.select %7, %6, %arg5 : i32 + affine.yield %8 : i32 + } + %3 = affine.load %arg1[%arg2, %arg3] : memref + %4 = arith.muli %3, %2 : i32 + %5 = arith.divsi %4, %c128_i32 : i32 + affine.store %5, %arg1[%arg2, %arg3] : memref + } + affine.for %arg3 = 0 to 32 { + %2 = affine.load %arg1[%arg2, %arg3] : memref + %3 = arith.cmpi sgt, %2, %1 : i32 + scf.if %3 { + affine.store %1, %arg1[%arg2, %arg3] : memref + } + } + } + return + } +} + +// CHECK: func.func @_Z14complex_nestedPA32_A32_iPS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index +// CHECK-NEXT: %1 = "neura.constant"() <{value = 32 : index}> : () -> index +// CHECK-NEXT: %2 = "neura.constant"() <{value = 128 : i32}> : () -> i32 +// CHECK-NEXT: %3 = "neura.constant"() <{value = -128 : i32}> : () -> i32 +// CHECK-NEXT: %4 = "neura.constant"() <{value = 32 : i32}> : () -> i32 +// CHECK-NEXT: %5 = "neura.constant"() <{value = 0 : i32}> : () -> i32 +// CHECK-NEXT: %6 = "neura.constant"() <{value = 0 : index}> : () -> index +// CHECK-NEXT: %7 = "neura.cast"(%6) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %7 : i64 to ^bb1 +// CHECK-NEXT: ^bb1(%8: i64): // 2 preds: ^bb0, ^bb22 +// CHECK-NEXT: %9 = "neura.cast"(%8) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %10 = "neura.icmp"(%9, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %10 : i1 then to ^bb2 else to ^bb23 +// CHECK-NEXT: ^bb2: // pred: ^bb1 +// CHECK-NEXT: %11 = "neura.cast"(%6) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %11 : i64 to ^bb3 +// CHECK-NEXT: ^bb3(%12: i64): // 2 preds: ^bb2, ^bb7 +// CHECK-NEXT: %13 = "neura.cast"(%12) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %14 = "neura.icmp"(%13, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %14 : i1 then to ^bb4 else to ^bb8 +// CHECK-NEXT: ^bb4: // pred: ^bb3 +// CHECK-NEXT: neura.store_indexed %5 to %arg1[%9, %13 : index, index] memref : i32 +// CHECK-NEXT: %15 = "neura.cast"(%6) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %15 : i64 to ^bb5 +// CHECK-NEXT: ^bb5(%16: i64): // 2 preds: ^bb4, ^bb6 +// CHECK-NEXT: %17 = "neura.cast"(%16) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %18 = "neura.icmp"(%17, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %18 : i1 then to ^bb6 else to ^bb7 +// CHECK-NEXT: ^bb6: // pred: ^bb5 +// CHECK-NEXT: %19 = neura.load_indexed %arg0[%9, %13, %17 : index, index, index] memref : i32 +// CHECK-NEXT: %20 = neura.load_indexed %arg1[%9, %13 : index, index] memref : i32 +// CHECK-NEXT: %21 = "neura.add"(%20, %19) : (i32, i32) -> i32 +// CHECK-NEXT: neura.store_indexed %21 to %arg1[%9, %13 : index, index] memref : i32 +// CHECK-NEXT: %22 = "neura.add"(%17, %0) : (index, index) -> index +// CHECK-NEXT: %23 = "neura.cast"(%22) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %23 : i64 to ^bb5 +// CHECK-NEXT: ^bb7: // pred: ^bb5 +// CHECK-NEXT: %24 = "neura.add"(%13, %0) : (index, index) -> index +// CHECK-NEXT: %25 = "neura.cast"(%24) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %25 : i64 to ^bb3 +// CHECK-NEXT: ^bb8: // pred: ^bb3 +// CHECK-NEXT: %26 = "neura.cast"(%6) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %26, %5 : i64, i32 to ^bb9 +// CHECK-NEXT: ^bb9(%27: i64, %28: i32): // 2 preds: ^bb8, ^bb10 +// CHECK-NEXT: %29 = "neura.cast"(%27) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %30 = "neura.icmp"(%29, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %30 : i1 then to ^bb10 else to ^bb11 +// CHECK-NEXT: ^bb10: // pred: ^bb9 +// CHECK-NEXT: %31 = neura.load_indexed %arg1[%9, %29 : index, index] memref : i32 +// CHECK-NEXT: %32 = "neura.add"(%28, %31) : (i32, i32) -> i32 +// CHECK-NEXT: %33 = "neura.add"(%29, %0) : (index, index) -> index +// CHECK-NEXT: %34 = "neura.cast"(%33) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %34, %32 : i64, i32 to ^bb9 +// CHECK-NEXT: ^bb11: // pred: ^bb9 +// CHECK-NEXT: %35 = "neura.div"(%28, %4) : (i32, i32) -> i32 +// CHECK-NEXT: %36 = "neura.cast"(%6) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %36 : i64 to ^bb12 +// CHECK-NEXT: ^bb12(%37: i64): // 2 preds: ^bb11, ^bb16 +// CHECK-NEXT: %38 = "neura.cast"(%37) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %39 = "neura.icmp"(%38, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %39 : i1 then to ^bb13 else to ^bb17 +// CHECK-NEXT: ^bb13: // pred: ^bb12 +// CHECK-NEXT: %40 = "neura.cast"(%6) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %40, %3 : i64, i32 to ^bb14 +// CHECK-NEXT: ^bb14(%41: i64, %42: i32): // 2 preds: ^bb13, ^bb15 +// CHECK-NEXT: %43 = "neura.cast"(%41) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %44 = "neura.icmp"(%43, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %44 : i1 then to ^bb15 else to ^bb16 +// CHECK-NEXT: ^bb15: // pred: ^bb14 +// CHECK-NEXT: %45 = neura.load_indexed %arg0[%43, %38, %9 : index, index, index] memref : i32 +// CHECK-NEXT: %46 = "neura.icmp"(%45, %42) <{cmpType = "sgt"}> : (i32, i32) -> i1 +// CHECK-NEXT: %47 = "neura.sel"(%45, %42, %46) : (i32, i32, i1) -> i32 +// CHECK-NEXT: %48 = "neura.add"(%43, %0) : (index, index) -> index +// CHECK-NEXT: %49 = "neura.cast"(%48) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %49, %47 : i64, i32 to ^bb14 +// CHECK-NEXT: ^bb16: // pred: ^bb14 +// CHECK-NEXT: %50 = neura.load_indexed %arg1[%9, %38 : index, index] memref : i32 +// CHECK-NEXT: %51 = "neura.mul"(%50, %42) : (i32, i32) -> i32 +// CHECK-NEXT: %52 = "neura.div"(%51, %2) : (i32, i32) -> i32 +// CHECK-NEXT: neura.store_indexed %52 to %arg1[%9, %38 : index, index] memref : i32 +// CHECK-NEXT: %53 = "neura.add"(%38, %0) : (index, index) -> index +// CHECK-NEXT: %54 = "neura.cast"(%53) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %54 : i64 to ^bb12 +// CHECK-NEXT: ^bb17: // pred: ^bb12 +// CHECK-NEXT: %55 = "neura.cast"(%6) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %55 : i64 to ^bb18 +// CHECK-NEXT: ^bb18(%56: i64): // 2 preds: ^bb17, ^bb21 +// CHECK-NEXT: %57 = "neura.cast"(%56) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %58 = "neura.icmp"(%57, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %58 : i1 then to ^bb19 else to ^bb22 +// CHECK-NEXT: ^bb19: // pred: ^bb18 +// CHECK-NEXT: %59 = neura.load_indexed %arg1[%9, %57 : index, index] memref : i32 +// CHECK-NEXT: %60 = "neura.icmp"(%59, %35) <{cmpType = "sgt"}> : (i32, i32) -> i1 +// CHECK-NEXT: neura.cond_br %60 : i1 then to ^bb20 else to ^bb21 +// CHECK-NEXT: ^bb20: // pred: ^bb19 +// CHECK-NEXT: neura.store_indexed %35 to %arg1[%9, %57 : index, index] memref : i32 +// CHECK-NEXT: neura.br to ^bb21 +// CHECK-NEXT: ^bb21: // 2 preds: ^bb19, ^bb20 +// CHECK-NEXT: %61 = "neura.add"(%57, %0) : (index, index) -> index +// CHECK-NEXT: %62 = "neura.cast"(%61) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %62 : i64 to ^bb18 +// CHECK-NEXT: ^bb22: // pred: ^bb18 +// CHECK-NEXT: %63 = "neura.add"(%9, %0) : (index, index) -> index +// CHECK-NEXT: %64 = "neura.cast"(%63) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %64 : i64 to ^bb1 +// CHECK-NEXT: ^bb23: // pred: ^bb1 +// CHECK-NEXT: "neura.return"() : () -> () +// CHECK-NEXT: } + +// CTRL2DATA: func.func @_Z14complex_nestedPA32_A32_iPS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %1 = "neura.grant_always"(%0) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = 32 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %3 = "neura.grant_always"(%2) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %4 = "neura.constant"() <{value = 128 : i32}> : () -> !neura.data +// CTRL2DATA-NEXT: %5 = "neura.grant_always"(%4) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %6 = "neura.constant"() <{value = -128 : i32}> : () -> !neura.data +// CTRL2DATA-NEXT: %7 = "neura.grant_always"(%6) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %8 = "neura.grant_once"(%6) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %9 = "neura.constant"() <{value = 32 : i32}> : () -> !neura.data +// CTRL2DATA-NEXT: %10 = "neura.grant_always"(%9) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %11 = "neura.constant"() <{value = 0 : i32}> : () -> !neura.data +// CTRL2DATA-NEXT: %12 = "neura.grant_always"(%11) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %13 = "neura.grant_once"(%11) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %14 = "neura.constant"() <{value = 0 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %15 = "neura.grant_always"(%14) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %16 = "neura.cast"(%14) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %17 = "neura.grant_once"(%16) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %18 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %19 = "neura.phi"(%18, %17) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %20 = "neura.cast"(%19) <{cast_type = "int_to_index"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %21 = "neura.icmp"(%20, %3) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %22 = "neura.not"(%21) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %23 = neura.grant_predicate %15, %21 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %24 = "neura.cast"(%23) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %25 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %26 = "neura.phi"(%25, %24) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %27 = "neura.cast"(%26) <{cast_type = "int_to_index"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %28 = "neura.icmp"(%27, %3) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %29 = "neura.not"(%28) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %30 = neura.grant_predicate %12, %28 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %31 = neura.grant_predicate %20, %28 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %32 = neura.grant_predicate %27, %28 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %30 to %arg1[%31, %32 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %33 = neura.grant_predicate %15, %28 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %34 = "neura.cast"(%33) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %35 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %36 = "neura.phi"(%35, %34) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %37 = "neura.cast"(%36) <{cast_type = "int_to_index"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %38 = "neura.icmp"(%37, %3) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %39 = "neura.not"(%38) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %40 = neura.grant_predicate %20, %38 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %41 = neura.grant_predicate %27, %38 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %42 = neura.grant_predicate %37, %38 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %43 = neura.load_indexed %arg0[%40, %41, %42 : !neura.data, !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %44 = neura.load_indexed %arg1[%40, %41 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %45 = "neura.add"(%44, %43) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %45 to %arg1[%40, %41 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %46 = neura.grant_predicate %1, %38 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %47 = "neura.add"(%42, %46) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %48 = "neura.cast"(%47) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %48 -> %35 : !neura.data !neura.data +// CTRL2DATA-NEXT: %49 = neura.grant_predicate %27, %39 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %50 = neura.grant_predicate %1, %39 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %51 = "neura.add"(%49, %50) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %52 = "neura.cast"(%51) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %52 -> %25 : !neura.data !neura.data +// CTRL2DATA-NEXT: %53 = neura.grant_predicate %15, %29 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %54 = "neura.cast"(%53) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %55 = neura.grant_predicate %13, %29 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %56 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %57 = "neura.phi"(%56, %55) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %58 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %59 = "neura.phi"(%58, %54) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %60 = "neura.cast"(%59) <{cast_type = "int_to_index"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %61 = "neura.icmp"(%60, %3) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %62 = "neura.not"(%61) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %63 = neura.grant_predicate %20, %61 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %64 = neura.grant_predicate %60, %61 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %65 = neura.load_indexed %arg1[%63, %64 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %66 = "neura.add"(%57, %65) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %67 = neura.grant_predicate %1, %61 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %68 = "neura.add"(%64, %67) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %69 = "neura.cast"(%68) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %69 -> %58 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %66 -> %56 : !neura.data !neura.data +// CTRL2DATA-NEXT: %70 = neura.grant_predicate %10, %62 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %71 = "neura.div"(%57, %70) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %72 = neura.grant_predicate %15, %62 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %73 = "neura.cast"(%72) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %74 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %75 = "neura.phi"(%74, %73) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %76 = "neura.cast"(%75) <{cast_type = "int_to_index"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %77 = "neura.icmp"(%76, %3) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %78 = "neura.not"(%77) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %79 = neura.grant_predicate %15, %77 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %80 = "neura.cast"(%79) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %81 = neura.grant_predicate %8, %77 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %82 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %83 = "neura.phi"(%82, %81) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %84 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %85 = "neura.phi"(%84, %80) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %86 = "neura.cast"(%85) <{cast_type = "int_to_index"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %87 = "neura.icmp"(%86, %3) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %88 = "neura.not"(%87) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %89 = neura.grant_predicate %86, %87 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %90 = neura.grant_predicate %76, %87 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %91 = neura.grant_predicate %20, %87 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %92 = neura.load_indexed %arg0[%89, %90, %91 : !neura.data, !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %93 = "neura.icmp"(%92, %83) <{cmpType = "sgt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %94 = "neura.sel"(%92, %83, %93) : (!neura.data, !neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %95 = neura.grant_predicate %1, %87 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %96 = "neura.add"(%89, %95) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %97 = "neura.cast"(%96) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %97 -> %84 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %94 -> %82 : !neura.data !neura.data +// CTRL2DATA-NEXT: %98 = neura.grant_predicate %20, %88 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %99 = neura.grant_predicate %76, %88 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %100 = neura.load_indexed %arg1[%98, %99 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %101 = "neura.mul"(%100, %83) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %102 = neura.grant_predicate %5, %88 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %103 = "neura.div"(%101, %102) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %103 to %arg1[%98, %99 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %104 = neura.grant_predicate %1, %88 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %105 = "neura.add"(%99, %104) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %106 = "neura.cast"(%105) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %106 -> %74 : !neura.data !neura.data +// CTRL2DATA-NEXT: %107 = neura.grant_predicate %15, %78 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %108 = "neura.cast"(%107) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %109 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %110 = "neura.phi"(%109, %108) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %111 = "neura.cast"(%110) <{cast_type = "int_to_index"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %112 = "neura.icmp"(%111, %3) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %113 = "neura.not"(%112) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %114 = neura.grant_predicate %20, %112 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %115 = neura.grant_predicate %111, %112 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %116 = neura.load_indexed %arg1[%114, %115 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %117 = neura.grant_predicate %71, %112 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %118 = "neura.icmp"(%116, %117) <{cmpType = "sgt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %119 = "neura.not"(%118) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %120 = neura.grant_predicate %71, %118 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %121 = neura.grant_predicate %20, %118 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %122 = neura.grant_predicate %111, %118 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %120 to %arg1[%121, %122 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %123 = neura.grant_predicate %111, %119 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %124 = neura.grant_predicate %1, %119 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %125 = "neura.add"(%123, %124) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %126 = "neura.cast"(%125) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %126 -> %109 : !neura.data !neura.data +// CTRL2DATA-NEXT: %127 = neura.grant_predicate %20, %113 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %128 = neura.grant_predicate %1, %113 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %129 = "neura.add"(%127, %128) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %130 = "neura.cast"(%129) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %130 -> %18 : !neura.data !neura.data +// CTRL2DATA-NEXT: "neura.return"() : () -> () +// CTRL2DATA-NEXT: } \ No newline at end of file diff --git a/test/controflow_fuse/non_perfect_nested/non_perfect_nested.cpp b/test/controflow_fuse/non_perfect_nested/non_perfect_nested.cpp new file mode 100644 index 00000000..d687f134 --- /dev/null +++ b/test/controflow_fuse/non_perfect_nested/non_perfect_nested.cpp @@ -0,0 +1,31 @@ +void non_perfect_extra_computation(int input[128][128], int output[128][128]) { + for (int i = 0; i < 128; i++) { + int row_sum = 0; + int row_max = -1000; + int row_min = 1000; + int threshold = i * 2; + int scale = (i % 2 == 0) ? 2 : 3; + + for (int j = 0; j < 128; j++) { + output[i][j] = input[i][j] * scale; + row_sum += input[i][j]; + + if (input[i][j] > row_max) { + row_max = input[i][j]; + } + if (input[i][j] < row_min) { + row_min = input[i][j]; + } + } + + int average = row_sum / 128; + int range = row_max - row_min; + int normalized = (range > 0) ? (average * 100 / range) : average; + + output[i][0] = average; + output[i][1] = row_max; + output[i][2] = row_min; + output[i][3] = normalized; + output[i][4] = range; + } +} \ No newline at end of file diff --git a/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir b/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir new file mode 100644 index 00000000..8c91e7cd --- /dev/null +++ b/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir @@ -0,0 +1,227 @@ +// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --leverage-predicated-value --transform-ctrl-to-data-flow | FileCheck %s -check-prefix=CTRL2DATA + +module attributes {} { + func.func @_Z29non_perfect_extra_computationPA128_iS0_(%arg0: memref, %arg1: memref) attributes {llvm.linkage = #llvm.linkage} { + %c100_i32 = arith.constant 100 : i32 + %c3_i32 = arith.constant 3 : i32 + %c2_i32 = arith.constant 2 : i32 + %c1000_i32 = arith.constant 1000 : i32 + %c-1000_i32 = arith.constant -1000 : i32 + %c128_i32 = arith.constant 128 : i32 + %c0_i32 = arith.constant 0 : i32 + affine.for %arg2 = 0 to 128 { + %0 = arith.index_cast %arg2 : index to i32 + %1 = arith.remsi %0, %c2_i32 : i32 + %2 = arith.cmpi eq, %1, %c0_i32 : i32 + %3 = arith.select %2, %c2_i32, %c3_i32 : i32 + %4:3 = affine.for %arg3 = 0 to 128 iter_args(%arg4 = %c1000_i32, %arg5 = %c-1000_i32, %arg6 = %c0_i32) -> (i32, i32, i32) { + %9 = affine.load %arg0[%arg2, %arg3] : memref + %10 = arith.muli %9, %3 : i32 + affine.store %10, %arg1[%arg2, %arg3] : memref + %11 = affine.load %arg0[%arg2, %arg3] : memref + %12 = arith.addi %arg6, %11 : i32 + %13 = arith.cmpi sgt, %11, %arg5 : i32 + %14 = arith.select %13, %11, %arg5 : i32 + %15 = arith.cmpi slt, %11, %arg4 : i32 + %16 = arith.select %15, %11, %arg4 : i32 + affine.yield %16, %14, %12 : i32, i32, i32 + } + %5 = arith.divsi %4#2, %c128_i32 : i32 + %6 = arith.subi %4#1, %4#0 : i32 + %7 = arith.cmpi sgt, %6, %c0_i32 : i32 + %8 = scf.if %7 -> (i32) { + %9 = arith.muli %5, %c100_i32 : i32 + %10 = arith.divsi %9, %6 : i32 + scf.yield %10 : i32 + } else { + scf.yield %5 : i32 + } + affine.store %5, %arg1[%arg2, 0] : memref + affine.store %4#1, %arg1[%arg2, 1] : memref + affine.store %4#0, %arg1[%arg2, 2] : memref + affine.store %8, %arg1[%arg2, 3] : memref + affine.store %6, %arg1[%arg2, 4] : memref + } + return + } +} + +// CHECK: func.func @_Z29non_perfect_extra_computationPA128_iS0_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CHECK-NEXT: %0 = "neura.constant"() <{value = 4 : index}> : () -> index +// CHECK-NEXT: %1 = "neura.constant"() <{value = 3 : index}> : () -> index +// CHECK-NEXT: %2 = "neura.constant"() <{value = 2 : index}> : () -> index +// CHECK-NEXT: %3 = "neura.constant"() <{value = 1 : index}> : () -> index +// CHECK-NEXT: %4 = "neura.constant"() <{value = 128 : index}> : () -> index +// CHECK-NEXT: %5 = "neura.constant"() <{value = 100 : i32}> : () -> i32 +// CHECK-NEXT: %6 = "neura.constant"() <{value = 3 : i32}> : () -> i32 +// CHECK-NEXT: %7 = "neura.constant"() <{value = 2 : i32}> : () -> i32 +// CHECK-NEXT: %8 = "neura.constant"() <{value = 1000 : i32}> : () -> i32 +// CHECK-NEXT: %9 = "neura.constant"() <{value = -1000 : i32}> : () -> i32 +// CHECK-NEXT: %10 = "neura.constant"() <{value = 128 : i32}> : () -> i32 +// CHECK-NEXT: %11 = "neura.constant"() <{value = 0 : i32}> : () -> i32 +// CHECK-NEXT: %12 = "neura.constant"() <{value = 0 : index}> : () -> index +// CHECK-NEXT: %13 = "neura.cast"(%12) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %13 : i64 to ^bb1 +// CHECK-NEXT: ^bb1(%14: i64): // 2 preds: ^bb0, ^bb9 +// CHECK-NEXT: %15 = "neura.cast"(%14) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %16 = "neura.icmp"(%15, %4) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %16 : i1 then to ^bb2 else to ^bb10 +// CHECK-NEXT: ^bb2: // pred: ^bb1 +// CHECK-NEXT: %17 = "neura.cast"(%15) <{cast_type = "index_to_int"}> : (index) -> i32 +// CHECK-NEXT: %18 = "neura.div"(%17, %7) : (i32, i32) -> i32 +// CHECK-NEXT: %19 = "neura.mul"(%7, %18) : (i32, i32) -> i32 +// CHECK-NEXT: %20 = "neura.sub"(%17, %19) : (i32, i32) -> i32 +// CHECK-NEXT: %21 = "neura.icmp"(%20, %11) <{cmpType = "eq"}> : (i32, i32) -> i1 +// CHECK-NEXT: %22 = "neura.sel"(%7, %6, %21) : (i32, i32, i1) -> i32 +// CHECK-NEXT: %23 = "neura.cast"(%12) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %23, %8, %9, %11 : i64, i32, i32, i32 to ^bb3 +// CHECK-NEXT: ^bb3(%24: i64, %25: i32, %26: i32, %27: i32): // 2 preds: ^bb2, ^bb4 +// CHECK-NEXT: %28 = "neura.cast"(%24) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %29 = "neura.icmp"(%28, %4) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %29 : i1 then to ^bb4 else to ^bb5 +// CHECK-NEXT: ^bb4: // pred: ^bb3 +// CHECK-NEXT: %30 = neura.load_indexed %arg0[%15, %28 : index, index] memref : i32 +// CHECK-NEXT: %31 = "neura.mul"(%30, %22) : (i32, i32) -> i32 +// CHECK-NEXT: neura.store_indexed %31 to %arg1[%15, %28 : index, index] memref : i32 +// CHECK-NEXT: %32 = neura.load_indexed %arg0[%15, %28 : index, index] memref : i32 +// CHECK-NEXT: %33 = "neura.add"(%27, %32) : (i32, i32) -> i32 +// CHECK-NEXT: %34 = "neura.icmp"(%32, %26) <{cmpType = "sgt"}> : (i32, i32) -> i1 +// CHECK-NEXT: %35 = "neura.sel"(%32, %26, %34) : (i32, i32, i1) -> i32 +// CHECK-NEXT: %36 = "neura.icmp"(%32, %25) <{cmpType = "slt"}> : (i32, i32) -> i1 +// CHECK-NEXT: %37 = "neura.sel"(%32, %25, %36) : (i32, i32, i1) -> i32 +// CHECK-NEXT: %38 = "neura.add"(%28, %3) : (index, index) -> index +// CHECK-NEXT: %39 = "neura.cast"(%38) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %39, %37, %35, %33 : i64, i32, i32, i32 to ^bb3 +// CHECK-NEXT: ^bb5: // pred: ^bb3 +// CHECK-NEXT: %40 = "neura.div"(%27, %10) : (i32, i32) -> i32 +// CHECK-NEXT: %41 = "neura.sub"(%26, %25) : (i32, i32) -> i32 +// CHECK-NEXT: %42 = "neura.icmp"(%41, %11) <{cmpType = "sgt"}> : (i32, i32) -> i1 +// CHECK-NEXT: neura.cond_br %42 : i1 then to ^bb6 else to ^bb7 +// CHECK-NEXT: ^bb6: // pred: ^bb5 +// CHECK-NEXT: %43 = "neura.mul"(%40, %5) : (i32, i32) -> i32 +// CHECK-NEXT: %44 = "neura.div"(%43, %41) : (i32, i32) -> i32 +// CHECK-NEXT: neura.br %44 : i32 to ^bb8 +// CHECK-NEXT: ^bb7: // pred: ^bb5 +// CHECK-NEXT: neura.br %40 : i32 to ^bb8 +// CHECK-NEXT: ^bb8(%45: i32): // 2 preds: ^bb6, ^bb7 +// CHECK-NEXT: neura.br to ^bb9 +// CHECK-NEXT: ^bb9: // pred: ^bb8 +// CHECK-NEXT: neura.store_indexed %40 to %arg1[%15, %12 : index, index] memref : i32 +// CHECK-NEXT: neura.store_indexed %26 to %arg1[%15, %3 : index, index] memref : i32 +// CHECK-NEXT: neura.store_indexed %25 to %arg1[%15, %2 : index, index] memref : i32 +// CHECK-NEXT: neura.store_indexed %45 to %arg1[%15, %1 : index, index] memref : i32 +// CHECK-NEXT: neura.store_indexed %41 to %arg1[%15, %0 : index, index] memref : i32 +// CHECK-NEXT: %46 = "neura.add"(%15, %3) : (index, index) -> index +// CHECK-NEXT: %47 = "neura.cast"(%46) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %47 : i64 to ^bb1 +// CHECK-NEXT: ^bb10: // pred: ^bb1 +// CHECK-NEXT: "neura.return"() : () -> () +// CHECK-NEXT: } + +// CTRL2DATA: func.func @_Z29non_perfect_extra_computationPA128_iS0_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = 4 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %1 = "neura.grant_always"(%0) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = 3 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %3 = "neura.grant_always"(%2) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %4 = "neura.constant"() <{value = 2 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %5 = "neura.grant_always"(%4) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %6 = "neura.constant"() <{value = 1 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %7 = "neura.grant_always"(%6) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %8 = "neura.constant"() <{value = 128 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %9 = "neura.grant_always"(%8) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %10 = "neura.constant"() <{value = 100 : i32}> : () -> !neura.data +// CTRL2DATA-NEXT: %11 = "neura.grant_always"(%10) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %12 = "neura.constant"() <{value = 3 : i32}> : () -> !neura.data +// CTRL2DATA-NEXT: %13 = "neura.grant_always"(%12) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %14 = "neura.constant"() <{value = 2 : i32}> : () -> !neura.data +// CTRL2DATA-NEXT: %15 = "neura.grant_always"(%14) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %16 = "neura.constant"() <{value = 1000 : i32}> : () -> !neura.data +// CTRL2DATA-NEXT: %17 = "neura.grant_always"(%16) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %18 = "neura.grant_once"(%16) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %19 = "neura.constant"() <{value = -1000 : i32}> : () -> !neura.data +// CTRL2DATA-NEXT: %20 = "neura.grant_always"(%19) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %21 = "neura.grant_once"(%19) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %22 = "neura.constant"() <{value = 128 : i32}> : () -> !neura.data +// CTRL2DATA-NEXT: %23 = "neura.grant_always"(%22) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %24 = "neura.constant"() <{value = 0 : i32}> : () -> !neura.data +// CTRL2DATA-NEXT: %25 = "neura.grant_always"(%24) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %26 = "neura.grant_once"(%24) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %27 = "neura.constant"() <{value = 0 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %28 = "neura.grant_always"(%27) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %29 = "neura.cast"(%27) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %30 = "neura.grant_once"(%29) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %31 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %32 = "neura.phi"(%31, %30) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %33 = "neura.cast"(%32) <{cast_type = "int_to_index"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %34 = "neura.icmp"(%33, %9) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %35 = "neura.not"(%34) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %36 = neura.grant_predicate %33, %34 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %37 = "neura.cast"(%36) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %38 = neura.grant_predicate %15, %34 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %39 = "neura.div"(%37, %38) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %40 = "neura.mul"(%38, %39) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %41 = "neura.sub"(%37, %40) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %42 = neura.grant_predicate %25, %34 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %43 = "neura.icmp"(%41, %42) <{cmpType = "eq"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %44 = neura.grant_predicate %13, %34 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %45 = "neura.sel"(%38, %44, %43) : (!neura.data, !neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %46 = neura.grant_predicate %28, %34 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %47 = "neura.cast"(%46) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %48 = neura.grant_predicate %18, %34 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %49 = neura.grant_predicate %21, %34 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %50 = neura.grant_predicate %26, %34 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %51 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %52 = "neura.phi"(%51, %50) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %53 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %54 = "neura.phi"(%53, %49) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %55 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %56 = "neura.phi"(%55, %48) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %57 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %58 = "neura.phi"(%57, %47) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %59 = "neura.cast"(%58) <{cast_type = "int_to_index"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %60 = "neura.icmp"(%59, %9) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %61 = "neura.not"(%60) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %62 = neura.grant_predicate %33, %60 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %63 = neura.grant_predicate %59, %60 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %64 = neura.load_indexed %arg0[%62, %63 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %65 = neura.grant_predicate %45, %60 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %66 = "neura.mul"(%64, %65) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %66 to %arg1[%62, %63 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %67 = neura.load_indexed %arg0[%62, %63 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %68 = "neura.add"(%52, %67) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %69 = "neura.icmp"(%67, %54) <{cmpType = "sgt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %70 = "neura.sel"(%67, %54, %69) : (!neura.data, !neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %71 = "neura.icmp"(%67, %56) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %72 = "neura.sel"(%67, %56, %71) : (!neura.data, !neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %73 = neura.grant_predicate %7, %60 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %74 = "neura.add"(%63, %73) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %75 = "neura.cast"(%74) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %75 -> %57 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %72 -> %55 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %70 -> %53 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %68 -> %51 : !neura.data !neura.data +// CTRL2DATA-NEXT: %76 = neura.grant_predicate %23, %61 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %77 = "neura.div"(%52, %76) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %78 = "neura.sub"(%54, %56) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %79 = neura.grant_predicate %25, %61 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %80 = "neura.icmp"(%78, %79) <{cmpType = "sgt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %81 = "neura.not"(%80) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %82 = neura.grant_predicate %77, %80 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %83 = neura.grant_predicate %11, %80 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %84 = "neura.mul"(%82, %83) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %85 = neura.grant_predicate %78, %80 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %86 = "neura.div"(%84, %85) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %87 = neura.grant_predicate %77, %81 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %88 = "neura.phi"(%86, %87) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %77 to %arg1[%33, %28 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %54 to %arg1[%33, %7 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %56 to %arg1[%33, %5 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %88 to %arg1[%33, %3 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %78 to %arg1[%33, %1 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %89 = "neura.add"(%33, %7) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %90 = "neura.cast"(%89) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %90 -> %31 : !neura.data !neura.data +// CTRL2DATA-NEXT: "neura.return"() : () -> () +// CTRL2DATA-NEXT: } diff --git a/test/controflow_fuse/perfect_nested/perfect_nested.cpp b/test/controflow_fuse/perfect_nested/perfect_nested.cpp new file mode 100644 index 00000000..7aa5ca29 --- /dev/null +++ b/test/controflow_fuse/perfect_nested/perfect_nested.cpp @@ -0,0 +1,19 @@ +void bert_node1( + bool input[1][1][1][1][1][128], + bool output[1][1][128][1][1][128]) { + + for (int arg3 = 0; arg3 < 1; arg3++) { + for (int arg4 = 0; arg4 < 1; arg4++) { + for (int arg5 = 0; arg5 < 128; arg5++) { + for (int arg6 = 0; arg6 < 1; arg6++) { + for (int arg7 = 0; arg7 < 1; arg7++) { + for (int arg8 = 0; arg8 < 128; arg8++) { + bool value = input[arg3][arg4][0][arg6][arg7][arg8]; + output[arg3][arg4][arg5][arg6][arg7][arg8] = value; + } + } + } + } + } + } +} \ No newline at end of file diff --git a/test/controflow_fuse/perfect_nested/perfect_nested.mlir b/test/controflow_fuse/perfect_nested/perfect_nested.mlir new file mode 100644 index 00000000..1dbbcaf1 --- /dev/null +++ b/test/controflow_fuse/perfect_nested/perfect_nested.mlir @@ -0,0 +1,83 @@ +// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --leverage-predicated-value --transform-ctrl-to-data-flow | FileCheck %s -check-prefix=CTRL2DATA + +module attributes {} { + func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {llvm.linkage = #llvm.linkage} { + affine.for %arg2 = 0 to 128 { + affine.for %arg3 = 0 to 128 { + %0 = affine.load %arg0[0, 0, 0, 0, 0, %arg3] : memref + affine.store %0, %arg1[0, 0, %arg2, 0, 0, %arg3] : memref + } + } + return + } +} + +// CHECK: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index +// CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index +// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : index}> : () -> index +// CHECK-NEXT: %3 = "neura.cast"(%2) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %3 : i64 to ^bb1 +// CHECK-NEXT: ^bb1(%4: i64): // 2 preds: ^bb0, ^bb5 +// CHECK-NEXT: %5 = "neura.cast"(%4) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %6 = "neura.icmp"(%5, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %6 : i1 then to ^bb2 else to ^bb6 +// CHECK-NEXT: ^bb2: // pred: ^bb1 +// CHECK-NEXT: %7 = "neura.cast"(%2) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %7 : i64 to ^bb3 +// CHECK-NEXT: ^bb3(%8: i64): // 2 preds: ^bb2, ^bb4 +// CHECK-NEXT: %9 = "neura.cast"(%8) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %10 = "neura.icmp"(%9, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %10 : i1 then to ^bb4 else to ^bb5 +// CHECK-NEXT: ^bb4: // pred: ^bb3 +// CHECK-NEXT: %11 = neura.load_indexed %arg0[%2, %2, %2, %2, %2, %9 : index, index, index, index, index, index] memref : i8 +// CHECK-NEXT: neura.store_indexed %11 to %arg1[%2, %2, %5, %2, %2, %9 : index, index, index, index, index, index] memref : i8 +// CHECK-NEXT: %12 = "neura.add"(%9, %0) : (index, index) -> index +// CHECK-NEXT: %13 = "neura.cast"(%12) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %13 : i64 to ^bb3 +// CHECK-NEXT: ^bb5: // pred: ^bb3 +// CHECK-NEXT: %14 = "neura.add"(%5, %0) : (index, index) -> index +// CHECK-NEXT: %15 = "neura.cast"(%14) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %15 : i64 to ^bb1 +// CHECK-NEXT: ^bb6: // pred: ^bb1 +// CHECK-NEXT: "neura.return"() : () -> () +// CHECK-NEXT: } + +// CTRL2DATA: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %1 = "neura.grant_always"(%0) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = 128 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %3 = "neura.grant_always"(%2) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %4 = "neura.constant"() <{value = 0 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %5 = "neura.grant_always"(%4) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %6 = "neura.cast"(%4) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %7 = "neura.grant_once"(%6) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %8 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %9 = "neura.phi"(%8, %7) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %10 = "neura.cast"(%9) <{cast_type = "int_to_index"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %11 = "neura.icmp"(%10, %3) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %12 = "neura.not"(%11) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %13 = neura.grant_predicate %5, %11 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %14 = "neura.cast"(%13) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %15 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %16 = "neura.phi"(%15, %14) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %17 = "neura.cast"(%16) <{cast_type = "int_to_index"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %18 = "neura.icmp"(%17, %3) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %19 = "neura.not"(%18) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %20 = neura.grant_predicate %5, %18 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %21 = neura.grant_predicate %17, %18 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %22 = neura.load_indexed %arg0[%20, %20, %20, %20, %20, %21 : !neura.data, !neura.data, !neura.data, !neura.data, !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %23 = neura.grant_predicate %10, %18 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %22 to %arg1[%20, %20, %23, %20, %20, %21 : !neura.data, !neura.data, !neura.data, !neura.data, !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %24 = neura.grant_predicate %1, %18 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %25 = "neura.add"(%21, %24) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %26 = "neura.cast"(%25) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %26 -> %15 : !neura.data !neura.data +// CTRL2DATA-NEXT: %27 = neura.grant_predicate %10, %19 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %28 = neura.grant_predicate %1, %19 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %29 = "neura.add"(%27, %28) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %30 = "neura.cast"(%29) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %30 -> %8 : !neura.data !neura.data +// CTRL2DATA-NEXT: "neura.return"() : () -> () diff --git a/test/controflow_fuse/perfect_reduction/perfect_reduction.cpp b/test/controflow_fuse/perfect_reduction/perfect_reduction.cpp new file mode 100644 index 00000000..ee17f47b --- /dev/null +++ b/test/controflow_fuse/perfect_reduction/perfect_reduction.cpp @@ -0,0 +1,9 @@ +int perfect_nested_reduction_2d(int matrix[128][128]) { + int sum = 0; + for (int i = 0; i < 128; i++) { + for (int j = 0; j < 128; j++) { + sum += matrix[i][j]; + } + } + return sum; +} \ No newline at end of file diff --git a/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir b/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir new file mode 100644 index 00000000..cc04b7f0 --- /dev/null +++ b/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir @@ -0,0 +1,96 @@ +// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --leverage-predicated-value --transform-ctrl-to-data-flow | FileCheck %s -check-prefix=CTRL2DATA + +module attributes {} { + func.func @_Z27perfect_nested_reduction_2dPA128_i(%arg0: memref) -> i32 attributes {llvm.linkage = #llvm.linkage} { + %c0_i32 = arith.constant 0 : i32 + %0 = affine.for %arg1 = 0 to 128 iter_args(%arg2 = %c0_i32) -> (i32) { + %1 = affine.for %arg3 = 0 to 128 iter_args(%arg4 = %arg2) -> (i32) { + %2 = affine.load %arg0[%arg1, %arg3] : memref + %3 = arith.addi %arg4, %2 : i32 + affine.yield %3 : i32 + } + affine.yield %1 : i32 + } + return %0 : i32 + } +} + + +// CHECK: func.func @_Z27perfect_nested_reduction_2dPA128_i(%arg0: memref) -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index +// CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index +// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : i32}> : () -> i32 +// CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : index}> : () -> index +// CHECK-NEXT: %4 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %4, %2 : i64, i32 to ^bb1 +// CHECK-NEXT: ^bb1(%5: i64, %6: i32): // 2 preds: ^bb0, ^bb5 +// CHECK-NEXT: %7 = "neura.cast"(%5) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %8 = "neura.icmp"(%7, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %8 : i1 then to ^bb2 else to ^bb6 +// CHECK-NEXT: ^bb2: // pred: ^bb1 +// CHECK-NEXT: %9 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %9, %6 : i64, i32 to ^bb3 +// CHECK-NEXT: ^bb3(%10: i64, %11: i32): // 2 preds: ^bb2, ^bb4 +// CHECK-NEXT: %12 = "neura.cast"(%10) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %13 = "neura.icmp"(%12, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %13 : i1 then to ^bb4 else to ^bb5 +// CHECK-NEXT: ^bb4: // pred: ^bb3 +// CHECK-NEXT: %14 = neura.load_indexed %arg0[%7, %12 : index, index] memref : i32 +// CHECK-NEXT: %15 = "neura.add"(%11, %14) : (i32, i32) -> i32 +// CHECK-NEXT: %16 = "neura.add"(%12, %0) : (index, index) -> index +// CHECK-NEXT: %17 = "neura.cast"(%16) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %17, %15 : i64, i32 to ^bb3 +// CHECK-NEXT: ^bb5: // pred: ^bb3 +// CHECK-NEXT: %18 = "neura.add"(%7, %0) : (index, index) -> index +// CHECK-NEXT: %19 = "neura.cast"(%18) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %19, %11 : i64, i32 to ^bb1 +// CHECK-NEXT: ^bb6: // pred: ^bb1 +// CHECK-NEXT: "neura.return"(%6) : (i32) -> () +// CHECK-NEXT: } + +// CTRL2DATA: func.func @_Z27perfect_nested_reduction_2dPA128_i(%arg0: memref) -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %1 = "neura.grant_always"(%0) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = 128 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %3 = "neura.grant_always"(%2) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %4 = "neura.constant"() <{value = 0 : i32}> : () -> !neura.data +// CTRL2DATA-NEXT: %5 = "neura.grant_once"(%4) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %6 = "neura.constant"() <{value = 0 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %7 = "neura.grant_always"(%6) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %8 = "neura.cast"(%6) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %9 = "neura.grant_once"(%8) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %10 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %11 = "neura.phi"(%10, %5) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %12 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %13 = "neura.phi"(%12, %9) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %14 = "neura.cast"(%13) <{cast_type = "int_to_index"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %15 = "neura.icmp"(%14, %3) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %16 = "neura.not"(%15) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %17 = neura.grant_predicate %7, %15 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %18 = "neura.cast"(%17) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %19 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %20 = "neura.phi"(%19, %11) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %21 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %22 = "neura.phi"(%21, %18) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %23 = "neura.cast"(%22) <{cast_type = "int_to_index"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %24 = "neura.icmp"(%23, %3) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %25 = "neura.not"(%24) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %26 = neura.grant_predicate %14, %24 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %27 = neura.grant_predicate %23, %24 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %28 = neura.load_indexed %arg0[%26, %27 : !neura.data, !neura.data] memref : !neura.data +// CTRL2DATA-NEXT: %29 = "neura.add"(%20, %28) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %30 = neura.grant_predicate %1, %24 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %31 = "neura.add"(%27, %30) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %32 = "neura.cast"(%31) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %32 -> %21 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %29 -> %19 : !neura.data !neura.data +// CTRL2DATA-NEXT: %33 = neura.grant_predicate %14, %25 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %34 = neura.grant_predicate %1, %25 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %35 = "neura.add"(%33, %34) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %36 = "neura.cast"(%35) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %36 -> %12 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %20 -> %10 : !neura.data !neura.data +// CTRL2DATA-NEXT: "neura.return"(%11) : (!neura.data) -> () +// CTRL2DATA-NEXT: } \ No newline at end of file diff --git a/test/controflow_fuse/simpleloop/simpleloop.cpp b/test/controflow_fuse/simpleloop/simpleloop.cpp new file mode 100644 index 00000000..9fa01d72 --- /dev/null +++ b/test/controflow_fuse/simpleloop/simpleloop.cpp @@ -0,0 +1,10 @@ +int simpleloop() { + int start = 0; + int multiplier = 1; + int result = start; + for (int i = 0; i < 128; i++) { + result = result * multiplier + i; + } + + return result; +} \ No newline at end of file diff --git a/test/controflow_fuse/simpleloop/simpleloop.mlir b/test/controflow_fuse/simpleloop/simpleloop.mlir new file mode 100644 index 00000000..0684bc9f --- /dev/null +++ b/test/controflow_fuse/simpleloop/simpleloop.mlir @@ -0,0 +1,65 @@ +// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura --leverage-predicated-value --transform-ctrl-to-data-flow | FileCheck %s -check-prefix=CTRL2DATA + +module attributes {} { + func.func @_Z10simpleloopv() -> i32 attributes {llvm.linkage = #llvm.linkage} { + %c0_i32 = arith.constant 0 : i32 + %0 = affine.for %arg0 = 0 to 128 iter_args(%arg1 = %c0_i32) -> (i32) { + %1 = arith.index_cast %arg0 : index to i32 + %2 = arith.addi %arg1, %1 : i32 + affine.yield %2 : i32 + } + return %0 : i32 + } +} + +// CHECK: func.func @_Z10simpleloopv() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index +// CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index +// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : i32}> : () -> i32 +// CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : index}> : () -> index +// CHECK-NEXT: %4 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %4, %2 : i64, i32 to ^bb1 +// CHECK-NEXT: ^bb1(%5: i64, %6: i32): // 2 preds: ^bb0, ^bb2 +// CHECK-NEXT: %7 = "neura.cast"(%5) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %8 = "neura.icmp"(%7, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %8 : i1 then to ^bb2 else to ^bb3 +// CHECK-NEXT: ^bb2: // pred: ^bb1 +// CHECK-NEXT: %9 = "neura.cast"(%7) <{cast_type = "index_to_int"}> : (index) -> i32 +// CHECK-NEXT: %10 = "neura.add"(%6, %9) : (i32, i32) -> i32 +// CHECK-NEXT: %11 = "neura.add"(%7, %0) : (index, index) -> index +// CHECK-NEXT: %12 = "neura.cast"(%11) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %12, %10 : i64, i32 to ^bb1 +// CHECK-NEXT: ^bb3: // pred: ^bb1 +// CHECK-NEXT: "neura.return"(%6) : (i32) -> () +// CHECK-NEXT: } + + +// CTRL2DATA: func.func @_Z10simpleloopv() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %1 = "neura.grant_always"(%0) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = 128 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %3 = "neura.grant_always"(%2) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %4 = "neura.constant"() <{value = 0 : i32}> : () -> !neura.data +// CTRL2DATA-NEXT: %5 = "neura.grant_once"(%4) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %6 = "neura.constant"() <{value = 0 : index}> : () -> !neura.data +// CTRL2DATA-NEXT: %7 = "neura.cast"(%6) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %8 = "neura.grant_once"(%7) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %9 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %10 = "neura.phi"(%9, %5) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %11 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %12 = "neura.phi"(%11, %8) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %13 = "neura.cast"(%12) <{cast_type = "int_to_index"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %14 = "neura.icmp"(%13, %3) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %15 = "neura.not"(%14) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %16 = neura.grant_predicate %13, %14 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %17 = "neura.cast"(%16) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %18 = "neura.add"(%10, %17) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %19 = neura.grant_predicate %1, %14 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %20 = "neura.add"(%16, %19) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %21 = "neura.cast"(%20) <{cast_type = "index_to_int"}> : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %21 -> %11 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %18 -> %9 : !neura.data !neura.data +// CTRL2DATA-NEXT: "neura.return"(%10) : (!neura.data) -> () +// CTRL2DATA-NEXT: } diff --git a/test/neura/ctrl/branch_without_arg.mlir b/test/neura/ctrl/branch_without_arg.mlir index 680cf88a..10ef442b 100644 --- a/test/neura/ctrl/branch_without_arg.mlir +++ b/test/neura/ctrl/branch_without_arg.mlir @@ -62,13 +62,13 @@ func.func @test(%in: i64) -> f32 { // CTRL2DATA-NEXT: %8 = "neura.grant_once"(%7) : (!neura.data) -> !neura.data // CTRL2DATA-NEXT: %9 = "neura.icmp"(%arg0, %0) <{cmpType = "eq"}> : (i64, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %10 = "neura.grant_once"(%9) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %11 = neura.grant_predicate %6, %10 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %12 = neura.grant_predicate %8, %10 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %13 = "neura.not"(%10) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %14 = neura.grant_predicate %2, %13 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %15 = neura.grant_predicate %4, %13 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %11 = "neura.not"(%10) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %12 = neura.grant_predicate %6, %10 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %13 = neura.grant_predicate %8, %10 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %14 = neura.grant_predicate %2, %11 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %15 = neura.grant_predicate %4, %11 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %16 = "neura.fadd"(%14, %15) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %17 = "neura.fmul"(%11, %12) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %17 = "neura.fmul"(%12, %13) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %18 = "neura.phi"(%16, %17) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: "neura.return"(%18) : (!neura.data) -> () -// CTRL2DATA-NEXT: } \ No newline at end of file +// CTRL2DATA-NEXT: } \ No newline at end of file