diff --git a/.gitignore b/.gitignore index fb05ba10..c74f7e72 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,8 @@ build/ !arch_spec_example.yaml !architecture.yaml /test/**/*.asm +/test/**/*.json +/test/**/.sh .lit_test_times.txt lit.cfg *.dot diff --git a/lib/NeuraDialect/Transforms/CanonicalizeLiveInPass.cpp b/lib/NeuraDialect/Transforms/CanonicalizeLiveInPass.cpp index 260d09db..f19e3b2c 100644 --- a/lib/NeuraDialect/Transforms/CanonicalizeLiveInPass.cpp +++ b/lib/NeuraDialect/Transforms/CanonicalizeLiveInPass.cpp @@ -3,11 +3,15 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Block.h" +#include "mlir/IR/Dominance.h" #include "mlir/IR/Operation.h" #include "mlir/IR/Region.h" #include "mlir/IR/Value.h" #include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" #include #include @@ -17,24 +21,348 @@ using namespace mlir; #include "NeuraDialect/NeuraPasses.h.inc" namespace { -LogicalResult promoteLiveInValuesToBlockArgs(Region ®ion) { +struct DirectDominatingLiveIn { + // The live-in value. + Value value; + // The block where the live-in value is defined. + Block *defining_block; + // The block where the live-in value is used. + Block *using_block; +}; + +// Checks if two blocks form a single-source-single-sink pattern with +// conditional control flow between them. +// +// Pattern Structure: +// [ Source Block A ] +// / \ +// / \ +// [ Block B ] [ Block C ] +// \ / +// \ / +// [ Sink Block D ] +// +// Key Properties: +// 1. Source block A dominates sink block D +// - All paths to D must go through A +// 2. Sink block D post-dominates source block A +// - All paths from A eventually reach D +// 3. There exists at least one conditional branch (cond_br) between A and D +// - Control flow diverges and then converges +// 4. No back edges (loop-free) +// - Neither branch target of any cond_br dominates the cond_br block itself +// +// Examples of Valid Patterns: +// +// 1. Simple if-else: +// [ A: cond_br ] +// / \ +// [ B: then ] [ C: else ] +// \ / +// [ D: merge ] +// +// 2. Asymmetric branches: +// [ A: cond_br ] +// / \ +// [ B ] | +// \ / +// [ D: merge ] +// +// Counter-examples (Not Valid): +// +// 1. Loop structure (has back edge): +// [ A: cond_br ] <---+ +// / \ | +// [ B: exit ] [ C ] | +// \------+ +// +// 2. Entry block as source: +// [ Entry Block ] <- Excluded to maintain compatibility +// | with TransformCtrlToDataFlowPass +// [ cond_br ] +// +// This pattern is used to identify direct dataflow live-ins that cross +// conditional branches, enabling specialized optimization for values that +// flow through divergent-convergent control flow regions. +bool isSingleSourceSingleSinkPattern(Block *defining_block, Block *using_block, + DominanceInfo &dom_info, + PostDominanceInfo &post_dom_info) { + // 1. If defining_block and using_block are the same, then there are no + // conditional branches on the path. + if (defining_block == using_block) { + return false; + } + + // 2. defining_block must dominate using_block. + // This ensures that all paths to using_block go through defining_block. + if (!dom_info.dominates(defining_block, using_block)) { + return false; + } + + // 3. using_block must post-dominate defining_block. + // This ensures that all paths from defining_block eventually reach + // using_block. + if (!post_dom_info.postDominates(using_block, defining_block)) { + return false; + } + + // 4. If defining_block is the entry block of the region, it is not considered + // as crossing a conditional branch. + // Avoids violating assertions in TransformCtrlToDataFlowPass.cpp. + if (defining_block == &defining_block->getParent()->front()) { + return false; + } + + // 5. Checks if using_block is a direct successor (no intermediate blocks) of + // defining_block. + for (Block *succ : defining_block->getSuccessors()) { + if (succ == using_block) { + Operation *term_op = defining_block->getTerminator(); + // If the terminator is an unconditional branch, then no conditional + // branch exists on the path. + if (isa(term_op)) { + return false; + } + // If it is a conditional branch, but both targets are using_block, it is + // also considered no real branch. + if (auto cond_br = dyn_cast(term_op)) { + if (cond_br.getTrueDest() == using_block && + cond_br.getFalseDest() == using_block) { + return false; + } + } + } + } + + // 6. Finds any conditional branch on the paths from defining_block to + // using_block. This is to find any conditional branch divergence between the + // defining_block and using_block. + // Because we also support the case where defining_block itself does not + // contain cond_br (e.g., E in this example). + // [ E: br ] + // | + // [ A: cond_br ] + // / \ + // [ B: then ] [ C: else ] + // \ / + // [ D: merge ] + bool found_conditional_branch = false; + Block *conditional_branch_block = nullptr; + + Region *region = defining_block->getParent(); + for (Block &block : region->getBlocks()) { + if (&block == defining_block || &block == using_block) { + continue; + } + + // Checks if this block is on the path from defining_block to using_block. + if (dom_info.dominates(defining_block, &block) && + dom_info.dominates(&block, using_block)) { + + // Checks if this block's terminator is a conditional branch. + Operation *term_op = block.getTerminator(); + if (auto cond_br = dyn_cast(term_op)) { + Block *true_dest = cond_br.getTrueDest(); + Block *false_dest = cond_br.getFalseDest(); + + // Ensures both branch targets are different (true conditional branch). + if (true_dest != false_dest) { + found_conditional_branch = true; + conditional_branch_block = █ + break; + } + } + } + } + + // 7. Checks the terminator of defining_block itself. + Operation *defining_term = defining_block->getTerminator(); + if (auto cond_br = dyn_cast(defining_term)) { + Block *true_dest = cond_br.getTrueDest(); + Block *false_dest = cond_br.getFalseDest(); + if (true_dest != false_dest) { + found_conditional_branch = true; + conditional_branch_block = defining_block; + } + } + + if (!found_conditional_branch) { + return false; + } + + // 8. Key Constraint: Verifies that BOTH branches eventually reach using_block + // WITHOUT creating a loop back to conditional_branch_block or earlier. + assert(conditional_branch_block && + "Must have found a conditional branch block"); + + Operation *cond_term = conditional_branch_block->getTerminator(); + auto cond_br = dyn_cast(cond_term); + assert(cond_br && "Must be a conditional branch"); + + Block *true_dest = cond_br.getTrueDest(); + Block *false_dest = cond_br.getFalseDest(); + + // Checks loop back edge: If either branch goes back to the conditional branch + // block or any of its dominators, it creates a loop. + if (true_dest == conditional_branch_block || + dom_info.dominates(true_dest, conditional_branch_block)) { + llvm::errs() + << "[CanoLiveIn] True branch creates a back edge (loop pattern)\n"; + return false; + } + + if (false_dest == conditional_branch_block || + dom_info.dominates(false_dest, conditional_branch_block)) { + llvm::errs() + << "[CanoLiveIn] False branch creates a back edge (loop pattern)\n"; + return false; + } + + // Checks if both branches can reach using_block. + bool true_reaches = (true_dest == using_block); + if (!true_reaches) { + if (dom_info.dominates(true_dest, using_block)) { + true_reaches = true; + } else { + for (Block *pred : using_block->getPredecessors()) { + if (pred == true_dest || dom_info.dominates(true_dest, pred)) { + true_reaches = true; + break; + } + } + } + } + + bool false_reaches = (false_dest == using_block); + if (!false_reaches) { + if (dom_info.dominates(false_dest, using_block)) { + false_reaches = true; + } else { + for (Block *pred : using_block->getPredecessors()) { + if (pred == false_dest || dom_info.dominates(false_dest, pred)) { + false_reaches = true; + break; + } + } + } + } + + if (!true_reaches || !false_reaches) { + return false; + } + + return true; +} + +DenseMap> +identifyDirectDominatingLiveIns(Region ®ion, DominanceInfo &dom_info, + PostDominanceInfo &post_dom_info) { + DenseMap> + using_block_to_dominating_direct_live_ins; + for (Block &block : region.getBlocks()) { + // Skips the entry block. + if (&block == ®ion.front()) { + continue; + } + + // Collects direct live-in values for the block. + SetVector live_ins; + for (Operation &op : block.getOperations()) { + for (Value operand : op.getOperands()) { + // If the operand is defined in another block, it is a live-in value. + if (auto block_arg = dyn_cast(operand)) { + if (block_arg.getOwner() != &block) { + live_ins.insert(operand); + } + } else { + Operation *def_op = operand.getDefiningOp(); + if (def_op && def_op->getBlock() != &block) { + live_ins.insert(operand); + } + } + } + } + + // Checks each live-in value to see if it has direct dominating + // dependencies. + // Direct dominating dependency means: + // 1. The defining block of the live-in value dominates the using block. + // 2. The using block post-dominates the defining block. + // 3. We can ensure the live-in in the using block is valid once the + // defining block is executed. + for (Value live_in : live_ins) { + Block *defining_block = nullptr; + + if (auto block_arg = dyn_cast(live_in)) { + defining_block = block_arg.getOwner(); + } else { + Operation *def_op = live_in.getDefiningOp(); + if (def_op) { + defining_block = def_op->getBlock(); + } + } + + if (!defining_block) { + continue; + } + + // Pattern 1: Single-Source-Single-Sink with conditional branches. + if (isSingleSourceSingleSinkPattern(defining_block, &block, dom_info, + post_dom_info)) { + DirectDominatingLiveIn direct_dominating_live_in; + direct_dominating_live_in.value = live_in; + direct_dominating_live_in.defining_block = defining_block; + direct_dominating_live_in.using_block = █ + + using_block_to_dominating_direct_live_ins[&block].push_back( + direct_dominating_live_in); + } + + // TODO: Add more direct dominating live-in patterns based on dominance + // and post-dominance analysis. Issue: + // https://github.com/coredac/dataflow/issues/159 + } + } + return using_block_to_dominating_direct_live_ins; +} + +LogicalResult promoteLiveInValuesToBlockArgs(Region ®ion, + DominanceInfo &dom_info, + PostDominanceInfo &post_dom_info) { if (region.empty()) { return success(); } + + DenseMap> + direct_dominating_live_ins = + identifyDirectDominatingLiveIns(region, dom_info, post_dom_info); + + // Maps each block to its dominating direct live-in values. + DenseMap> direct_dominating_live_in_values; + for (auto &[block, dataflow_live_ins] : direct_dominating_live_ins) { + for (auto &dataflow_live_in : dataflow_live_ins) { + direct_dominating_live_in_values[block].insert(dataflow_live_in.value); + } + } + // Collects direct live-in values for each block in the region. // Without considering the transitive dependencies. DenseMap> direct_live_ins; - Block &entry_block = region.front(); // Initializes the direct live-ins for each block. for (Block &block : region.getBlocks()) { - if (&block == &entry_block) { + if (&block == ®ion.front()) { continue; } SetVector live_ins; for (Operation &op : block.getOperations()) { for (Value operand : op.getOperands()) { + // If the operand is a direct dominating live-in value, skip it. + if (direct_dominating_live_in_values[&block].contains(operand)) { + continue; + } + // If the operand is defined in another block, it is a live-in value. if (auto block_arg = dyn_cast(operand)) { if (block_arg.getOwner() != &block) { @@ -54,9 +382,9 @@ LogicalResult promoteLiveInValuesToBlockArgs(Region ®ion) { } } - // If we update a branch or conditional branch, we may introduce new live-ins - // for a block. So we need to propagate live-in values until a fixed point is - // reached. + // If we update a branch or conditional branch, we may introduce new + // live-ins for a block. So we need to propagate live-in values until a + // fixed point is reached. // ************************************************************************* // For example, consider this control flow: @@ -119,6 +447,12 @@ LogicalResult promoteLiveInValuesToBlockArgs(Region ®ion) { // Checks if the live-in value in successor block is defined in the // current block. for (Value live_in : succ_live_ins) { + // If it is a direct dominating live-in value for the successor block, + // we skip it. + if (direct_dominating_live_in_values[succ_block].contains(live_in)) { + continue; + } + // If it is defined in the current block, that means it is not a // live-in value for the current block. We can skip it. if (Operation *def_op = live_in.getDefiningOp()) { @@ -271,7 +605,6 @@ LogicalResult promoteLiveInValuesToBlockArgs(Region ®ion) { } } } - return success(); } @@ -313,7 +646,11 @@ struct CanonicalizeLiveInPass return; } - if (failed(promoteLiveInValuesToBlockArgs(*region))) { + DominanceInfo dom_info(op); + PostDominanceInfo post_dom_info(op); + + if (failed(promoteLiveInValuesToBlockArgs(*region, dom_info, + post_dom_info))) { signalPassFailure(); return; } diff --git a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp index d3a1481b..8781bfaf 100644 --- a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp +++ b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp @@ -488,19 +488,19 @@ void transformControlFlowToDataFlow(Region ®ion, ControlFlowInfo &ctrl_info, // Sorts blocks by reverse post-order traversal to maintain SSA dominance. Block *entry_block = ®ion.front(); SmallVector blocks_to_flatten; - + // Uses reverse post-order: visit successors before predecessors. // This ensures that when we move blocks, definitions come before uses. llvm::SetVector visited; // Post-order traversal result, used for sorting blocks. SmallVector po_order; - - std::function po_traverse = [&](Block *block) { + + std::function po_traverse = [&](Block *block) { // Records visited block and skips if already visited. if (!visited.insert(block)) { return; } - + // Visits successors first (post-order). Operation *terminator = block->getTerminator(); if (auto br = dyn_cast(terminator)) { @@ -509,16 +509,16 @@ void transformControlFlowToDataFlow(Region ®ion, ControlFlowInfo &ctrl_info, po_traverse(cond_br.getTrueDest()); po_traverse(cond_br.getFalseDest()); } - + // Adds to post-order. po_order.push_back(block); }; - + po_traverse(entry_block); - + // Reverses post-order for forward traversal. SmallVector rpo_order(po_order.rbegin(), po_order.rend()); - + // Collects non-entry blocks in RPO order. for (Block *block : rpo_order) { if (block != entry_block) { diff --git a/test/lit.cfg.in b/test/lit.cfg.in index d93c0990..5ec6d9e3 100644 --- a/test/lit.cfg.in +++ b/test/lit.cfg.in @@ -6,7 +6,7 @@ config.test_format = lit.formats.ShTest(True) config.suffixes = ['.mlir'] config.test_source_root = os.path.dirname(__file__) config.test_exec_root = os.path.dirname(__file__) -config.excludes = ['samples'] +config.excludes = ['samples', 'benchmark'] # Tool substitutions from CMake config.substitutions.append(('mlir-neura-opt', '@MLIR_NEURA_OPT@')) diff --git a/test/neura/for_loop/relu_test.mlir b/test/neura/for_loop/relu_test.mlir index 193931b5..96f6b365 100644 --- a/test/neura/for_loop/relu_test.mlir +++ b/test/neura/for_loop/relu_test.mlir @@ -18,8 +18,21 @@ // RUN: --transform-ctrl-to-data-flow \ // RUN: | FileCheck %s --check-prefix=CTRL2DATA -// CHECK: func.func -// CHECK: accelerator = "neura" +// RUN: mlir-neura-opt %t-relu.mlir\ +// RUN: --assign-accelerator \ +// RUN: --lower-llvm-to-neura \ +// RUN: --promote-func-arg-to-const \ +// RUN: --fold-constant \ +// RUN: --canonicalize-live-in \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: --fold-constant \ +// RUN: --insert-data-mov \ +// RUN: --map-to-accelerator="mapping-strategy=heuristic backtrack-config=customized" \ +// RUN: | FileCheck %s --check-prefix=MAPPING + +// CHECK: func.func @_Z6kernelPiS_ +// CHECK-SAME: accelerator = "neura" // CHECK-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !llvm.ptr // CHECK-NEXT: %1 = "neura.constant"() <{value = "%arg1"}> : () -> !llvm.ptr // CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : i64}> : () -> i64 @@ -33,22 +46,23 @@ // CHECK-NEXT: %12 = "neura.gep"(%7, %6) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr // CHECK-NEXT: %13 = "neura.load"(%12) : (!llvm.ptr) -> i32 // CHECK-NEXT: %14 = "neura.icmp"(%13, %8) <{cmpType = "sgt"}> : (i32, i32) -> i1 -// CHECK-NEXT: neura.cond_br %14 : i1 then %9, %6, %13, %10, %11, %7, %8 : !llvm.ptr, i64, i32, i64, i64, !llvm.ptr, i32 to ^bb3 else %6, %10, %11, %7, %8, %9 : i64, i64, i64, !llvm.ptr, i32, !llvm.ptr to ^bb4 +// CHECK-NEXT: neura.cond_br %14 : i1 then %9, %6, %13, %10, %11, %7, %8 : !llvm.ptr, i64, i32, i64, i64, !llvm.ptr, i32 to ^bb3 else %10, %11, %7, %8, %9 : i64, i64, !llvm.ptr, i32, !llvm.ptr to ^bb4 // CHECK-NEXT: ^bb3(%15: !llvm.ptr, %16: i64, %17: i32, %18: i64, %19: i64, %20: !llvm.ptr, %21: i32): // pred: ^bb2 // CHECK-NEXT: %22 = "neura.gep"(%15, %16) <{operandSegmentSizes = array}> : (!llvm.ptr, i64) -> !llvm.ptr // CHECK-NEXT: %23 = "neura.load"(%22) : (!llvm.ptr) -> i32 // CHECK-NEXT: %24 = "neura.add"(%23, %17) : (i32, i32) -> i32 // CHECK-NEXT: "neura.store"(%24, %22) : (i32, !llvm.ptr) -> () -// CHECK-NEXT: neura.br %16, %18, %19, %20, %21, %15 : i64, i64, i64, !llvm.ptr, i32, !llvm.ptr to ^bb4 -// CHECK-NEXT: ^bb4(%25: i64, %26: i64, %27: i64, %28: !llvm.ptr, %29: i32, %30: !llvm.ptr): // 2 preds: ^bb2, ^bb3 -// CHECK-NEXT: %31 = "neura.add"(%25, %26) : (i64, i64) -> i64 -// CHECK-NEXT: %32 = "neura.icmp"(%31, %27) <{cmpType = "eq"}> : (i64, i64) -> i1 -// CHECK-NEXT: neura.cond_br %32 : i1 then to ^bb1 else %31, %28, %29, %30, %26, %27 : i64, !llvm.ptr, i32, !llvm.ptr, i64, i64 to ^bb2 +// CHECK-NEXT: neura.br %18, %19, %20, %21, %15 : i64, i64, !llvm.ptr, i32, !llvm.ptr to ^bb4 +// CHECK-NEXT: ^bb4(%25: i64, %26: i64, %27: !llvm.ptr, %28: i32, %29: !llvm.ptr): // 2 preds: ^bb2, ^bb3 +// CHECK-NEXT: %30 = "neura.add"(%6, %25) : (i64, i64) -> i64 +// CHECK-NEXT: %31 = "neura.icmp"(%30, %26) <{cmpType = "eq"}> : (i64, i64) -> i1 +// CHECK-NEXT: neura.cond_br %31 : i1 then to ^bb1 else %30, %27, %28, %29, %25, %26 : i64, !llvm.ptr, i32, !llvm.ptr, i64, i64 to ^bb2 // CHECK-NEXT: } -// CTRL2DATA: func.func -// CTRL2DATA: accelerator = "neura" +// CTRL2DATA: func.func @_Z6kernelPiS_ +// CTRL2DATA-SAME: accelerator = "neura" +// CTRL2DATA-SAME: dataflow_mode = "predicate" // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data) -> !neura.data // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data @@ -84,36 +98,76 @@ // CTRL2DATA-NEXT: %32 = neura.grant_predicate %21, %26 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %33 = neura.grant_predicate %19, %26 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %34 = "neura.not"(%26) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %35 = neura.grant_predicate %23, %34 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %36 = neura.grant_predicate %15, %34 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %37 = neura.grant_predicate %13, %34 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %38 = neura.grant_predicate %21, %34 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %39 = neura.grant_predicate %19, %34 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %40 = neura.grant_predicate %17, %34 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %41 = "neura.gep"(%27, %28) <{operandSegmentSizes = array}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %42 = "neura.load"(%41) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %43 = "neura.add"(%42, %29) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: "neura.store"(%43, %41) : (!neura.data, !neura.data) -> () -// CTRL2DATA-NEXT: %44 = "neura.phi"(%40, %27) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %45 = "neura.phi"(%39, %33) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %46 = "neura.phi"(%38, %32) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %47 = "neura.phi"(%37, %31) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %48 = "neura.phi"(%36, %30) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %49 = "neura.phi"(%35, %28) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %50 = "neura.add"(%49, %48) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %51 = "neura.icmp"(%50, %47) <{cmpType = "eq"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %52 = "neura.not"(%51) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %53 = neura.grant_predicate %50, %52 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %53 -> %22 : !neura.data !neura.data -// CTRL2DATA-NEXT: %54 = neura.grant_predicate %46, %52 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %54 -> %20 : !neura.data !neura.data -// CTRL2DATA-NEXT: %55 = neura.grant_predicate %45, %52 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %55 -> %18 : !neura.data !neura.data -// CTRL2DATA-NEXT: %56 = neura.grant_predicate %44, %52 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %56 -> %16 : !neura.data !neura.data -// CTRL2DATA-NEXT: %57 = neura.grant_predicate %48, %52 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %57 -> %14 : !neura.data !neura.data -// CTRL2DATA-NEXT: %58 = neura.grant_predicate %47, %52 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %58 -> %12 : !neura.data !neura.data +// CTRL2DATA-NEXT: %35 = neura.grant_predicate %15, %34 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %36 = neura.grant_predicate %13, %34 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %37 = neura.grant_predicate %21, %34 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %38 = neura.grant_predicate %19, %34 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %39 = neura.grant_predicate %17, %34 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %40 = "neura.gep"(%27, %28) <{operandSegmentSizes = array}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %41 = "neura.load"(%40) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %42 = "neura.add"(%41, %29) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: "neura.store"(%42, %40) : (!neura.data, !neura.data) -> () +// CTRL2DATA-NEXT: %43 = "neura.phi"(%39, %27) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %44 = "neura.phi"(%38, %33) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %45 = "neura.phi"(%37, %32) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %46 = "neura.phi"(%36, %31) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %47 = "neura.phi"(%35, %30) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %48 = "neura.add"(%23, %47) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %49 = "neura.icmp"(%48, %46) <{cmpType = "eq"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %50 = "neura.not"(%49) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %51 = neura.grant_predicate %48, %50 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %51 -> %22 : !neura.data !neura.data +// CTRL2DATA-NEXT: %52 = neura.grant_predicate %45, %50 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %52 -> %20 : !neura.data !neura.data +// CTRL2DATA-NEXT: %53 = neura.grant_predicate %44, %50 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %53 -> %18 : !neura.data !neura.data +// CTRL2DATA-NEXT: %54 = neura.grant_predicate %43, %50 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %54 -> %16 : !neura.data !neura.data +// CTRL2DATA-NEXT: %55 = neura.grant_predicate %47, %50 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %55 -> %14 : !neura.data !neura.data +// CTRL2DATA-NEXT: %56 = neura.grant_predicate %46, %50 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %56 -> %12 : !neura.data !neura.data // CTRL2DATA-NEXT: "neura.return"() : () -> () -// CTRL2DATA-NEXT: } \ No newline at end of file +// CTRL2DATA-NEXT: } + + +// MAPPING: func.func @_Z6kernelPiS_ +// MAPPING-SAME: accelerator = "neura", dataflow_mode = "predicate" +// MAPPING-SAME: mapping_info = {compiled_ii = 5 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 5 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32} +// MAPPING-NEXT: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> {mapping_locs = [{id = 11 : i32, resource = "tile", time_step = 0 : i32, x = 3 : i32, y = 2 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %1 = neura.reserve : !neura.data +// MAPPING-NEXT: %2 = "neura.data_mov"(%0) {mapping_locs = [{id = 704 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 0 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %3 = "neura.phi"(%1, %2) {mapping_locs = [{id = 11 : i32, resource = "tile", time_step = 1 : i32, x = 3 : i32, y = 2 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %4 = "neura.data_mov"(%3) {mapping_locs = [{id = 36 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {lhs_value = "%arg0", mapping_locs = [{id = 7 : i32, resource = "tile", time_step = 2 : i32, x = 3 : i32, y = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %6 = "neura.data_mov"(%5) {mapping_locs = [{id = 23 : i32, resource = "link", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %7 = "neura.load"(%6) {mapping_locs = [{id = 11 : i32, resource = "tile", time_step = 3 : i32, x = 3 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %8 = "neura.data_mov"(%7) {mapping_locs = [{id = 37 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {mapping_locs = [{id = 15 : i32, resource = "tile", time_step = 4 : i32, x = 3 : i32, y = 3 : i32}], rhs_value = 0 : i32} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %10 = "neura.data_mov"(%3) {mapping_locs = [{id = 35 : i32, resource = "link", time_step = 1 : i32}, {id = 34 : i32, resource = "link", time_step = 2 : i32}, {id = 896 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 896 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %11 = "neura.data_mov"(%9) {mapping_locs = [{id = 46 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %12 = neura.grant_predicate %10, %11 {mapping_locs = [{id = 14 : i32, resource = "tile", time_step = 5 : i32, x = 2 : i32, y = 3 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %13 = "neura.data_mov"(%7) {mapping_locs = [{id = 704 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 35 : i32, resource = "link", time_step = 4 : i32}, {id = 640 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}, {id = 640 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %14 = "neura.data_mov"(%9) {mapping_locs = [{id = 47 : i32, resource = "link", time_step = 4 : i32}, {id = 35 : i32, resource = "link", time_step = 5 : i32}, {id = 641 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %15 = neura.grant_predicate %13, %14 {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 7 : i32, x = 2 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %16 = "neura.data_mov"(%12) {mapping_locs = [{id = 45 : i32, resource = "link", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {lhs_value = "%arg1", mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 6 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %18 = "neura.data_mov"(%17) {mapping_locs = [{id = 31 : i32, resource = "link", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %19 = "neura.load"(%18) {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 7 : i32, x = 1 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %20 = "neura.data_mov"(%19) {mapping_locs = [{id = 576 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %21 = "neura.data_mov"(%15) {mapping_locs = [{id = 31 : i32, resource = "link", time_step = 7 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %22 = "neura.add"(%20, %21) {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 8 : i32, x = 1 : i32, y = 2 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %23 = "neura.data_mov"(%22) {mapping_locs = [{id = 30 : i32, resource = "link", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %24 = "neura.data_mov"(%17) {mapping_locs = [{id = 34 : i32, resource = "link", time_step = 6 : i32}, {id = 43 : i32, resource = "link", time_step = 7 : i32}, {id = 832 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.store"(%23, %24) {mapping_locs = [{id = 13 : i32, resource = "tile", time_step = 9 : i32, x = 1 : i32, y = 3 : i32}]} : (!neura.data, !neura.data) -> () +// MAPPING-NEXT: %25 = "neura.data_mov"(%3) {mapping_locs = [{id = 704 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %26 = "neura.add"(%25) {mapping_locs = [{id = 11 : i32, resource = "tile", time_step = 2 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %27 = "neura.data_mov"(%26) {mapping_locs = [{id = 35 : i32, resource = "link", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 3 : i32, x = 2 : i32, y = 2 : i32}], rhs_value = 32 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %29 = "neura.data_mov"(%28) {mapping_locs = [{id = 640 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %30 = "neura.not"(%29) {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 4 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %31 = "neura.data_mov"(%26) {mapping_locs = [{id = 704 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}, {id = 35 : i32, resource = "link", time_step = 3 : i32}, {id = 640 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %32 = "neura.data_mov"(%30) {mapping_locs = [{id = 641 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %33 = neura.grant_predicate %31, %32 {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 5 : i32, x = 2 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %33 -> %1 {mapping_locs = [{id = 32 : i32, resource = "link", time_step = 5 : i32}]} : !neura.data !neura.data +// MAPPING-NEXT: "neura.return"() {mapping_locs = [{id = 2 : i32, resource = "tile", time_step = 9 : i32, x = 2 : i32, y = 0 : i32}]} : () -> ()