Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[submodule "test/benchmark/CGRA-Bench"]
path = test/benchmark/CGRA-Bench
url = https://github.com/tancheng/CGRA-Bench.git
ignore = all
174 changes: 165 additions & 9 deletions lib/NeuraDialect/Transforms/CanonicalizeLiveInPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ struct DirectDominatingLiveIn {
// | with TransformCtrlToDataFlowPass
// [ cond_br ]
//
// This pattern is used to identify direct dataflow live-ins that cross
// This pattern is used to identify direct dominating live-ins that cross
// conditional branches, enabling specialized optimization for values that
// flow through divergent-convergent control flow regions.
bool isSingleSourceSingleSinkPattern(Block *defining_block, Block *using_block,
Expand Down Expand Up @@ -254,6 +254,129 @@ bool isSingleSourceSingleSinkPattern(Block *defining_block, Block *using_block,
return true;
}

// Checks if there's a direct unconditional path from defining_block to
// using_block without crossing any conditional branches.
//
// Pattern Structure:
// [ Defining Block A ]
// | (br)
// v
// [ Block B ]
// | (br)
// v
// [ Block C ]
// | (br)
// v
// [ Using Block D ]
//
// Key Properties:
// 1. Defining block dominates using block
// - All paths to using_block go through defining_block
// 2. Using block post-dominates defining block
// - All paths from defining_block eventually reach using_block
// - This ensures there's a unique path
// 3. All intermediate blocks only have unconditional branches (br)
// - No conditional branches (cond_br) on the path
// 4. No loops (no back edges)
//
// Examples of Valid Patterns:
//
// 1. Direct successor:
// [ A: br ]
// |
// [ B ]
//
// 2. Chain of unconditional branches:
// [ A: br ]
// |
// [ B: br ]
// |
// [ C: br ]
// |
// [ D ]
//
// Counter-examples (Not Valid):
//
// 1. Has conditional branch:
// [ A: br ]
// |
// [ B: cond_br ] <- Has cond_br
// / \
// ... ...
//
// 2. Entry block as defining:
// [ Entry: br ] <- Excluded
// |
// [ B ]
//
// 3. Loop structure:
// [ A: br ] <--+
// | |
// [ B: br ]-----+
//
// This pattern identifies the simplest form of direct dominating live-ins where
// values flow through a linear sequence of blocks without any control flow
// divergence.
bool isDirectUnconditionalPattern(Block *defining_block, Block *using_block,
                                  DominanceInfo &dom_info,
                                  PostDominanceInfo &post_dom_info) {
  // 1. If blocks are the same, not a valid pattern.
  if (defining_block == using_block) {
    return false;
  }

  // 2. Defining block must dominate using block.
  if (!dom_info.dominates(defining_block, using_block)) {
    return false;
  }

  // 3. Using block must post-dominate defining block.
  if (!post_dom_info.postDominates(using_block, defining_block)) {
    return false;
  }

  // 4. Entry block cannot be the defining block.
  if (defining_block == &defining_block->getParent()->front()) {
    return false;
  }

  // 5. Checks all blocks on the path from defining_block to using_block.
  //    They must all have unconditional branches (br) only.
  Region *region = defining_block->getParent();
  for (Block &block : region->getBlocks()) {
    // Skips blocks not on the path. A block lies on the path iff it is
    // dominated by defining_block and itself dominates using_block; combined
    // with the dominance/post-dominance checks above, this selects exactly
    // the blocks every execution passes through between the two endpoints.
    if (!dom_info.dominates(defining_block, &block) ||
        !dom_info.dominates(&block, using_block)) {
      continue;
    }

    // For blocks on the path, checks if their terminators are unconditional
    // branches only (excluding using_block itself, whose terminator may be
    // anything).
    if (&block != using_block) {
      Operation *term_op = block.getTerminator();

      // If the terminator is a conditional branch, this pattern is not
      // satisfied.
      if (isa<neura::CondBr>(term_op)) {
        return false;
      }

      // The terminator must be an unconditional branch (br). Any other
      // terminator (e.g. return) on an interior path block would contradict
      // using_block post-dominating defining_block.
      assert(isa<neura::Br>(term_op) &&
             "The terminator must be an unconditional branch.\n");

      // Ensures no backward edges (loops) exist.
      neura::Br br_op = cast<neura::Br>(term_op);
      Block *dest = br_op.getDest();
      // If the destination block dominates current block, it creates a loop.
      if (dom_info.dominates(dest, &block)) {
        return false;
      }
    }
  }
  return true;
}

DenseMap<Block *, SmallVector<DirectDominatingLiveIn>>
identifyDirectDominatingLiveIns(Region &region, DominanceInfo &dom_info,
PostDominanceInfo &post_dom_info) {
Expand All @@ -269,7 +392,8 @@ identifyDirectDominatingLiveIns(Region &region, DominanceInfo &dom_info,
SetVector<Value> live_ins;
for (Operation &op : block.getOperations()) {
for (Value operand : op.getOperands()) {
// If the operand is defined in another block, it is a live-in value.
// If the operand is defined in another block, it is a live-in
// value.
if (auto block_arg = dyn_cast<BlockArgument>(operand)) {
if (block_arg.getOwner() != &block) {
live_ins.insert(operand);
Expand All @@ -290,6 +414,11 @@ identifyDirectDominatingLiveIns(Region &region, DominanceInfo &dom_info,
// 2. The using block post-dominates the defining block.
// 3. We can ensure the live-in in the using block is valid once the
// defining block is executed.
//
// We support two mutually exclusive patterns:
// - Pattern 1: Single-Source-Single-Sink with only one conditional branch
// (cond_br).
// - Pattern 2: Linear path with only unconditional branches (br).
for (Value live_in : live_ins) {
Block *defining_block = nullptr;

Expand All @@ -306,20 +435,45 @@ identifyDirectDominatingLiveIns(Region &region, DominanceInfo &dom_info,
continue;
}

// Pattern 1: Single-Source-Single-Sink with conditional branches.
// Pattern 1: Single-Source-Single-Sink with one conditional branch.
if (isSingleSourceSingleSinkPattern(defining_block, &block, dom_info,
post_dom_info)) {
assert(!isDirectUnconditionalPattern(defining_block, &block, dom_info,
post_dom_info) &&
"Patterns 1 and 2 are mutually exclusive.");
DirectDominatingLiveIn direct_dominating_live_in;
direct_dominating_live_in.value = live_in;
direct_dominating_live_in.defining_block = defining_block;
direct_dominating_live_in.using_block = &block;

using_block_to_dominating_direct_live_ins[&block].push_back(
direct_dominating_live_in);

// Pattern 1 matched, skip Pattern 2 check (they are mutually
// exclusive).
continue;
}

// Pattern 2: Direct Unconditional Path.
if (isDirectUnconditionalPattern(defining_block, &block, dom_info,
post_dom_info)) {
assert(!isSingleSourceSingleSinkPattern(defining_block, &block,
dom_info, post_dom_info) &&
"Patterns 1 and 2 are mutually exclusive.");
DirectDominatingLiveIn direct_dominating_live_in;
direct_dominating_live_in.value = live_in;
direct_dominating_live_in.defining_block = defining_block;
direct_dominating_live_in.using_block = &block;

using_block_to_dominating_direct_live_ins[&block].push_back(
direct_dominating_live_in);

// Pattern 2 matched.
continue;
}

// TODO: Add more direct dominating live-in patterns based on dominance
// and post-dominance analysis. Issue:
// TODO: Add more direct dominating live-in patterns based on
// dominance and post-dominance analysis. Issue:
// https://github.com/coredac/dataflow/issues/159
}
}
Expand Down Expand Up @@ -363,7 +517,8 @@ LogicalResult promoteLiveInValuesToBlockArgs(Region &region,
continue;
}

// If the operand is defined in another block, it is a live-in value.
// If the operand is defined in another block, it is a live-in
// value.
if (auto block_arg = dyn_cast<BlockArgument>(operand)) {
if (block_arg.getOwner() != &block) {
live_ins.insert(operand);
Expand Down Expand Up @@ -447,8 +602,8 @@ LogicalResult promoteLiveInValuesToBlockArgs(Region &region,
// Checks if the live-in value in successor block is defined in the
// current block.
for (Value live_in : succ_live_ins) {
// If it is a direct dominating live-in value for the successor block,
// we skip it.
// If it is a direct dominating live-in value for the successor
// block, we skip it.
if (direct_dominating_live_in_values[succ_block].contains(live_in)) {
continue;
}
Expand Down Expand Up @@ -593,7 +748,8 @@ LogicalResult promoteLiveInValuesToBlockArgs(Region &region,
}
}

// If an update is needed, create a new conditional branch operation.
// If an update is needed, create a new conditional branch
// operation.
if (needs_update) {
OpBuilder builder(cond_br_op);
builder.create<neura::CondBr>(
Expand Down
96 changes: 89 additions & 7 deletions test/e2e/bicg/bicg_kernel.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@
// RUN: -o %t-before-canonicalize.mlir
// RUN: FileCheck %s --input-file=%t-before-canonicalize.mlir -check-prefix=BEFORE_CANONICALIZE

// RUN: mlir-neura-opt %t-kernel.mlir \
// RUN: --assign-accelerator \
// RUN: --lower-llvm-to-neura \
// RUN: --promote-func-arg-to-const \
// RUN: --fold-constant \
// RUN: --canonicalize-live-in \
// RUN: -o %t-after-canonicalize.mlir
// RUN: FileCheck %s --input-file=%t-after-canonicalize.mlir -check-prefix=AFTER_CANONICALIZE

// RUN: mlir-neura-opt %t-kernel.mlir \
// RUN: --assign-accelerator \
Expand Down Expand Up @@ -95,11 +103,67 @@
// BEFORE_CANONICALIZE: ^bb8: // 4 preds: ^bb1, ^bb2, ^bb3, ^bb7
// BEFORE_CANONICALIZE: "neura.return"() : () -> ()

// AFTER_CANONICALIZE: func.func @kernel
// AFTER_CANONICALIZE-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> i32
// AFTER_CANONICALIZE-NEXT: %1 = "neura.constant"() <{value = "%arg1"}> : () -> i32
// AFTER_CANONICALIZE-NEXT: %2 = "neura.constant"() <{value = "%arg3"}> : () -> !llvm.ptr
// AFTER_CANONICALIZE-NEXT: %3 = "neura.constant"() <{value = "%arg4"}> : () -> !llvm.ptr
// AFTER_CANONICALIZE-NEXT: %4 = "neura.constant"() <{value = 3 : i64}> : () -> i64
// AFTER_CANONICALIZE-NEXT: %5 = "neura.constant"() <{value = 0 : i8}> : () -> i8
// AFTER_CANONICALIZE-NEXT: %6 = "neura.constant"() <{value = 0 : i64}> : () -> i64
// AFTER_CANONICALIZE-NEXT: %7 = "neura.icmp"(%0) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
// AFTER_CANONICALIZE-NEXT: neura.cond_br %7 : i1 then %0, %4, %2, %5, %1, %6 : i32, i64, !llvm.ptr, i8, i32, i64 to ^bb1 else %1, %4, %3, %5 : i32, i64, !llvm.ptr, i8 to ^bb2
// AFTER_CANONICALIZE-NEXT: ^bb1(%8: i32, %9: i64, %10: !llvm.ptr, %11: i8, %12: i32, %13: i64): // pred: ^bb0
// AFTER_CANONICALIZE-NEXT: %14 = neura.zext %8 : i32 -> i64
// AFTER_CANONICALIZE-NEXT: %15 = "neura.shl"(%14, %9) : (i64, i64) -> i64
// AFTER_CANONICALIZE-NEXT: "neura.memset"(%10, %11, %15) <{is_volatile = false}> : (!llvm.ptr, i8, i64) -> ()
// AFTER_CANONICALIZE-NEXT: %16 = "neura.icmp"(%12) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
// AFTER_CANONICALIZE-NEXT: neura.cond_br %16 : i1 then %12, %8, %13 : i32, i32, i64 to ^bb4 else to ^bb8
// AFTER_CANONICALIZE-NEXT: ^bb2(%17: i32, %18: i64, %19: !llvm.ptr, %20: i8): // pred: ^bb0
// AFTER_CANONICALIZE-NEXT: %21 = "neura.icmp"(%17) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
// AFTER_CANONICALIZE-NEXT: neura.cond_br %21 : i1 then %17, %18, %19, %20 : i32, i64, !llvm.ptr, i8 to ^bb3 else to ^bb8
// AFTER_CANONICALIZE-NEXT: ^bb3(%22: i32, %23: i64, %24: !llvm.ptr, %25: i8): // pred: ^bb2
// AFTER_CANONICALIZE-NEXT: %26 = neura.zext %22 : i32 -> i64
// AFTER_CANONICALIZE-NEXT: %27 = "neura.shl"(%26, %23) : (i64, i64) -> i64
// AFTER_CANONICALIZE-NEXT: "neura.memset"(%24, %25, %27) <{is_volatile = false}> : (!llvm.ptr, i8, i64) -> ()
// AFTER_CANONICALIZE-NEXT: neura.br to ^bb8
// AFTER_CANONICALIZE-NEXT: ^bb4(%28: i32, %29: i32, %30: i64): // pred: ^bb1
// AFTER_CANONICALIZE-NEXT: %31 = neura.zext %28 : i32 -> i64
// AFTER_CANONICALIZE-NEXT: %32 = neura.zext %29 : i32 -> i64
// AFTER_CANONICALIZE-NEXT: neura.br %30, %30, %31 : i64, i64, i64 to ^bb5
// AFTER_CANONICALIZE-NEXT: ^bb5(%33: i64, %34: i64, %35: i64): // 2 preds: ^bb4, ^bb7
// AFTER_CANONICALIZE-NEXT: %36 = "neura.gep"(%33) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg4"} : (i64) -> !llvm.ptr
// AFTER_CANONICALIZE-NEXT: "neura.store"(%36) {lhs_value = 0.000000e+00 : f64} : (!llvm.ptr) -> ()
// AFTER_CANONICALIZE-NEXT: %37 = "neura.gep"(%33) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg6"} : (i64) -> !llvm.ptr
// AFTER_CANONICALIZE-NEXT: neura.br %34, %33, %35, %34 : i64, i64, i64, i64 to ^bb6
// AFTER_CANONICALIZE-NEXT: ^bb6(%38: i64, %39: i64, %40: i64, %41: i64): // 2 preds: ^bb5, ^bb6
// AFTER_CANONICALIZE-NEXT: %42 = "neura.gep"(%38) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg3"} : (i64) -> !llvm.ptr
// AFTER_CANONICALIZE-NEXT: %43 = "neura.load"(%42) : (!llvm.ptr) -> f64
// AFTER_CANONICALIZE-NEXT: %44 = "neura.load"(%37) : (!llvm.ptr) -> f64
// AFTER_CANONICALIZE-NEXT: %45 = "neura.gep"(%39, %38) <{operandSegmentSizes = array<i32: 0, 2>}> {lhs_value = "%arg2"} : (i64, i64) -> !llvm.ptr
// AFTER_CANONICALIZE-NEXT: %46 = "neura.load"(%45) : (!llvm.ptr) -> f64
// AFTER_CANONICALIZE-NEXT: %47 = "neura.fmul_fadd"(%44, %46, %43) : (f64, f64, f64) -> f64
// AFTER_CANONICALIZE-NEXT: "neura.store"(%47, %42) : (f64, !llvm.ptr) -> ()
// AFTER_CANONICALIZE-NEXT: %48 = "neura.load"(%36) : (!llvm.ptr) -> f64
// AFTER_CANONICALIZE-NEXT: %49 = "neura.load"(%45) : (!llvm.ptr) -> f64
// AFTER_CANONICALIZE-NEXT: %50 = "neura.gep"(%38) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg5"} : (i64) -> !llvm.ptr
// AFTER_CANONICALIZE-NEXT: %51 = "neura.load"(%50) : (!llvm.ptr) -> f64
// AFTER_CANONICALIZE-NEXT: %52 = "neura.fmul_fadd"(%49, %51, %48) : (f64, f64, f64) -> f64
// AFTER_CANONICALIZE-NEXT: "neura.store"(%52, %36) : (f64, !llvm.ptr) -> ()
// AFTER_CANONICALIZE-NEXT: %53 = "neura.add"(%38) {rhs_value = 1 : i64} : (i64) -> i64
// AFTER_CANONICALIZE-NEXT: %54 = "neura.icmp"(%53, %32) <{cmpType = "eq"}> : (i64, i64) -> i1
// AFTER_CANONICALIZE-NEXT: neura.cond_br %54 : i1 then %39, %40, %41 : i64, i64, i64 to ^bb7 else %53, %39, %40, %41 : i64, i64, i64, i64 to ^bb6
// AFTER_CANONICALIZE-NEXT: ^bb7(%55: i64, %56: i64, %57: i64): // pred: ^bb6
// AFTER_CANONICALIZE-NEXT: %58 = "neura.add"(%55) {rhs_value = 1 : i64} : (i64) -> i64
// AFTER_CANONICALIZE-NEXT: %59 = "neura.icmp"(%58, %56) <{cmpType = "eq"}> : (i64, i64) -> i1
// AFTER_CANONICALIZE-NEXT: neura.cond_br %59 : i1 then to ^bb8 else %58, %57, %56 : i64, i64, i64 to ^bb5
// AFTER_CANONICALIZE-NEXT: ^bb8: // 4 preds: ^bb1, ^bb2, ^bb3, ^bb7
// AFTER_CANONICALIZE-NEXT: "neura.return"() : () -> ()
// AFTER_CANONICALIZE-NEXT: }

//MAPPING: module
//MAPPING: func.func
//MAPPING: neura.
//MAPPING: neura.return
//MAPPING: func.func @kernel
//MAPPING-SAME: accelerator = "neura", dataflow_mode = "predicate"
//MAPPING-SAME: mapping_info = {compiled_ii = 12 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 9 : i32, res_mii = 5 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}

// YAML: array_config:
// YAML-NEXT: columns: 4
Expand All @@ -115,14 +179,32 @@
// YAML-NEXT: - timestep: 0
// YAML-NEXT: operations:
// YAML-NEXT: - opcode: "CONSTANT"
// YAML-NEXT: src_operands:
// YAML-NEXT: - operand: "#0"
// YAML-NEXT: color: "RED"
// YAML-NEXT: dst_operands:
// YAML-NEXT: - operand: "EAST"
// YAML-NEXT: color: "RED"

// ASM: PE(0,0):
// ASM-NEXT: {
// ASM-NEXT: CONSTANT, [#0] -> [EAST, RED]
// ASM-NEXT: } (t=0)
// ASM-NEXT: {
// ASM-NEXT: GRANT_ONCE, [] -> [EAST, RED]
// ASM-NEXT: GRANT_ONCE, [] -> [EAST, RED], [NORTH, RED]
// ASM-NEXT: } (t=2)
// ASM-NEXT: {
// ASM-NEXT: GRANT_ONCE, [#0] -> [NORTH, RED]
// ASM-NEXT: } (t=3)
// ASM-NEXT: DATA_MOV, [WEST, RED] -> [$0]
// ASM-NEXT: } (t=4)
// ASM-NEXT: {
// ASM-NEXT: DATA_MOV, [WEST, RED] -> [$1]
// ASM-NEXT: } (t=5)
// ASM-NEXT: {
// ASM-NEXT: GRANT_PREDICATE, [$0], [$1] -> [$0]
// ASM-NEXT: } (t=7)
// ASM-NEXT: {
// ASM-NEXT: ZEXT, [$0] -> [NORTH, RED]
// ASM-NEXT: } (t=8)
// ASM-NEXT: {
// ASM-NEXT: GRANT_ONCE, [] -> [NORTH, RED]
// ASM-NEXT: } (t=11)
11 changes: 5 additions & 6 deletions test/neura/fusion/test.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,10 @@
# RUN: --insert-data-mov \
# RUN: --map-to-accelerator="mapping-strategy=heuristic backtrack-config=customized" %t-kernel.mlir | FileCheck %s --check-prefix=CHECK-MAPPING

# CHECK-FUSED: func.func
# CHECK-FUSED: func.func @_Z6kernelPA1024_iPiS1_S1_S1_
# CHECK-FUSED: accelerator = "neura"
# CHECK-FUSED: %102 = neura.load_indexed %100[%101 : !neura.data<i64, i1>] !neura.data<!llvm.ptr, i1> : !neura.data<i32, i1>
# CHECK-FUSED: %33 = "neura.mul_add"(%30, %31, %32) : (i32, i32, i32) -> i32
# CHECK-FUSED: %42 = "neura.mul_add"(%39, %40, %41) : (i32, i32, i32) -> i32
# CHECK-FUSED-DAG: %94 = neura.load_indexed %92[%93 : !neura.data<i64, i1>] !neura.data<!llvm.ptr, i1> : !neura.data<i32, i1>
# CHECK-FUSED-DAG: %85 = "neura.mul_add"(%82, %83, %84) : (!neura.data<i32, i1>, !neura.data<i32, i1>, !neura.data<i32, i1>) -> !neura.data<i32, i1>
# CHECK-FUSED-DAG: %98 = "neura.mul_add"(%95, %96, %97) : (!neura.data<i32, i1>, !neura.data<i32, i1>, !neura.data<i32, i1>) -> !neura.data<i32, i1>

# CHECK-MAPPING: mapping_info = {compiled_ii = 18 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 9 : i32, res_mii = 5 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}
# CHECK-MAPPING: mapping_locs
# CHECK-MAPPING: mapping_info = {compiled_ii = 14 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 9 : i32, res_mii = 5 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}