Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 26 additions & 4 deletions lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -485,10 +485,10 @@ void transformControlFlowToDataFlow(Region &region, ControlFlowInfo &ctrl_info,
}

// Flattens blocks into the entry block.
Block *entryBlock = &region.front();
Block *entry_block = &region.front();
SmallVector<Block *> blocks_to_flatten;
for (Block &block : region) {
if (&block != entryBlock) {
if (&block != entry_block) {
blocks_to_flatten.push_back(&block);
}
}
Expand All @@ -508,12 +508,12 @@ void transformControlFlowToDataFlow(Region &region, ControlFlowInfo &ctrl_info,
auto &ops = block->getOperations();
while (!ops.empty()) {
Operation &op = ops.front();
op.moveBefore(&entryBlock->back());
op.moveBefore(&entry_block->back());
}
}

// Erases any remaining br/cond_br that were moved into the entry block.
for (Operation &op : llvm::make_early_inc_range(*entryBlock)) {
for (Operation &op : llvm::make_early_inc_range(*entry_block)) {
if (isa<neura::Br>(op) || isa<neura::CondBr>(op)) {
op.erase();
}
Expand All @@ -523,6 +523,28 @@ void transformControlFlowToDataFlow(Region &region, ControlFlowInfo &ctrl_info,
for (Block *block : blocks_to_flatten) {
block->erase();
}

// Handles return operations in the entry block.
SmallVector<neura::ReturnOp> return_ops;
for (Operation &op : *entry_block) {
if (neura::ReturnOp return_op = dyn_cast<neura::ReturnOp>(op)) {
return_ops.push_back(return_op);
}
}

if (return_ops.size() > 1) {
llvm::errs() << "[ctrl2data] Error: Multiple ReturnOps found in the entry "
"block after flattening.\n";
assert(false &&
"Multiple ReturnOps found in the entry block after flattening.");
} else if (return_ops.size() == 1) {
neura::ReturnOp last_return = return_ops.back();
last_return->moveAfter(&entry_block->back());
} else {
llvm::errs() << "[ctrl2data] Error: No ReturnOp found in the entry "
"block after flattening.\n";
assert(false && "No ReturnOp found in the entry block after flattening.");
}
}

namespace {
Expand Down
File renamed without changes.
37 changes: 37 additions & 0 deletions test/neura/for_loop/relu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#include <stdio.h>

#define N 32

int input[N] = {1, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10,
-11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21,
22, -23, 24, -25, 26, -27, 28, -29, 30, -31};

int output[N];

void kernel(int input[], int output[]);

// Test driver: clears the global output buffer, runs the kernel over the
// global input array, and prints every resulting element.
int main() {
  // Zero the destination buffer, since the kernel accumulates into it.
  for (int &slot : output) {
    slot = 0;
  }

  kernel(input, output);

  // Emit one line per output element.
  int idx = 0;
  while (idx < N) {
    printf("output[%d] = %d\n", idx, output[idx]);
    ++idx;
  }

  return 0;
}

// ReLU-style accumulation kernel over N elements.
//
// For each index i in [0, N): if input[i] is strictly positive, input[i] is
// added to output[i]; otherwise output[i] is left numerically unchanged
// (zero is added). Results are accumulated with +=, not assigned, so the
// caller must initialize output beforehand.
//
// @param input  Source array; the first N elements are read.
// @param output Destination array; the first N elements are updated in place.
//
// NOTE(review): the explicit else branch is a no-op at the value level;
// presumably it is kept so the source has an if/else shape for the
// control-flow tests that compile this file -- confirm before simplifying.
void kernel(int input[], int output[]) {
for (int i = 0; i < N; ++i) {
if (input[i] > 0) {
// Positive element: accumulate it into the output.
output[i] += input[i];
} else {
// Non-positive element: contributes zero.
output[i] += 0;
}
}
}
115 changes: 115 additions & 0 deletions test/neura/for_loop/relu_test.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// Compiles the original kernel to MLIR, then lowers it back to LLVM, and eventually to a binary.
// RUN: clang++ -S -emit-llvm -O1 -o %t-relu.ll relu.cpp
// RUN: mlir-translate --import-llvm %t-relu.ll -o %t-relu.mlir

// RUN: mlir-neura-opt %t-relu.mlir\
// RUN: --assign-accelerator \
// RUN: --lower-llvm-to-neura \
// RUN: --canonicalize-live-in \
// RUN: | FileCheck %s

// RUN: mlir-neura-opt %t-relu.mlir\
// RUN: --assign-accelerator \
// RUN: --lower-llvm-to-neura \
// RUN: --canonicalize-live-in \
// RUN: --leverage-predicated-value \
// RUN: --transform-ctrl-to-data-flow \
// RUN: | FileCheck %s --check-prefix=CTRL2DATA

// CHECK: llvm.func local_unnamed_addr @_Z6kernelPiS_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}) attributes {accelerator = "neura", memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} {
// CHECK-NEXT: %0 = "neura.constant"() <{predicate = true, value = "%arg0"}> : () -> !llvm.ptr
// CHECK-NEXT: %1 = "neura.constant"() <{predicate = true, value = "%arg1"}> : () -> !llvm.ptr
// CHECK-NEXT: %2 = "neura.constant"() <{predicate = true, value = 0 : i64}> : () -> i64
// CHECK-NEXT: %3 = "neura.constant"() <{predicate = true, value = 0 : i32}> : () -> i32
// CHECK-NEXT: %4 = "neura.constant"() <{predicate = true, value = 1 : i64}> : () -> i64
// CHECK-NEXT: %5 = "neura.constant"() <{predicate = true, value = 32 : i64}> : () -> i64
// CHECK-NEXT: neura.br %2, %0, %3, %1, %4, %5 : i64, !llvm.ptr, i32, !llvm.ptr, i64, i64 to ^bb2
// CHECK-NEXT: ^bb1: // pred: ^bb4
// CHECK-NEXT: "neura.return"() : () -> ()
// CHECK-NEXT: ^bb2(%6: i64, %7: !llvm.ptr, %8: i32, %9: !llvm.ptr, %10: i64, %11: i64): // 2 preds: ^bb0, ^bb4
// CHECK-NEXT: %12 = "neura.gep"(%7, %6) : (!llvm.ptr, i64) -> !llvm.ptr
// CHECK-NEXT: %13 = "neura.load"(%12) : (!llvm.ptr) -> i32
// CHECK-NEXT: %14 = "neura.icmp"(%13, %8) <{cmpType = "sgt"}> : (i32, i32) -> i1
// CHECK-NEXT: neura.cond_br %14 : i1 then %9, %6, %13, %10, %11, %7, %8 : !llvm.ptr, i64, i32, i64, i64, !llvm.ptr, i32 to ^bb3 else %6, %10, %11, %7, %8, %9 : i64, i64, i64, !llvm.ptr, i32, !llvm.ptr to ^bb4
// CHECK-NEXT: ^bb3(%15: !llvm.ptr, %16: i64, %17: i32, %18: i64, %19: i64, %20: !llvm.ptr, %21: i32): // pred: ^bb2
// CHECK-NEXT: %22 = "neura.gep"(%15, %16) : (!llvm.ptr, i64) -> !llvm.ptr
// CHECK-NEXT: %23 = "neura.load"(%22) : (!llvm.ptr) -> i32
// CHECK-NEXT: %24 = "neura.add"(%23, %17) : (i32, i32) -> i32
// CHECK-NEXT: "neura.store"(%24, %22) : (i32, !llvm.ptr) -> ()
// CHECK-NEXT: neura.br %16, %18, %19, %20, %21, %15 : i64, i64, i64, !llvm.ptr, i32, !llvm.ptr to ^bb4
// CHECK-NEXT: ^bb4(%25: i64, %26: i64, %27: i64, %28: !llvm.ptr, %29: i32, %30: !llvm.ptr): // 2 preds: ^bb2, ^bb3
// CHECK-NEXT: %31 = "neura.add"(%25, %26) : (i64, i64) -> i64
// CHECK-NEXT: %32 = "neura.icmp"(%31, %27) <{cmpType = "eq"}> : (i64, i64) -> i1
// CHECK-NEXT: neura.cond_br %32 : i1 then to ^bb1 else %31, %28, %29, %30, %26, %27 : i64, !llvm.ptr, i32, !llvm.ptr, i64, i64 to ^bb2
// CHECK-NEXT: }


// CTRL2DATA: llvm.func local_unnamed_addr @_Z6kernelPiS_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}) attributes {accelerator = "neura", memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} {
// CTRL2DATA-NEXT: %0 = "neura.constant"() <{predicate = true, value = "%arg0"}> : () -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data<!llvm.ptr, i1>) -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %2 = "neura.constant"() <{predicate = true, value = "%arg1"}> : () -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %3 = "neura.grant_once"(%2) : (!neura.data<!llvm.ptr, i1>) -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %4 = "neura.constant"() <{predicate = true, value = 0 : i64}> : () -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %5 = "neura.grant_once"(%4) : (!neura.data<i64, i1>) -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %6 = "neura.constant"() <{predicate = true, value = 0 : i32}> : () -> !neura.data<i32, i1>
// CTRL2DATA-NEXT: %7 = "neura.grant_once"(%6) : (!neura.data<i32, i1>) -> !neura.data<i32, i1>
// CTRL2DATA-NEXT: %8 = "neura.constant"() <{predicate = true, value = 1 : i64}> : () -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %9 = "neura.grant_once"(%8) : (!neura.data<i64, i1>) -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %10 = "neura.constant"() <{predicate = true, value = 32 : i64}> : () -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %11 = "neura.grant_once"(%10) : (!neura.data<i64, i1>) -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %12 = neura.reserve : !neura.data<i64, i1>
// CTRL2DATA-NEXT: %13 = "neura.phi"(%12, %11) : (!neura.data<i64, i1>, !neura.data<i64, i1>) -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %14 = neura.reserve : !neura.data<i64, i1>
// CTRL2DATA-NEXT: %15 = "neura.phi"(%14, %9) : (!neura.data<i64, i1>, !neura.data<i64, i1>) -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %16 = neura.reserve : !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %17 = "neura.phi"(%16, %3) : (!neura.data<!llvm.ptr, i1>, !neura.data<!llvm.ptr, i1>) -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %18 = neura.reserve : !neura.data<i32, i1>
// CTRL2DATA-NEXT: %19 = "neura.phi"(%18, %7) : (!neura.data<i32, i1>, !neura.data<i32, i1>) -> !neura.data<i32, i1>
// CTRL2DATA-NEXT: %20 = neura.reserve : !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %21 = "neura.phi"(%20, %1) : (!neura.data<!llvm.ptr, i1>, !neura.data<!llvm.ptr, i1>) -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %22 = neura.reserve : !neura.data<i64, i1>
// CTRL2DATA-NEXT: %23 = "neura.phi"(%22, %5) : (!neura.data<i64, i1>, !neura.data<i64, i1>) -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %24 = "neura.gep"(%21, %23) : (!neura.data<!llvm.ptr, i1>, !neura.data<i64, i1>) -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %25 = "neura.load"(%24) : (!neura.data<!llvm.ptr, i1>) -> !neura.data<i32, i1>
// CTRL2DATA-NEXT: %26 = "neura.icmp"(%25, %19) <{cmpType = "sgt"}> : (!neura.data<i32, i1>, !neura.data<i32, i1>) -> !neura.data<i1, i1>
// CTRL2DATA-NEXT: %27 = neura.grant_predicate %17, %26 : !neura.data<!llvm.ptr, i1>, !neura.data<i1, i1> -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %28 = neura.grant_predicate %23, %26 : !neura.data<i64, i1>, !neura.data<i1, i1> -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %29 = neura.grant_predicate %25, %26 : !neura.data<i32, i1>, !neura.data<i1, i1> -> !neura.data<i32, i1>
// CTRL2DATA-NEXT: %30 = neura.grant_predicate %15, %26 : !neura.data<i64, i1>, !neura.data<i1, i1> -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %31 = neura.grant_predicate %13, %26 : !neura.data<i64, i1>, !neura.data<i1, i1> -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %32 = neura.grant_predicate %21, %26 : !neura.data<!llvm.ptr, i1>, !neura.data<i1, i1> -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %33 = neura.grant_predicate %19, %26 : !neura.data<i32, i1>, !neura.data<i1, i1> -> !neura.data<i32, i1>
// CTRL2DATA-NEXT: %34 = "neura.not"(%26) : (!neura.data<i1, i1>) -> !neura.data<i1, i1>
// CTRL2DATA-NEXT: %35 = neura.grant_predicate %23, %34 : !neura.data<i64, i1>, !neura.data<i1, i1> -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %36 = neura.grant_predicate %15, %34 : !neura.data<i64, i1>, !neura.data<i1, i1> -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %37 = neura.grant_predicate %13, %34 : !neura.data<i64, i1>, !neura.data<i1, i1> -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %38 = neura.grant_predicate %21, %34 : !neura.data<!llvm.ptr, i1>, !neura.data<i1, i1> -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %39 = neura.grant_predicate %19, %34 : !neura.data<i32, i1>, !neura.data<i1, i1> -> !neura.data<i32, i1>
// CTRL2DATA-NEXT: %40 = neura.grant_predicate %17, %34 : !neura.data<!llvm.ptr, i1>, !neura.data<i1, i1> -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %41 = "neura.gep"(%27, %28) : (!neura.data<!llvm.ptr, i1>, !neura.data<i64, i1>) -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %42 = "neura.load"(%41) : (!neura.data<!llvm.ptr, i1>) -> !neura.data<i32, i1>
// CTRL2DATA-NEXT: %43 = "neura.add"(%42, %29) : (!neura.data<i32, i1>, !neura.data<i32, i1>) -> !neura.data<i32, i1>
// CTRL2DATA-NEXT: "neura.store"(%43, %41) : (!neura.data<i32, i1>, !neura.data<!llvm.ptr, i1>) -> ()
// CTRL2DATA-NEXT: %44 = "neura.phi"(%40, %27) : (!neura.data<!llvm.ptr, i1>, !neura.data<!llvm.ptr, i1>) -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %45 = "neura.phi"(%39, %33) : (!neura.data<i32, i1>, !neura.data<i32, i1>) -> !neura.data<i32, i1>
// CTRL2DATA-NEXT: %46 = "neura.phi"(%38, %32) : (!neura.data<!llvm.ptr, i1>, !neura.data<!llvm.ptr, i1>) -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %47 = "neura.phi"(%37, %31) : (!neura.data<i64, i1>, !neura.data<i64, i1>) -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %48 = "neura.phi"(%36, %30) : (!neura.data<i64, i1>, !neura.data<i64, i1>) -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %49 = "neura.phi"(%35, %28) : (!neura.data<i64, i1>, !neura.data<i64, i1>) -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %50 = "neura.add"(%49, %48) : (!neura.data<i64, i1>, !neura.data<i64, i1>) -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %51 = "neura.icmp"(%50, %47) <{cmpType = "eq"}> : (!neura.data<i64, i1>, !neura.data<i64, i1>) -> !neura.data<i1, i1>
// CTRL2DATA-NEXT: %52 = "neura.not"(%51) : (!neura.data<i1, i1>) -> !neura.data<i1, i1>
// CTRL2DATA-NEXT: %53 = neura.grant_predicate %50, %52 : !neura.data<i64, i1>, !neura.data<i1, i1> -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: neura.ctrl_mov %53 -> %22 : !neura.data<i64, i1> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %54 = neura.grant_predicate %46, %52 : !neura.data<!llvm.ptr, i1>, !neura.data<i1, i1> -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: neura.ctrl_mov %54 -> %20 : !neura.data<!llvm.ptr, i1> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %55 = neura.grant_predicate %45, %52 : !neura.data<i32, i1>, !neura.data<i1, i1> -> !neura.data<i32, i1>
// CTRL2DATA-NEXT: neura.ctrl_mov %55 -> %18 : !neura.data<i32, i1> !neura.data<i32, i1>
// CTRL2DATA-NEXT: %56 = neura.grant_predicate %44, %52 : !neura.data<!llvm.ptr, i1>, !neura.data<i1, i1> -> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: neura.ctrl_mov %56 -> %16 : !neura.data<!llvm.ptr, i1> !neura.data<!llvm.ptr, i1>
// CTRL2DATA-NEXT: %57 = neura.grant_predicate %48, %52 : !neura.data<i64, i1>, !neura.data<i1, i1> -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: neura.ctrl_mov %57 -> %14 : !neura.data<i64, i1> !neura.data<i64, i1>
// CTRL2DATA-NEXT: %58 = neura.grant_predicate %47, %52 : !neura.data<i64, i1>, !neura.data<i1, i1> -> !neura.data<i64, i1>
// CTRL2DATA-NEXT: neura.ctrl_mov %58 -> %12 : !neura.data<i64, i1> !neura.data<i64, i1>
// CTRL2DATA-NEXT: "neura.return"() : () -> ()
// CTRL2DATA-NEXT: }