coredac · n0thingNoob · Jan 17, 2026 · Jan 15, 2026 · Jan 15, 2026 · Jan 15, 2026
diff --git a/test/benchmark/CGRA-Bench b/test/benchmark/CGRA-Bench
diff --git a/test/e2e/relu/relu_kernel.mlir b/test/e2e/relu/relu_kernel.mlir
@@ -1,11 +1,9 @@
-// Compile the C kernel to LLVM IR (let clang handle headers and macros).
-// Use -I %S so local headers (relu.h, polybench.h) are visible.
-// RUN: clang -S -emit-llvm -O3 -fno-vectorize -fno-unroll-loops -std=c11 \
-// RUN:   -I %S/../../benchmark/CGRA-Bench/kernels/relu -DSMALL_DATASET \
-// RUN:   -o %t-kernel-full.ll %S/../../benchmark/CGRA-Bench/kernels/relu/relu.c
+// Compile the integer-based ReLU C++ kernel (operating on integer arrays) to LLVM IR.
+// RUN: clang++ -S -emit-llvm -O3 -fno-vectorize -fno-unroll-loops -std=c++17 \
+// RUN:   -o %t-kernel-full.ll %S/../../benchmark/CGRA-Bench/kernels/relu/relu_int.cpp
 //
-// Extract only the kernel function(s). PolyBench typically uses kernel_relu,
-// so a regex keeps this robust across name variants.
+// Extract only the kernel function(s) from the IR built from relu_int.cpp. The
+// regex matches any function name containing "kernel", so name variants still work.
 // RUN: llvm-extract --rfunc=".*kernel.*" %t-kernel-full.ll -o %t-kernel-only.ll
 //
 // Import the LLVM IR into MLIR (LLVM dialect).
@@ -32,113 +30,93 @@
 // RUN: FileCheck %s --input-file=tmp-generated-instructions.yaml --check-prefix=YAML
 // RUN: FileCheck %s --input-file=tmp-generated-instructions.asm --check-prefix=ASM
 //
-// Check the mapped MLIR contains proper structure and neura operations.
+// Check that the mapped MLIR contains the key neura operations, matching each full statement (mapping_locs is wildcarded).
 // RUN: FileCheck %s --input-file=%t-mapping.mlir -check-prefix=MAPPING
-// MAPPING:      func.func @kernel(%arg0: i32 {llvm.noundef}, %arg1: i32 {llvm.noundef}, %arg2: i32 {llvm.noundef}, %arg3: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.writeonly}, %arg4: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg5: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readnone}) -> !llvm.void attributes {CConv = #llvm.cconv<ccc>, accelerator = "neura", dataflow_mode = "predicate", linkage = #llvm.linkage<external>, mapping_info = {compiled_ii = 5 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 5 : i32, res_mii = 2 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}, memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} {
-// MAPPING:          %0 = "neura.grant_once"() <{constant_value = 0 : i32}> {dfg_id = 0 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 0 : i32, x = 3 : i32, y = 2 : i32}]} : () -> !neura.data<i32, i1>
-// MAPPING-NEXT:     %1 = neura.reserve {dfg_id = 1 : i32} : !neura.data<i32, i1>
+// MAPPING:          %0 = "neura.grant_once"() <{constant_value = 0 : i64}> {dfg_id = 0 : i32, mapping_locs = {{.*}}} : () -> !neura.data<i64, i1>
+// MAPPING:          %1 = neura.reserve {dfg_id = 1 : i32} : !neura.data<i64, i1>
+// MAPPING:          %2 = "neura.data_mov"(%0) {dfg_id = 3 : i32, mapping_locs = {{.*}}} : (!neura.data<i64, i1>) -> !neura.data<i64, i1>
+// MAPPING:          %3 = neura.phi_start %2, %1 {dfg_id = 4 : i32, mapping_locs = {{.*}}} : !neura.data<i64, i1>, !neura.data<i64, i1> -> !neura.data<i64, i1>
+// MAPPING:          %4 = "neura.data_mov"(%3) {dfg_id = 7 : i32, mapping_locs = {{.*}}} : (!neura.data<i64, i1>) -> !neura.data<i64, i1>
+// MAPPING:          %5 = "neura.gep"(%4) <{operandSegmentSizes = array<i32: 0, 1>}> {dfg_id = 9 : i32, lhs_value = "%arg0", mapping_locs = {{.*}}} : (!neura.data<i64, i1>) -> !neura.data<!llvm.ptr, i1>
+// MAPPING:          %6 = "neura.data_mov"(%5) {dfg_id = 12 : i32, mapping_locs = {{.*}}} : (!neura.data<!llvm.ptr, i1>) -> !neura.data<!llvm.ptr, i1>
+// MAPPING:          %7 = "neura.load"(%6) {dfg_id = 14 : i32, mapping_locs = {{.*}}} : (!neura.data<!llvm.ptr, i1>) -> !neura.data<i32, i1>
+// MAPPING:          %8 = "neura.data_mov"(%7) {dfg_id = 19 : i32, mapping_locs = {{.*}}} : (!neura.data<i32, i1>) -> !neura.data<i32, i1>
+// MAPPING:          %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {dfg_id = 22 : i32, mapping_locs = {{.*}}, rhs_value = 0 : i32} : (!neura.data<i32, i1>) -> !neura.data<i1, i1>
+// MAPPING:          %10 = "neura.data_mov"(%3) {dfg_id = 6 : i32, mapping_locs = {{.*}}} : (!neura.data<i64, i1>) -> !neura.data<i64, i1>
+// MAPPING:          %11 = "neura.data_mov"(%9) {dfg_id = 26 : i32, mapping_locs = {{.*}}} : (!neura.data<i1, i1>) -> !neura.data<i1, i1>
+// MAPPING:          %12 = neura.grant_predicate %10, %11 {dfg_id = 30 : i32, mapping_locs = {{.*}}} : !neura.data<i64, i1>, !neura.data<i1, i1> -> !neura.data<i64, i1>
+// MAPPING:          %13 = "neura.data_mov"(%7) {dfg_id = 18 : i32, mapping_locs = {{.*}}} : (!neura.data<i32, i1>) -> !neura.data<i32, i1>
+// MAPPING:          %14 = "neura.data_mov"(%9) {dfg_id = 25 : i32, mapping_locs = {{.*}}} : (!neura.data<i1, i1>) -> !neura.data<i1, i1>
+// MAPPING:          %15 = neura.grant_predicate %13, %14 {dfg_id = 29 : i32, mapping_locs = {{.*}}} : !neura.data<i32, i1>, !neura.data<i1, i1> -> !neura.data<i32, i1>
+// MAPPING:          %16 = "neura.data_mov"(%12) {dfg_id = 33 : i32, mapping_locs = {{.*}}} : (!neura.data<i64, i1>) -> !neura.data<i64, i1>
+// MAPPING:          %17 = "neura.gep"(%16) <{operandSegmentSizes = array<i32: 0, 1>}> {dfg_id = 34 : i32, lhs_value = "%arg1", mapping_locs = {{.*}}} : (!neura.data<i64, i1>) -> !neura.data<!llvm.ptr, i1>
+// MAPPING:          %18 = "neura.data_mov"(%17) {dfg_id = 36 : i32, mapping_locs = {{.*}}} : (!neura.data<!llvm.ptr, i1>) -> !neura.data<!llvm.ptr, i1>
+// MAPPING:          %19 = "neura.load"(%18) {dfg_id = 37 : i32, mapping_locs = {{.*}}} : (!neura.data<!llvm.ptr, i1>) -> !neura.data<i32, i1>
+// MAPPING:          %20 = "neura.data_mov"(%19) {dfg_id = 38 : i32, mapping_locs = {{.*}}} : (!neura.data<i32, i1>) -> !neura.data<i32, i1>
+// MAPPING:          %21 = "neura.data_mov"(%15) {dfg_id = 32 : i32, mapping_locs = {{.*}}} : (!neura.data<i32, i1>) -> !neura.data<i32, i1>
+// MAPPING:          %22 = "neura.add"(%20, %21) {dfg_id = 39 : i32, mapping_locs = {{.*}}} : (!neura.data<i32, i1>, !neura.data<i32, i1>) -> !neura.data<i32, i1>
+// MAPPING:          %23 = "neura.data_mov"(%22) {dfg_id = 40 : i32, mapping_locs = {{.*}}} : (!neura.data<i32, i1>) -> !neura.data<i32, i1>
+// MAPPING:          %24 = "neura.data_mov"(%17) {dfg_id = 35 : i32, mapping_locs = {{.*}}} : (!neura.data<!llvm.ptr, i1>) -> !neura.data<!llvm.ptr, i1>
+// MAPPING:          "neura.store"(%23, %24) {dfg_id = 41 : i32, mapping_locs = {{.*}}} : (!neura.data<i32, i1>, !neura.data<!llvm.ptr, i1>) -> ()
+// MAPPING:          %25 = "neura.data_mov"(%3) {dfg_id = 5 : i32, mapping_locs = {{.*}}} : (!neura.data<i64, i1>) -> !neura.data<i64, i1>
+// MAPPING:          %26 = "neura.add"(%25) {dfg_id = 8 : i32, mapping_locs = {{.*}}, rhs_value = 1 : i64} : (!neura.data<i64, i1>) -> !neura.data<i64, i1>
+// MAPPING:          %27 = "neura.data_mov"(%26) {dfg_id = 11 : i32, mapping_locs = {{.*}}} : (!neura.data<i64, i1>) -> !neura.data<i64, i1>
+// MAPPING:          %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {dfg_id = 13 : i32, mapping_locs = {{.*}}, rhs_value = 32 : i64} : (!neura.data<i64, i1>) -> !neura.data<i1, i1>
+// MAPPING:          %29 = "neura.data_mov"(%28) {dfg_id = 17 : i32, mapping_locs = {{.*}}} : (!neura.data<i1, i1>) -> !neura.data<i1, i1>
+// MAPPING:          %30 = "neura.not"(%29) {dfg_id = 21 : i32, mapping_locs = {{.*}}} : (!neura.data<i1, i1>) -> !neura.data<i1, i1>
+// MAPPING:          %31 = "neura.data_mov"(%26) {dfg_id = 10 : i32, mapping_locs = {{.*}}} : (!neura.data<i64, i1>) -> !neura.data<i64, i1>
+// MAPPING:          %32 = "neura.data_mov"(%30) {dfg_id = 24 : i32, mapping_locs = {{.*}}} : (!neura.data<i1, i1>) -> !neura.data<i1, i1>
+// MAPPING:          %33 = neura.grant_predicate %31, %32 {dfg_id = 28 : i32, mapping_locs = {{.*}}} : !neura.data<i64, i1>, !neura.data<i1, i1> -> !neura.data<i64, i1>
+// MAPPING:          neura.ctrl_mov %33 -> %1 {dfg_id = 31 : i32, mapping_locs = {{.*}}} : !neura.data<i64, i1> !neura.data<i64, i1>
+// MAPPING:          %34 = "neura.data_mov"(%28) {dfg_id = 15 : i32, mapping_locs = {{.*}}} : (!neura.data<i1, i1>) -> !neura.data<i1, i1>
+// MAPPING:          %35 = "neura.data_mov"(%28) {dfg_id = 16 : i32, mapping_locs = {{.*}}} : (!neura.data<i1, i1>) -> !neura.data<i1, i1>
+// MAPPING:          %36 = neura.grant_predicate %34, %35 {dfg_id = 20 : i32, mapping_locs = {{.*}}} : !neura.data<i1, i1>, !neura.data<i1, i1> -> !neura.data<i1, i1>
+// MAPPING:          %37 = "neura.data_mov"(%36) {dfg_id = 23 : i32, mapping_locs = {{.*}}} : (!neura.data<i1, i1>) -> !neura.data<i1, i1>
+// MAPPING:          neura.return_void %37 : !neura.data<i1, i1> {dfg_id = 27 : i32, mapping_locs = {{.*}}}
+// MAPPING:          neura.yield {dfg_id = 2 : i32}
+// MAPPING:        }
+// MAPPING:      }
+
 // YAML:      array_config:
 // YAML-NEXT:   columns: 4
 // YAML-NEXT:   rows: 4
 // YAML-NEXT:   compiled_ii: 5
 // YAML-NEXT:   cores:
 // YAML-NEXT:     - column: 2
-// YAML-NEXT:       row: 0
-// YAML-NEXT:       core_id: "2"
+// YAML-NEXT:       row: 1
+// YAML-NEXT:       core_id: "6"
 // YAML-NEXT:       entries:
 // YAML-NEXT:         - entry_id: "entry0"
 // YAML-NEXT:           instructions:
-// YAML-NEXT:             - index_per_ii: 4
+// YAML-NEXT:             - index_per_ii: 3
 // YAML-NEXT:               operations:
 // YAML-NEXT:                 - opcode: "DATA_MOV"
-// YAML-NEXT:                   id: 440001
-// YAML-NEXT:                   time_step: 9
+// YAML-NEXT:                   id: 380001
+// YAML-NEXT:                   time_step: 8
 // YAML-NEXT:                   invalid_iterations: 1
 // YAML-NEXT:                   src_operands:
 // YAML-NEXT:                     - operand: "EAST"
 // YAML-NEXT:                       color: "RED"
 // YAML-NEXT:                   dst_operands:
 // YAML-NEXT:                     - operand: "NORTH"
 // YAML-NEXT:                       color: "RED"
-// YAML-NEXT:     - column: 3
-// YAML-NEXT:       row: 0
 
-// ASM:      # Compiled II: 5
-// ASM:      PE(2,0):
-// ASM-NEXT: {
-// ASM-NEXT:   DATA_MOV, [EAST, RED] -> [NORTH, RED] (t=9, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=4)
-// ASM:      PE(3,0):
-// ASM-NEXT: {
-// ASM-NEXT:   DATA_MOV, [NORTH, RED] -> [$0] (t=5, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=0)
-// ASM-NEXT: {
-// ASM-NEXT:   DATA_MOV, [NORTH, RED] -> [$1] (t=7, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=2)
-// ASM-NEXT: {
-// ASM-NEXT:   GEP, [$0], [$1] -> [WEST, RED] (t=8, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=3)
-// ASM:      PE(2,1):
+// ASM: # Compiled II: 5
+// ASM: PE(3,2):
 // ASM-NEXT: {
-// ASM-NEXT:   STORE, [$0], [SOUTH, RED] (t=10, inv_iters=2)
+// ASM-NEXT:   GRANT_ONCE, [#0] -> [$0] (t=0, inv_iters=0)
+// ASM-NEXT:   DATA_MOV, [SOUTH, RED] -> [NORTH, RED] (t=5, inv_iters=1)
 // ASM-NEXT: } (idx_per_ii=0)
 // ASM-NEXT: {
-// ASM-NEXT:   ICMP_SGE, [EAST, RED], [#0] -> [$0] (t=7, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=2)
-// ASM-NEXT: {
-// ASM-NEXT:   DATA_MOV, [EAST, RED] -> [$1] (t=8, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=3)
-// ASM-NEXT: {
-// ASM-NEXT:   SEL, [$0], [$1], [NORTH, RED] -> [$0] (t=9, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=4)
-// ASM:      PE(3,1):
-// ASM-NEXT: {
-// ASM-NEXT:   GEP, [$0], [NORTH, RED] -> [$0] (t=5, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=0)
-// ASM-NEXT: {
-// ASM-NEXT:   LOAD, [$0] -> [WEST, RED], [$0] (t=6, inv_iters=1)
-// ASM-NEXT:   DATA_MOV, [NORTH, RED] -> [SOUTH, RED] (t=6, inv_iters=1)
+// ASM-NEXT:   PHI_START, [$0], [WEST, RED] -> [WEST, RED], [SOUTH, RED], [$0] (t=1, inv_iters=0)
 // ASM-NEXT: } (idx_per_ii=1)
 // ASM-NEXT: {
-// ASM-NEXT:   DATA_MOV, [$0] -> [WEST, RED] (t=7, inv_iters=1)
+// ASM-NEXT:   ADD, [$0], [#1] -> [$0], [WEST, RED] (t=2, inv_iters=0)
+// ASM-NEXT:   DATA_MOV, [NORTH, RED] -> [SOUTH, RED] (t=12, inv_iters=2)
 // ASM-NEXT: } (idx_per_ii=2)
 // ASM-NEXT: {
-// ASM-NEXT:   DIV, [NORTH, RED], [#70] -> [$0] (t=3, inv_iters=0)
+// ASM-NEXT:   ICMP_EQ, [$0], [#32] -> [WEST, RED], [NORTH, RED], [SOUTH, RED] (t=3, inv_iters=0)
 // ASM-NEXT: } (idx_per_ii=3)
 // ASM-NEXT: {
-// ASM-NEXT:   ZEXT, [$0] -> [$0], [SOUTH, RED] (t=4, inv_iters=0)
+// ASM-NEXT:   ICMP_SGT, [WEST, RED], [#0] -> [SOUTH, RED], [NORTH, RED] (t=4, inv_iters=0)
 // ASM-NEXT: } (idx_per_ii=4)
-// ASM:      PE(1,2):
-// ASM-NEXT: {
-// ASM-NEXT:   DATA_MOV, [EAST, RED] -> [$2] (t=5, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=0)
-// ASM-NEXT: {
-// ASM-NEXT:   DATA_MOV, [EAST, RED] -> [$4] (t=6, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=1)
-// ASM-NEXT: {
-// ASM-NEXT:   PHI_START, [$0], [$3] -> [EAST, RED], [$3] (t=7, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=2)
-// ASM-NEXT: {
-// ASM-NEXT:   DATA_MOV, [EAST, RED] -> [$0] (t=3, inv_iters=0)
-// ASM-NEXT:   GRANT_PREDICATE, [$1], [$2] -> [NORTH, RED] (t=8, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=3)
-// ASM-NEXT: {
-// ASM-NEXT:   DATA_MOV, [EAST, RED] -> [$1] (t=4, inv_iters=0)
-// ASM-NEXT:   GRANT_PREDICATE, [$3], [$4] -> [$3] (t=9, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=4)
-// ASM:      PE(2,2):
-// ASM-NEXT: {
-// ASM-NEXT:   GRANT_PREDICATE, [$1], [$0] -> [$0] (t=5, inv_iters=1)
-// ASM-NEXT:   DATA_MOV, [$2] -> [WEST, RED] (t=5, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=0)
-// ASM-NEXT: {
-// ASM-NEXT:   PHI_START, [EAST, RED], [$0] -> [EAST, RED], [$0] (t=1, inv_iters=0)
-// ASM-NEXT: } (idx_per_ii=1)
-// ASM-NEXT: {
-// ASM-NEXT:   ADD, [$0], [#1] -> [$0], [$1] (t=2, inv_iters=0)
-// ASM-NEXT:   DATA_MOV, [EAST, RED] -> [WEST, RED] (t=2, inv_iters=0)
-// ASM-NEXT: } (idx_per_ii=2)
-// ASM-NEXT: {
-// ASM-NEXT:   ICMP_EQ, [$0], [#4200] -> [$0], [WEST, RED], [$2] (t=3, inv_iters=0)
-// ASM-NEXT:   DATA_MOV, [WEST, RED] -> [SOUTH, RED] (t=8, inv_iters=1)
-// ASM-NEXT: } (idx_per_ii=3)