diff --git a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp index c28e50db..13357291 100644 --- a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp +++ b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp @@ -661,6 +661,29 @@ struct LlvmConstantToNeuraConstant : public OpRewritePattern { } }; +struct LlvmAddressOfToNeuraConstant + : public OpRewritePattern<LLVM::AddressOfOp> { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(LLVM::AddressOfOp op, + PatternRewriter &rewriter) const override { + // Lowers LLVM::AddressOfOp to a neura.constant whose `value` attribute + // is the referenced global symbol; the result type is kept unchanged. + Attribute global_symbol_attr = op->getAttr("global_name"); + if (!global_symbol_attr) { + return rewriter.notifyMatchFailure(op, "expects global_name attribute"); + } + + OperationState state(op.getLoc(), neura::ConstantOp::getOperationName()); + state.addAttribute("value", global_symbol_attr); + state.addTypes(op.getType()); + + Operation *new_op = rewriter.create(state); + rewriter.replaceOp(op, new_op->getResults()); + return success(); + } +}; + struct LlvmAllocaToNeuraAlloca : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -880,6 +903,7 @@ struct LowerLlvmToNeuraPass RewritePatternSet populateLlvmToNeuraPatterns(MLIRContext *context) { RewritePatternSet patterns(context); patterns.add(&getContext()); + patterns.add<LlvmAddressOfToNeuraConstant>(&getContext()); // Vector operations must be registered before scalar operations // to ensure vector types are matched first. 
patterns.add(&getContext()); diff --git a/test/affine2neura/bert/bert_node0/bert_node0.cpp b/test/Conversion/affine2neura/bert/bert_node0/bert_node0.cpp similarity index 100% rename from test/affine2neura/bert/bert_node0/bert_node0.cpp rename to test/Conversion/affine2neura/bert/bert_node0/bert_node0.cpp diff --git a/test/affine2neura/bert/bert_node0/bert_node0.mlir b/test/Conversion/affine2neura/bert/bert_node0/bert_node0.mlir similarity index 100% rename from test/affine2neura/bert/bert_node0/bert_node0.mlir rename to test/Conversion/affine2neura/bert/bert_node0/bert_node0.mlir diff --git a/test/affine2neura/bert/bert_node1/bert_node1.cpp b/test/Conversion/affine2neura/bert/bert_node1/bert_node1.cpp similarity index 100% rename from test/affine2neura/bert/bert_node1/bert_node1.cpp rename to test/Conversion/affine2neura/bert/bert_node1/bert_node1.cpp diff --git a/test/affine2neura/bert/bert_node1/bert_node1.mlir b/test/Conversion/affine2neura/bert/bert_node1/bert_node1.mlir similarity index 100% rename from test/affine2neura/bert/bert_node1/bert_node1.mlir rename to test/Conversion/affine2neura/bert/bert_node1/bert_node1.mlir diff --git a/test/affine2neura/bert/bert_node2/bert_node2.cpp b/test/Conversion/affine2neura/bert/bert_node2/bert_node2.cpp similarity index 100% rename from test/affine2neura/bert/bert_node2/bert_node2.cpp rename to test/Conversion/affine2neura/bert/bert_node2/bert_node2.cpp diff --git a/test/affine2neura/bert/bert_node2/bert_node2.mlir b/test/Conversion/affine2neura/bert/bert_node2/bert_node2.mlir similarity index 100% rename from test/affine2neura/bert/bert_node2/bert_node2.mlir rename to test/Conversion/affine2neura/bert/bert_node2/bert_node2.mlir diff --git a/test/affine2neura/bert/bert_node28/bert_node28.cpp b/test/Conversion/affine2neura/bert/bert_node28/bert_node28.cpp similarity index 100% rename from test/affine2neura/bert/bert_node28/bert_node28.cpp rename to test/Conversion/affine2neura/bert/bert_node28/bert_node28.cpp diff --git 
a/test/affine2neura/bert/bert_node28/bert_node28.mlir b/test/Conversion/affine2neura/bert/bert_node28/bert_node28.mlir similarity index 100% rename from test/affine2neura/bert/bert_node28/bert_node28.mlir rename to test/Conversion/affine2neura/bert/bert_node28/bert_node28.mlir diff --git a/test/affine2neura/bert/bert_node3/bert_node3.cpp b/test/Conversion/affine2neura/bert/bert_node3/bert_node3.cpp similarity index 100% rename from test/affine2neura/bert/bert_node3/bert_node3.cpp rename to test/Conversion/affine2neura/bert/bert_node3/bert_node3.cpp diff --git a/test/affine2neura/bert/bert_node3/bert_node3.mlir b/test/Conversion/affine2neura/bert/bert_node3/bert_node3.mlir similarity index 100% rename from test/affine2neura/bert/bert_node3/bert_node3.mlir rename to test/Conversion/affine2neura/bert/bert_node3/bert_node3.mlir diff --git a/test/affine2neura/bert/bert_node8/bert_node8.cpp b/test/Conversion/affine2neura/bert/bert_node8/bert_node8.cpp similarity index 100% rename from test/affine2neura/bert/bert_node8/bert_node8.cpp rename to test/Conversion/affine2neura/bert/bert_node8/bert_node8.cpp diff --git a/test/affine2neura/bert/bert_node8/bert_node8.mlir b/test/Conversion/affine2neura/bert/bert_node8/bert_node8.mlir similarity index 100% rename from test/affine2neura/bert/bert_node8/bert_node8.mlir rename to test/Conversion/affine2neura/bert/bert_node8/bert_node8.mlir diff --git a/test/affine2neura/bert/bert_node9/bert_node9.cpp b/test/Conversion/affine2neura/bert/bert_node9/bert_node9.cpp similarity index 100% rename from test/affine2neura/bert/bert_node9/bert_node9.cpp rename to test/Conversion/affine2neura/bert/bert_node9/bert_node9.cpp diff --git a/test/affine2neura/bert/bert_node9/bert_node9.mlir b/test/Conversion/affine2neura/bert/bert_node9/bert_node9.mlir similarity index 100% rename from test/affine2neura/bert/bert_node9/bert_node9.mlir rename to test/Conversion/affine2neura/bert/bert_node9/bert_node9.mlir diff --git 
a/test/Conversion/AffineToNeura/complex-affine-expressions.mlir b/test/Conversion/affine2neura/complex-affine-expressions.mlir similarity index 100% rename from test/Conversion/AffineToNeura/complex-affine-expressions.mlir rename to test/Conversion/affine2neura/complex-affine-expressions.mlir diff --git a/test/Conversion/AffineToNeura/deep-nesting.mlir b/test/Conversion/affine2neura/deep-nesting.mlir similarity index 100% rename from test/Conversion/AffineToNeura/deep-nesting.mlir rename to test/Conversion/affine2neura/deep-nesting.mlir diff --git a/test/Conversion/AffineToNeura/imperfect-ops-after.mlir b/test/Conversion/affine2neura/imperfect-ops-after.mlir similarity index 100% rename from test/Conversion/AffineToNeura/imperfect-ops-after.mlir rename to test/Conversion/affine2neura/imperfect-ops-after.mlir diff --git a/test/Conversion/AffineToNeura/loop-nest-optimization.mlir b/test/Conversion/affine2neura/loop-nest-optimization.mlir similarity index 100% rename from test/Conversion/AffineToNeura/loop-nest-optimization.mlir rename to test/Conversion/affine2neura/loop-nest-optimization.mlir diff --git a/test/Conversion/AffineToNeura/single-iteration.mlir b/test/Conversion/affine2neura/single-iteration.mlir similarity index 100% rename from test/Conversion/AffineToNeura/single-iteration.mlir rename to test/Conversion/affine2neura/single-iteration.mlir diff --git a/test/Conversion/AffineToNeura/unsupported-affine-if.mlir b/test/Conversion/affine2neura/unsupported-affine-if.mlir similarity index 100% rename from test/Conversion/AffineToNeura/unsupported-affine-if.mlir rename to test/Conversion/affine2neura/unsupported-affine-if.mlir diff --git a/test/arith2neura/add.mlir b/test/Conversion/arith2neura/add.mlir similarity index 100% rename from test/arith2neura/add.mlir rename to test/Conversion/arith2neura/add.mlir diff --git a/test/Conversion/c2llvm2mlir/global_mem_address/kernel.c b/test/Conversion/c2llvm2mlir/global_mem_address/kernel.c new file mode 100644 index 
00000000..cc33ab0f --- /dev/null +++ b/test/Conversion/c2llvm2mlir/global_mem_address/kernel.c @@ -0,0 +1,40 @@ +// GEMV + ReLU kernel chain plus a checksum reduction, for codegen tests (int-only). +// All stages live in run_gemv_relu_gemv(); main() and kernel_gemv_relu_gemv() call it. + +#define M 4 +#define K 4 +#define N 4 + +static int run_gemv_relu_gemv(void) { + // Inputs / outputs: filled by simulation; only the array shapes matter here. + static int A[M * K]; + static int x[K]; + static int y[M]; + + // Kernel 1: GEMV + for (int i = 0; i < M; ++i) { + int acc = 0; + for (int j = 0; j < K; ++j) { + acc += A[i * K + j] * x[j]; + } + y[i] = acc; + } + + // Kernel 2: ReLU + for (int i = 0; i < M; ++i) { + if (y[i] < 0) { + y[i] = 0; + } + } + + // Return a checksum-like value over all M outputs so they are consumed. + int checksum = 0; + for (int i = 0; i < M; ++i) { + checksum += y[i]; + } + return checksum & 0xFF; +} + +int kernel_gemv_relu_gemv(void) { return run_gemv_relu_gemv(); } + +int main(void) { return run_gemv_relu_gemv(); } diff --git a/test/Conversion/c2llvm2mlir/global_mem_address/kernel.mlir b/test/Conversion/c2llvm2mlir/global_mem_address/kernel.mlir new file mode 100644 index 00000000..bf4e3be1 --- /dev/null +++ b/test/Conversion/c2llvm2mlir/global_mem_address/kernel.mlir @@ -0,0 +1,78 @@ +// Compiles the GEMV+ReLU kernel in kernel.c to LLVM IR, imports it to MLIR, +// then lowers it via Neura (global addresses become neura.constant symbols). 
+// RUN: clang -S -emit-llvm -O3 -fno-vectorize -fno-unroll-loops -std=c11 \ +// RUN: -o %t-kernel-full.ll %S/kernel.c +// RUN: llvm-extract --rfunc=".*kernel.*" %t-kernel-full.ll -o %t-kernel-only.ll +// RUN: mlir-translate --import-llvm %t-kernel-only.ll -o %t-kernel.mlir +// +// RUN: mlir-neura-opt %t-kernel.mlir \ +// RUN: --assign-accelerator \ +// RUN: --lower-llvm-to-neura \ +// RUN: --promote-input-arg-to-const \ +// RUN: --fold-constant \ +// RUN: --canonicalize-return \ +// RUN: --canonicalize-live-in \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: --fold-constant \ +// RUN: -o %t-dataflow.mlir +// RUN: FileCheck %s --input-file=%t-dataflow.mlir --check-prefix=DATAFLOW + +// DATAFLOW: llvm.mlir.global external hidden unnamed_addr @run_gemv_relu_gemv.y() {addr_space = 0 : i32, alignment = 16 : i64, dso_local} : !llvm.array<4 x i32> +// DATAFLOW-NEXT: func.func @kernel_gemv_relu_gemv() -> (i32 {llvm.range = #llvm.constant_range}) attributes {CConv = #llvm.cconv, accelerator = "neura", dataflow_mode = "predicate", linkage = #llvm.linkage, memory_effects = #llvm.memory_effects, no_unwind, passthrough = ["nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} { +// DATAFLOW-NEXT: %0 = "neura.constant"() <{value = @run_gemv_relu_gemv.y}> : () -> !neura.data +// DATAFLOW-NEXT: %1 = "neura.constant"() <{value = 0 : i8}> : () -> !neura.data +// DATAFLOW-NEXT: %2 = "neura.constant"() <{value = 16 : i64}> : () -> !neura.data +// DATAFLOW-NEXT: %3 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data +// DATAFLOW-NEXT: %4 = "neura.grant_once"() <{constant_value = 0 : i32}> : () -> !neura.data +// 
DATAFLOW-NEXT: "neura.memset"(%0, %1, %2) <{is_volatile = false}> : (!neura.data, !neura.data, !neura.data) -> () +// DATAFLOW-NEXT: %5 = neura.reserve : !neura.data +// DATAFLOW-NEXT: %6 = neura.phi_start %4, %5 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: %7 = neura.reserve : !neura.data +// DATAFLOW-NEXT: %8 = neura.phi_start %3, %7 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: %9 = neura.reserve : !neura.data +// DATAFLOW-NEXT: %10 = neura.phi_start %3, %9 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: %11 = "neura.constant"() <{value = 0 : i32}> : () -> !neura.data +// DATAFLOW-NEXT: %12 = "neura.gep"(%11, %10) <{operandSegmentSizes = array}> {lhs_value = @run_gemv_relu_gemv.y} : (!neura.data, !neura.data) -> !neura.data +// DATAFLOW-NEXT: %13 = "neura.load"(%12) : (!neura.data) -> !neura.data +// DATAFLOW-NEXT: %14 = "neura.icmp"(%13) <{cmpType = "slt"}> {rhs_value = 0 : i32} : (!neura.data) -> !neura.data +// DATAFLOW-NEXT: %15 = neura.grant_predicate %12, %14 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: %16 = neura.grant_predicate %8, %14 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: %17 = neura.grant_predicate %6, %14 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: %18 = "neura.not"(%14) : (!neura.data) -> !neura.data +// DATAFLOW-NEXT: %19 = neura.grant_predicate %8, %18 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: %20 = neura.grant_predicate %6, %18 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: "neura.store"(%15) {lhs_value = 0 : i32} : (!neura.data) -> () +// DATAFLOW-NEXT: %21 = "neura.phi"(%20, %17) : (!neura.data, !neura.data) -> !neura.data +// DATAFLOW-NEXT: %22 = "neura.phi"(%19, %16) : (!neura.data, !neura.data) -> !neura.data +// DATAFLOW-NEXT: %23 = "neura.add"(%10) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// DATAFLOW-NEXT: %24 = "neura.icmp"(%23) <{cmpType = "eq"}> {rhs_value = 4 : i64} : (!neura.data) -> 
!neura.data +// DATAFLOW-NEXT: %25 = "neura.not"(%24) : (!neura.data) -> !neura.data +// DATAFLOW-NEXT: %26 = neura.grant_predicate %23, %25 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: neura.ctrl_mov %26 -> %9 : !neura.data !neura.data +// DATAFLOW-NEXT: %27 = neura.grant_predicate %22, %25 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: neura.ctrl_mov %27 -> %7 : !neura.data !neura.data +// DATAFLOW-NEXT: %28 = neura.grant_predicate %21, %25 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: neura.ctrl_mov %28 -> %5 : !neura.data !neura.data +// DATAFLOW-NEXT: %29 = neura.grant_predicate %22, %24 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: %30 = neura.grant_predicate %21, %24 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: %31 = neura.reserve : !neura.data +// DATAFLOW-NEXT: %32 = neura.phi_start %30, %31 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: %33 = neura.reserve : !neura.data +// DATAFLOW-NEXT: %34 = neura.phi_start %29, %33 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: %35 = "neura.constant"() <{value = 0 : i32}> : () -> !neura.data +// DATAFLOW-NEXT: %36 = "neura.gep"(%35, %34) <{operandSegmentSizes = array}> {lhs_value = @run_gemv_relu_gemv.y} : (!neura.data, !neura.data) -> !neura.data +// DATAFLOW-NEXT: %37 = "neura.load"(%36) : (!neura.data) -> !neura.data +// DATAFLOW-NEXT: %38 = "neura.add"(%37, %32) : (!neura.data, !neura.data) -> !neura.data +// DATAFLOW-NEXT: %39 = "neura.add"(%34) {rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// DATAFLOW-NEXT: %40 = "neura.icmp"(%39) <{cmpType = "eq"}> {rhs_value = 4 : i64} : (!neura.data) -> !neura.data +// DATAFLOW-NEXT: %41 = "neura.not"(%40) : (!neura.data) -> !neura.data +// DATAFLOW-NEXT: %42 = neura.grant_predicate %39, %41 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: neura.ctrl_mov %42 -> %33 : !neura.data !neura.data +// DATAFLOW-NEXT: %43 = neura.grant_predicate %38, %41 : 
!neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: neura.ctrl_mov %43 -> %31 : !neura.data !neura.data +// DATAFLOW-NEXT: %44 = neura.grant_predicate %38, %40 : !neura.data, !neura.data -> !neura.data +// DATAFLOW-NEXT: %45 = "neura.and"(%44) {rhs_value = 255 : i32} : (!neura.data) -> !neura.data +// DATAFLOW-NEXT: neura.return_value %45 : !neura.data +// DATAFLOW-NEXT: neura.yield +// DATAFLOW-NEXT: } \ No newline at end of file diff --git a/test/c2llvm2mlir/nested_loop/kernel.cpp b/test/Conversion/c2llvm2mlir/nested_loop/kernel.cpp similarity index 100% rename from test/c2llvm2mlir/nested_loop/kernel.cpp rename to test/Conversion/c2llvm2mlir/nested_loop/kernel.cpp diff --git a/test/c2llvm2mlir/nested_loop/test.mlir b/test/Conversion/c2llvm2mlir/nested_loop/test.mlir similarity index 94% rename from test/c2llvm2mlir/nested_loop/test.mlir rename to test/Conversion/c2llvm2mlir/nested_loop/test.mlir index 3bf536ff..599875a6 100644 --- a/test/c2llvm2mlir/nested_loop/test.mlir +++ b/test/Conversion/c2llvm2mlir/nested_loop/test.mlir @@ -22,7 +22,7 @@ // RUN: --fold-constant \ // RUN: --insert-data-mov \ // RUN: --map-to-accelerator="mapping-strategy=heuristic backtrack-config=simple" \ -// RUN: --architecture-spec=../../arch_spec/architecture.yaml %t-kernel.mlir | FileCheck %s --check-prefix=CHECK-LLVM2NEURA-MAP +// RUN: --architecture-spec=../../../arch_spec/architecture.yaml %t-kernel.mlir | FileCheck %s --check-prefix=CHECK-LLVM2NEURA-MAP // CHECK-LLVM2NEURA: accelerator = "neura" // CHECK-LLVM2NEURA: %25 = neura.alloca %24 : !neura.data -> !neura.data diff --git a/test/c2llvm2mlir/simple_loop/kernel.cpp b/test/Conversion/c2llvm2mlir/simple_loop/kernel.cpp similarity index 100% rename from test/c2llvm2mlir/simple_loop/kernel.cpp rename to test/Conversion/c2llvm2mlir/simple_loop/kernel.cpp diff --git a/test/c2llvm2mlir/simple_loop/test.mlir b/test/Conversion/c2llvm2mlir/simple_loop/test.mlir similarity index 96% rename from 
test/c2llvm2mlir/simple_loop/test.mlir rename to test/Conversion/c2llvm2mlir/simple_loop/test.mlir index 2af2d0c6..5ee4d6e7 100644 --- a/test/c2llvm2mlir/simple_loop/test.mlir +++ b/test/Conversion/c2llvm2mlir/simple_loop/test.mlir @@ -29,7 +29,7 @@ // RUN: --leverage-predicated-value \ // RUN: --transform-ctrl-to-data-flow \ // RUN: --view-op-graph \ -// RUN: --architecture-spec=../../arch_spec/architecture.yaml \ +// RUN: --architecture-spec=../../../arch_spec/architecture.yaml \ // RUN: --insert-data-mov %t-kernel.mlir -o %t-kernel-neura.mlir // RUN: FileCheck %s --check-prefix=CHECK-LLVM2NEURA < %t-kernel-neura.mlir @@ -43,7 +43,7 @@ // RUN: --leverage-predicated-value \ // RUN: --transform-ctrl-to-data-flow \ // RUN: --view-op-graph \ -// RUN: --architecture-spec=../../arch_spec/architecture.yaml \ +// RUN: --architecture-spec=../../../arch_spec/architecture.yaml \ // RUN: --insert-data-mov \ // RUN: --map-to-accelerator="mapping-strategy=heuristic backtrack-config=customized=5,3 dump-mapping-table=true" %t-kernel.mlir -o %t-kernel-mapped.mlir 2>&1 | tee %t-kernel-mapping-output.txt // RUN: FileCheck %s --check-prefix=CHECK-MAPPING-TABLE < %t-kernel-mapping-output.txt diff --git a/test/Conversion/TosaToTaskflow/affine-to-taskflow.mlir b/test/Conversion/tosa2taskflow/affine-to-taskflow.mlir similarity index 100% rename from test/Conversion/TosaToTaskflow/affine-to-taskflow.mlir rename to test/Conversion/tosa2taskflow/affine-to-taskflow.mlir diff --git a/test/Conversion/TosaToTaskflow/tosa-fusion.mlir b/test/Conversion/tosa2taskflow/tosa-fusion.mlir similarity index 100% rename from test/Conversion/TosaToTaskflow/tosa-fusion.mlir rename to test/Conversion/tosa2taskflow/tosa-fusion.mlir diff --git a/test/Conversion/TosaToTaskflow/tosa-opt.mlir b/test/Conversion/tosa2taskflow/tosa-opt.mlir similarity index 100% rename from test/Conversion/TosaToTaskflow/tosa-opt.mlir rename to test/Conversion/tosa2taskflow/tosa-opt.mlir diff --git 
a/test/Conversion/TosaToTaskflow/tosa-to-affine.mlir b/test/Conversion/tosa2taskflow/tosa-to-affine.mlir similarity index 100% rename from test/Conversion/TosaToTaskflow/tosa-to-affine.mlir rename to test/Conversion/tosa2taskflow/tosa-to-affine.mlir diff --git a/test/Conversion/TosaToTaskflow/tosa-to-taskflow.mlir b/test/Conversion/tosa2taskflow/tosa-to-taskflow.mlir similarity index 100% rename from test/Conversion/TosaToTaskflow/tosa-to-taskflow.mlir rename to test/Conversion/tosa2taskflow/tosa-to-taskflow.mlir