Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
3b19db4
Add histogram testbench files
n0thingNoob Oct 17, 2025
6da8b5c
Fix conversion for llvm.fdiv and llvm.fptosi, add e2e/histogram kerne…
n0thingNoob Oct 18, 2025
40aa68f
Delete test/testbench/histogram/histogram.cpp
n0thingNoob Oct 18, 2025
4b9f1c6
Delete test/testbench/histogram/histogram_kernel_neura.mlir
n0thingNoob Oct 18, 2025
3cd3901
Update test/e2e/histogram/histogram_kernel.mlir
n0thingNoob Oct 18, 2025
cdb66da
Delete test/testbench/histogram/histogram_kernel.cpp
n0thingNoob Oct 18, 2025
d694f69
Delete test/testbench/histogram/histogram_kernel.ll
n0thingNoob Oct 18, 2025
1b854f3
Delete test/testbench/histogram/histogram_kernel.mlir
n0thingNoob Oct 18, 2025
4fb939a
Clean up .gitmodules by removing duplicates
n0thingNoob Oct 18, 2025
71c28c3
add fir and modify LlvmToNeuraPass.cpp for llvm.fmuladd conversion
n0thingNoob Oct 18, 2025
180c3ef
Add FIR kernel support and llvm.fmuladd conversion
n0thingNoob Oct 18, 2025
f70a11e
Merge remote-tracking branch 'origin/testbench'
n0thingNoob Oct 18, 2025
411b066
Clean up repository: remove temporary and generated files
n0thingNoob Oct 19, 2025
9a846b1
Fix Neura_OrOp type definition to support neura.data types
n0thingNoob Oct 19, 2025
e73bf40
Remove FFT and fusion test files
n0thingNoob Oct 19, 2025
10dfd4b
remove histogram.cpp
n0thingNoob Oct 19, 2025
e59f4de
remove ll file
n0thingNoob Oct 19, 2025
5a4c2ff
rm testbench folder
n0thingNoob Oct 19, 2025
096359f
backup for fir kernel and histogram kernel
n0thingNoob Oct 19, 2025
db4e012
Use llvm extract to extract kernel from benchmarks
n0thingNoob Oct 19, 2025
ebcf014
unify the kernel name in the llvm extract command
n0thingNoob Oct 19, 2025
d4314ae
add issue link to mlir file
n0thingNoob Oct 19, 2025
a83fe7f
Fix GitHub CI: Add LLVM tools to PATH for llvm-extract
n0thingNoob Oct 19, 2025
811a674
Add TODO, remove redundant file
n0thingNoob Oct 19, 2025
6f683fa
rm extra file
n0thingNoob Oct 19, 2025
bf45e98
upload gitignore and remove the unnecessary mlir.llvm in test
n0thingNoob Oct 19, 2025
3176f6f
rm adding the build/bin to PATH
n0thingNoob Oct 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
[submodule "test/CGRA-Bench"]
path = test/CGRA-Bench
path = test/benchmark/CGRA-Bench/CGRA-Bench
url = https://github.com/tancheng/CGRA-Bench
[submodule "test/benchmark"]
path = test/benchmark
url = https://github.com/tancheng/CGRA-Bench.git
[submodule "test/benchmark/CGRA-Bench"]
path = test/benchmark/CGRA-Bench
url = https://github.com/tancheng/CGRA-Bench.git
11 changes: 9 additions & 2 deletions include/NeuraDialect/NeuraOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,16 @@ def Neura_FMulOp : Op<NeuraDialect, "fmul"> {

def Neura_FDivOp : Op<NeuraDialect, "fdiv"> {
let summary = "Floating division operation";
let arguments = (ins AnyType:$lhs, Optional<AnyType>:$rhs);
let description = [{
Performs a floating-point division operation, computing the result of
a / b, where / is the floating-point division operator.

Example:
%result = neura.fdiv %a, %b : f32
}];
let arguments = (ins AnyType:$lhs, AnyType:$rhs);
let results = (outs AnyType:$result);
// let assemblyFormat = "$lhs `,` $rhs `,` $predicate attr-dict `:` type($result)";
let traits = [SameOperandsAndResultElementType];
}

// Defines a bitwise OR operation.
Expand Down
35 changes: 35 additions & 0 deletions lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,39 @@ struct LlvmSRemToNeuraRem : public OpRewritePattern<LLVM::SRemOp> {
}
};

// Rewrites `llvm.fdiv` on scalar floating-point values into `neura.fdiv`.
// Non-scalar (e.g. vector) results are deliberately not matched here.
struct LlvmFDivToNeuraFDiv : public OpRewritePattern<mlir::LLVM::FDivOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(mlir::LLVM::FDivOp op,
                                PatternRewriter &rewriter) const override {
    Type out_type = op->getResult(0).getType();

    // Bail out unless the result is a scalar float; other shapes are left
    // for other patterns (or remain unconverted).
    if (!mlir::isa<FloatType>(out_type)) {
      return failure();
    }

    Value numerator = op->getOperand(0);
    Value denominator = op->getOperand(1);
    rewriter.replaceOpWithNewOp<neura::FDivOp>(op, out_type, numerator,
                                               denominator);
    return success();
  }
};

// Rewrites `llvm.fptosi` into a generic `neura.cast` op whose `cast_type`
// string attribute is "fptosi".
struct LlvmFPToSIToNeuraCast : public OpRewritePattern<mlir::LLVM::FPToSIOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(mlir::LLVM::FPToSIOp op,
                                PatternRewriter &rewriter) const override {
    Value input = op.getArg();
    Type result_type = op.getType();

    // NOTE(review): unlike LlvmFDivToNeuraFDiv above, this pattern has no
    // scalar-type guard, so vector `llvm.fptosi` ops would also be rewritten
    // — confirm neura.cast accepts non-scalar operands, or add a FloatType
    // check on the operand type for consistency.
    // Create a cast operation with "fptosi" as the cast type
    rewriter.replaceOpWithNewOp<neura::CastOp>(op, result_type, input,
                                               rewriter.getStringAttr("fptosi"));
    return success();
  }
};

struct LlvmVFMulToNeuraVFMul : public OpRewritePattern<mlir::LLVM::FMulOp> {
using OpRewritePattern::OpRewritePattern;

Expand Down Expand Up @@ -533,6 +566,8 @@ struct LowerLlvmToNeuraPass
patterns.add<LlvmShlToNeuraShl>(&getContext());
patterns.add<LlvmSDivToNeuraDiv>(&getContext());
patterns.add<LlvmSRemToNeuraRem>(&getContext());
patterns.add<LlvmFDivToNeuraFDiv>(&getContext());
patterns.add<LlvmFPToSIToNeuraCast>(&getContext());

FrozenRewritePatternSet frozen(std::move(patterns));

Expand Down
16 changes: 16 additions & 0 deletions test/e2e/histogram/histogram_kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#define DATA_LEN 20
#define BUCKET_LEN 5
#define MIN 1.0
#define MAX 19.0

/// Builds a histogram of `input` over BUCKET_LEN equal-width buckets that
/// span the value range [MIN, MAX].
///
/// @param input     DATA_LEN floats to be bucketed.
/// @param histogram BUCKET_LEN counters, incremented in place (callers are
///                  expected to zero-initialize).
void kernel(float input[], int histogram[]) {
  int i;
  float dmin = (float)MIN;
  float delt = (float)(MAX - dmin);

  for (i = 0; i < DATA_LEN; i++) {
    float r = BUCKET_LEN * (input[i] - dmin) / delt;
    int b = (int)(r);
    // BUG FIX: input[i] == MAX gives r == BUCKET_LEN exactly, so the
    // unclamped index wrote one past the end of `histogram` (UB).  Clamp
    // the index into [0, BUCKET_LEN - 1]; out-of-range inputs now land in
    // the nearest edge bucket instead of corrupting memory.
    // NOTE(review): the companion .ll/.mlir fixtures were generated from
    // the unclamped version; regenerate them if they must stay in sync.
    if (b < 0) b = 0;
    if (b >= BUCKET_LEN) b = BUCKET_LEN - 1;
    histogram[b]++;
  }
}
60 changes: 60 additions & 0 deletions test/e2e/histogram/histogram_kernel.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
; ModuleID = 'histogram_kernel.cpp'
source_filename = "histogram_kernel.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable
; Histogram kernel compiled at -O2: the source loop was unrolled by 2, so
; each iteration performs two bucket updates and advances the induction
; variable by 2 up to the trip count of 20.
define dso_local void @_Z6kernelPfPi(ptr nocapture noundef readonly %0, ptr nocapture noundef %1) local_unnamed_addr #0 {
  br label %3

3:                                                ; preds = %3, %2
  %4 = phi i64 [ 0, %2 ], [ %26, %3 ]
  ; First bucket update: b = (int)(5.0 * (input[i] - 1.0) / 18.0),
  ; then histogram[b]++.
  %5 = getelementptr inbounds nuw float, ptr %0, i64 %4
  %6 = load float, ptr %5, align 4, !tbaa !5
  %7 = fadd float %6, -1.000000e+00
  %8 = fmul float %7, 5.000000e+00
  %9 = fdiv float %8, 1.800000e+01
  %10 = fptosi float %9 to i32
  %11 = sext i32 %10 to i64
  %12 = getelementptr inbounds i32, ptr %1, i64 %11
  %13 = load i32, ptr %12, align 4, !tbaa !9
  %14 = add nsw i32 %13, 1
  store i32 %14, ptr %12, align 4, !tbaa !9
  ; Second (unrolled) bucket update for element i | 1 — valid because the
  ; induction variable is always even here.
  %15 = or disjoint i64 %4, 1
  %16 = getelementptr inbounds nuw float, ptr %0, i64 %15
  %17 = load float, ptr %16, align 4, !tbaa !5
  %18 = fadd float %17, -1.000000e+00
  %19 = fmul float %18, 5.000000e+00
  %20 = fdiv float %19, 1.800000e+01
  %21 = fptosi float %20 to i32
  %22 = sext i32 %21 to i64
  %23 = getelementptr inbounds i32, ptr %1, i64 %22
  %24 = load i32, ptr %23, align 4, !tbaa !9
  %25 = add nsw i32 %24, 1
  store i32 %25, ptr %23, align 4, !tbaa !9
  ; Step by 2 (unroll factor) and exit once all 20 elements are processed.
  %26 = add nuw nsw i64 %4, 2
  %27 = icmp eq i64 %26, 20
  br i1 %27, label %28, label %3, !llvm.loop !11

28:                                               ; preds = %3
  ret void
}

attributes #0 = { mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.ident = !{!4}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{!"clang version 20.1.7 (https://github.com/llvm/llvm-project.git 6146a88f60492b520a36f8f8f3231e15f3cc6082)"}
!5 = !{!6, !6, i64 0}
!6 = !{!"float", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C++ TBAA"}
!9 = !{!10, !10, i64 0}
!10 = !{!"int", !7, i64 0}
!11 = distinct !{!11, !12}
!12 = !{!"llvm.loop.mustprogress"}
81 changes: 81 additions & 0 deletions test/e2e/histogram/histogram_kernel.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// RUN: mlir-neura-opt %s \
// RUN: --assign-accelerator \
// RUN: --lower-llvm-to-neura \
// RUN: --canonicalize-live-in \
// RUN: --leverage-predicated-value \
// RUN: --transform-ctrl-to-data-flow \
// RUN: --promote-func-arg-to-const \
// RUN: --insert-data-mov \
// RUN: --map-to-accelerator="mapping-strategy=heuristic" \
// RUN: --architecture-spec=../../arch_spec/architecture.yaml \
// RUN: --generate-code -o %t-mapping.mlir
// RUN: FileCheck %s --input-file=%t-mapping.mlir -check-prefix=MAPPING
// RUN: FileCheck %s --input-file=tmp-generated-instructions.yaml --check-prefix=YAML
// RUN: FileCheck %s --input-file=tmp-generated-instructions.asm --check-prefix=ASM

// This test verifies the complete compilation pipeline for histogram kernel
// from LLVM IR to Neura dialect with code generation.

// TBAA and loop metadata carried over from the clang-generated LLVM IR.
#loop_annotation = #llvm.loop_annotation<mustProgress = true>
#tbaa_root = #llvm.tbaa_root<id = "Simple C++ TBAA">
#tbaa_type_desc = #llvm.tbaa_type_desc<id = "omnipotent char", members = {<#tbaa_root, 0>}>
#tbaa_type_desc1 = #llvm.tbaa_type_desc<id = "float", members = {<#tbaa_type_desc, 0>}>
#tbaa_type_desc2 = #llvm.tbaa_type_desc<id = "int", members = {<#tbaa_type_desc, 0>}>
#tbaa_tag = #llvm.tbaa_tag<base_type = #tbaa_type_desc1, access_type = #tbaa_type_desc1, offset = 0>
#tbaa_tag1 = #llvm.tbaa_tag<base_type = #tbaa_type_desc2, access_type = #tbaa_type_desc2, offset = 0>
module attributes {dlti.dl_spec = #dlti.dl_spec<f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, i128 = dense<128> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.endianness" = "little">, llvm.ident = "clang version 20.1.7 (https://github.com/llvm/llvm-project.git 6146a88f60492b520a36f8f8f3231e15f3cc6082)"} {
  // Histogram kernel imported from LLVM IR; the source loop is unrolled by
  // 2, so each iteration performs two bucket updates and steps by 2.
  llvm.func local_unnamed_addr @_Z6kernelPfPi(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}) attributes {memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} {
    // Loop-invariant constants: -1.0 folds the `x - dmin` subtraction into
    // an fadd; 5.0 and 18.0 match BUCKET_LEN and (MAX - MIN) in the C
    // source, and 20 is the trip count.
    %0 = llvm.mlir.constant(0 : i64) : i64
    %1 = llvm.mlir.constant(-1.000000e+00 : f32) : f32
    %2 = llvm.mlir.constant(5.000000e+00 : f32) : f32
    %3 = llvm.mlir.constant(1.800000e+01 : f32) : f32
    %4 = llvm.mlir.constant(1 : i32) : i32
    %5 = llvm.mlir.constant(1 : i64) : i64
    %6 = llvm.mlir.constant(2 : i64) : i64
    %7 = llvm.mlir.constant(20 : i64) : i64
    llvm.br ^bb1(%0 : i64)
  ^bb1(%8: i64): // 2 preds: ^bb0, ^bb1
    // First bucket update: b = (int)(5.0 * (input[i] - 1.0) / 18.0),
    // then histogram[b]++.
    %9 = llvm.getelementptr inbounds %arg0[%8] : (!llvm.ptr, i64) -> !llvm.ptr, f32
    %10 = llvm.load %9 {alignment = 4 : i64, tbaa = [#tbaa_tag]} : !llvm.ptr -> f32
    %11 = llvm.fadd %10, %1 : f32
    %12 = llvm.fmul %11, %2 : f32
    %13 = llvm.fdiv %12, %3 : f32
    %14 = llvm.fptosi %13 : f32 to i32
    %15 = llvm.sext %14 : i32 to i64
    %16 = llvm.getelementptr inbounds %arg1[%15] : (!llvm.ptr, i64) -> !llvm.ptr, i32
    %17 = llvm.load %16 {alignment = 4 : i64, tbaa = [#tbaa_tag1]} : !llvm.ptr -> i32
    %18 = llvm.add %17, %4 overflow<nsw> : i32
    llvm.store %18, %16 {alignment = 4 : i64, tbaa = [#tbaa_tag1]} : i32, !llvm.ptr
    // Second (unrolled) bucket update for element i | 1 — valid because the
    // induction variable is always even here.
    %19 = llvm.or disjoint %8, %5 : i64
    %20 = llvm.getelementptr inbounds %arg0[%19] : (!llvm.ptr, i64) -> !llvm.ptr, f32
    %21 = llvm.load %20 {alignment = 4 : i64, tbaa = [#tbaa_tag]} : !llvm.ptr -> f32
    %22 = llvm.fadd %21, %1 : f32
    %23 = llvm.fmul %22, %2 : f32
    %24 = llvm.fdiv %23, %3 : f32
    %25 = llvm.fptosi %24 : f32 to i32
    %26 = llvm.sext %25 : i32 to i64
    %27 = llvm.getelementptr inbounds %arg1[%26] : (!llvm.ptr, i64) -> !llvm.ptr, i32
    %28 = llvm.load %27 {alignment = 4 : i64, tbaa = [#tbaa_tag1]} : !llvm.ptr -> i32
    %29 = llvm.add %28, %4 overflow<nsw> : i32
    llvm.store %29, %27 {alignment = 4 : i64, tbaa = [#tbaa_tag1]} : i32, !llvm.ptr
    // Step by 2 (unroll factor) and exit once all 20 elements are done.
    %30 = llvm.add %8, %6 overflow<nsw, nuw> : i64
    %31 = llvm.icmp "eq" %30, %7 : i64
    llvm.cond_br %31, ^bb2, ^bb1(%30 : i64) {loop_annotation = #loop_annotation}
  ^bb2: // pred: ^bb1
    llvm.return
  }
}

// MAPPING: module
// MAPPING: func @_Z6kernelPfPi
// MAPPING: neura.constant
// MAPPING: neura.fdiv
// MAPPING: neura.cast

// YAML: instructions:
// YAML: - opcode: "CONSTANT"
// YAML: - opcode: "FDIV"
// YAML: - opcode: "CAST"

// ASM: PE(0,0):
// ASM: CONSTANT
53 changes: 53 additions & 0 deletions test/testbench/histogram/histogram.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <float.h>

#include <string.h>
#include <unistd.h>

// Histogram configuration: DATA_LEN input samples distributed over
// BUCKET_LEN equal-width buckets covering the value range [MIN, MAX].
#define DATA_LEN 20
#define BUCKET_LEN 5
#define MIN 1.0
#define MAX 19.0

void kernel(float input_data[], int histogram[]);
void output();

// Fixed test input.  NOTE(review): it includes the upper bound MAX (19),
// which the bucket formula in kernel() maps to r == BUCKET_LEN exactly —
// an out-of-range index unless the kernel clamps it.
float input_data[DATA_LEN] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,14,14,14,14,14,19};
int histogram[BUCKET_LEN] = {0};

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
// Drives the histogram testbench: prints the configuration, runs the
// kernel over the fixed global input, then dumps the bucket counts.
int main( int argc, char** argv) {

  printf("DATA_LEN %d BUCKET_LEN %d\n",DATA_LEN, BUCKET_LEN);
  // Fills the global `histogram` from the global `input_data`.
  kernel(input_data, histogram);
  output();

  return 0;
}

// Computes the histogram of `input` over BUCKET_LEN equal-width buckets
// spanning [MIN, MAX].  `histogram` holds BUCKET_LEN counters and is
// incremented in place (callers zero-initialize it).
void kernel(float input[], int histogram[]) {
  int i;
  float dmin = (float)MIN;
  float delt = (float)(MAX - dmin);

#pragma clang loop vectorize(enable) vectorize_width(4) unroll_count(4)
  for (i = 0; i < DATA_LEN; i++) {
    float r = BUCKET_LEN * (input[i] - dmin) / delt;
    int b = (int)(r);
    // BUG FIX: input[i] == MAX (present in the driver's input_data) gives
    // r == BUCKET_LEN exactly, so the unclamped index wrote one past the
    // end of `histogram` (out-of-bounds write, UB).  Clamp into range so
    // edge values land in the last/first bucket instead.
    if (b < 0) b = 0;
    if (b >= BUCKET_LEN) b = BUCKET_LEN - 1;
    histogram[b]++;
  }
}

// Prints the histogram configuration (bucket count, range minimum and
// width of the covered range) followed by every bucket's count.
void output() {
  printf("len %d\n", BUCKET_LEN);
  printf("min %f\n", MIN);
  printf("del %f\n", MAX-MIN);
  int bucket = 0;
  while (bucket < BUCKET_LEN) {
    printf("%d ", histogram[bucket]);
    ++bucket;
  }
  printf("\n");
}
}
16 changes: 16 additions & 0 deletions test/testbench/histogram/histogram_kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#define DATA_LEN 20
#define BUCKET_LEN 5
#define MIN 1.0
#define MAX 19.0

/// Builds a histogram of `input` over BUCKET_LEN equal-width buckets that
/// span the value range [MIN, MAX].
///
/// @param input     DATA_LEN floats to be bucketed.
/// @param histogram BUCKET_LEN counters, incremented in place (callers are
///                  expected to zero-initialize).
void kernel(float input[], int histogram[]) {
  int i;
  float dmin = (float)MIN;
  float delt = (float)(MAX - dmin);

  for (i = 0; i < DATA_LEN; i++) {
    float r = BUCKET_LEN * (input[i] - dmin) / delt;
    int b = (int)(r);
    // BUG FIX: input[i] == MAX gives r == BUCKET_LEN exactly, so the
    // unclamped index wrote one past the end of `histogram` (UB).  Clamp
    // the index into [0, BUCKET_LEN - 1]; out-of-range inputs now land in
    // the nearest edge bucket instead of corrupting memory.
    if (b < 0) b = 0;
    if (b >= BUCKET_LEN) b = BUCKET_LEN - 1;
    histogram[b]++;
  }
}
60 changes: 60 additions & 0 deletions test/testbench/histogram/histogram_kernel.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
; ModuleID = 'histogram_kernel.cpp'
source_filename = "histogram_kernel.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable
; Histogram kernel compiled at -O2: the source loop was unrolled by 2, so
; each iteration performs two bucket updates and advances the induction
; variable by 2 up to the trip count of 20.
define dso_local void @_Z6kernelPfPi(ptr nocapture noundef readonly %0, ptr nocapture noundef %1) local_unnamed_addr #0 {
  br label %3

3:                                                ; preds = %3, %2
  %4 = phi i64 [ 0, %2 ], [ %26, %3 ]
  ; First bucket update: b = (int)(5.0 * (input[i] - 1.0) / 18.0),
  ; then histogram[b]++.
  %5 = getelementptr inbounds nuw float, ptr %0, i64 %4
  %6 = load float, ptr %5, align 4, !tbaa !5
  %7 = fadd float %6, -1.000000e+00
  %8 = fmul float %7, 5.000000e+00
  %9 = fdiv float %8, 1.800000e+01
  %10 = fptosi float %9 to i32
  %11 = sext i32 %10 to i64
  %12 = getelementptr inbounds i32, ptr %1, i64 %11
  %13 = load i32, ptr %12, align 4, !tbaa !9
  %14 = add nsw i32 %13, 1
  store i32 %14, ptr %12, align 4, !tbaa !9
  ; Second (unrolled) bucket update for element i | 1 — valid because the
  ; induction variable is always even here.
  %15 = or disjoint i64 %4, 1
  %16 = getelementptr inbounds nuw float, ptr %0, i64 %15
  %17 = load float, ptr %16, align 4, !tbaa !5
  %18 = fadd float %17, -1.000000e+00
  %19 = fmul float %18, 5.000000e+00
  %20 = fdiv float %19, 1.800000e+01
  %21 = fptosi float %20 to i32
  %22 = sext i32 %21 to i64
  %23 = getelementptr inbounds i32, ptr %1, i64 %22
  %24 = load i32, ptr %23, align 4, !tbaa !9
  %25 = add nsw i32 %24, 1
  store i32 %25, ptr %23, align 4, !tbaa !9
  ; Step by 2 (unroll factor) and exit once all 20 elements are processed.
  %26 = add nuw nsw i64 %4, 2
  %27 = icmp eq i64 %26, 20
  br i1 %27, label %28, label %3, !llvm.loop !11

28:                                               ; preds = %3
  ret void
}

attributes #0 = { mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.ident = !{!4}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{!"clang version 20.1.7 (https://github.com/llvm/llvm-project.git 6146a88f60492b520a36f8f8f3231e15f3cc6082)"}
!5 = !{!6, !6, i64 0}
!6 = !{!"float", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C++ TBAA"}
!9 = !{!10, !10, i64 0}
!10 = !{!"int", !7, i64 0}
!11 = distinct !{!11, !12}
!12 = !{!"llvm.loop.mustprogress"}
Loading
Loading