Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions test/testbench/histogram/histogram.cpp
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why upload this? You already uploaded the entire CGRA-Bench in the last PR.

Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <float.h>

#include <string.h>
#include <unistd.h>

// Benchmark configuration: problem size and the value range being bucketed.
#define DATA_LEN 20   // number of input samples processed by kernel()
#define BUCKET_LEN 5  // number of histogram buckets
#define MIN 1.0       // lower bound of the bucketed value range
#define MAX 19.0      // upper bound of the bucketed value range

// kernel() fills histogram[] from input_data[]; output() prints the result.
void kernel(float input_data[], int histogram[]);
void output();

// DATA_LEN samples in [MIN, MAX]; 14 is repeated six times to skew one bucket.
// NOTE(review): the trailing 19 equals MAX, which makes kernel() compute a
// bucket index of BUCKET_LEN (5*(19-1)/18 == 5) — one past the end of
// histogram[] as originally written.
float input_data[DATA_LEN] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,14,14,14,14,14,19};
// Bucket counts, zero-initialized; written by kernel(), read by output().
int histogram[BUCKET_LEN] = {0};

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
// Entry point: announce the benchmark configuration, run the histogram
// kernel over the global input data, then print the resulting buckets.
int main(int argc, char **argv) {
    printf("DATA_LEN %d BUCKET_LEN %d\n",DATA_LEN, BUCKET_LEN);

    // Fill the global histogram[] from the global input_data[].
    kernel(input_data, histogram);
    // Dump parameters and bucket counts to stdout.
    output();
    return 0;
}

// Bins DATA_LEN samples from input[] into BUCKET_LEN equal-width buckets
// spanning [MIN, MAX], incrementing the corresponding histogram[] slot.
//
// Fix: the bucket index is now clamped to [0, BUCKET_LEN-1]. With the
// original code an input equal to MAX (the trailing 19 in input_data)
// produces r == BUCKET_LEN exactly, so histogram[BUCKET_LEN] was written —
// an out-of-bounds store and undefined behavior (CERT ARR30-C).
void kernel(float input[], int histogram[]) {
  int i;
  float dmin = (float)MIN;
  float delt = (float)(MAX - dmin);

  #pragma clang loop vectorize(enable) vectorize_width(4) unroll_count(4)
  for (i = 0; i < DATA_LEN; i++) {
    float r = BUCKET_LEN * (input[i] - dmin) / delt;
    int b = (int)(r);
    // Clamp: values at/above MAX land in the top bucket and values below
    // MIN in the bottom one, instead of indexing outside histogram[].
    if (b >= BUCKET_LEN) b = BUCKET_LEN - 1;
    if (b < 0) b = 0;
    histogram[b]++;
  }
}

// Print the benchmark parameters followed by the bucket counts, all to stdout.
void output() {
  printf("len %d\n", BUCKET_LEN);
  printf("min %f\n", MIN);
  printf("del %f\n", MAX-MIN);

  // One space-separated count per bucket, then a newline.
  int i = 0;
  while (i < BUCKET_LEN) {
    printf("%d ", histogram[i]);
    i++;
  }
  printf("\n");
}
16 changes: 16 additions & 0 deletions test/testbench/histogram/histogram_kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Benchmark parameters — kept in sync with histogram.cpp.
#define DATA_LEN 20   // number of input samples
#define BUCKET_LEN 5  // number of histogram buckets
#define MIN 1.0       // lower bound of the bucketed value range
#define MAX 19.0      // upper bound of the bucketed value range

// Bins DATA_LEN samples from input[] into BUCKET_LEN equal-width buckets
// over [MIN, MAX], incrementing histogram[] in place.
//
// Fix: the bucket index is now clamped to [0, BUCKET_LEN-1]. An input equal
// to MAX maps to r == BUCKET_LEN exactly, so the original histogram[b]++
// wrote one element past the end of histogram[] — undefined behavior that
// the benchmark's own input data (ending in 19 == MAX) actually triggers.
void kernel(float input[], int histogram[]) {
  int i;
  float dmin = (float)MIN;
  float delt = (float)(MAX - dmin);

  for (i = 0; i < DATA_LEN; i++) {
    float r = BUCKET_LEN * (input[i] - dmin) / delt;
    int b = (int)(r);
    if (b >= BUCKET_LEN) b = BUCKET_LEN - 1;  // value == MAX -> top bucket
    if (b < 0) b = 0;                         // guard values below MIN
    histogram[b]++;
  }
}
60 changes: 60 additions & 0 deletions test/testbench/histogram/histogram_kernel.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
; ModuleID = 'histogram_kernel.cpp'
source_filename = "histogram_kernel.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Optimized clang output for kernel(float* input, int* histogram): the
; 20-iteration histogram loop from histogram_kernel.cpp, unrolled by a
; factor of 2 (each trip of block %3 handles indices %4 and %4|1).
; NOTE(review): an input equal to MAX (19.0) yields (19-1)*5/18 == 5.0,
; so %10/%21 become index 5 — one past the 5-element histogram. The
; out-of-bounds store from the C source is faithfully preserved here.
; Function Attrs: mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable
define dso_local void @_Z6kernelPfPi(ptr nocapture noundef readonly %0, ptr nocapture noundef %1) local_unnamed_addr #0 {
br label %3

3: ; preds = %3, %2
; %4 is the element index, stepped by 2 per trip (unroll factor).
%4 = phi i64 [ 0, %2 ], [ %26, %3 ]
; --- first unrolled iteration: bucket = (int)((input[%4] - 1) * 5 / 18) ---
%5 = getelementptr inbounds nuw float, ptr %0, i64 %4
%6 = load float, ptr %5, align 4, !tbaa !5
%7 = fadd float %6, -1.000000e+00
%8 = fmul float %7, 5.000000e+00
%9 = fdiv float %8, 1.800000e+01
%10 = fptosi float %9 to i32
%11 = sext i32 %10 to i64
; histogram[bucket]++
%12 = getelementptr inbounds i32, ptr %1, i64 %11
%13 = load i32, ptr %12, align 4, !tbaa !9
%14 = add nsw i32 %13, 1
store i32 %14, ptr %12, align 4, !tbaa !9
; --- second unrolled iteration: same computation for input[%4 | 1] ---
%15 = or disjoint i64 %4, 1
%16 = getelementptr inbounds nuw float, ptr %0, i64 %15
%17 = load float, ptr %16, align 4, !tbaa !5
%18 = fadd float %17, -1.000000e+00
%19 = fmul float %18, 5.000000e+00
%20 = fdiv float %19, 1.800000e+01
%21 = fptosi float %20 to i32
%22 = sext i32 %21 to i64
%23 = getelementptr inbounds i32, ptr %1, i64 %22
%24 = load i32, ptr %23, align 4, !tbaa !9
%25 = add nsw i32 %24, 1
store i32 %25, ptr %23, align 4, !tbaa !9
; advance by 2 and exit after DATA_LEN (20) elements.
%26 = add nuw nsw i64 %4, 2
%27 = icmp eq i64 %26, 20
br i1 %27, label %28, label %3, !llvm.loop !11

28: ; preds = %3
ret void
}

attributes #0 = { mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

; Module flags and TBAA metadata emitted by clang; !5 tags float accesses,
; !9 tags int accesses, !11 marks the loop as mustprogress.
!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.ident = !{!4}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{!"clang version 20.1.7 (https://github.com/llvm/llvm-project.git 6146a88f60492b520a36f8f8f3231e15f3cc6082)"}
!5 = !{!6, !6, i64 0}
!6 = !{!"float", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C++ TBAA"}
!9 = !{!10, !10, i64 0}
!10 = !{!"int", !7, i64 0}
!11 = distinct !{!11, !12}
!12 = !{!"llvm.loop.mustprogress"}
49 changes: 49 additions & 0 deletions test/testbench/histogram/histogram_kernel.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// LLVM-dialect MLIR import of histogram_kernel.ll: the 2x-unrolled
// histogram loop of kernel(float*, int*). TBAA tags mirror the .ll file
// (#tbaa_tag = float accesses, #tbaa_tag1 = int accesses).
#loop_annotation = #llvm.loop_annotation<mustProgress = true>
#tbaa_root = #llvm.tbaa_root<id = "Simple C++ TBAA">
#tbaa_type_desc = #llvm.tbaa_type_desc<id = "omnipotent char", members = {<#tbaa_root, 0>}>
#tbaa_type_desc1 = #llvm.tbaa_type_desc<id = "float", members = {<#tbaa_type_desc, 0>}>
#tbaa_type_desc2 = #llvm.tbaa_type_desc<id = "int", members = {<#tbaa_type_desc, 0>}>
#tbaa_tag = #llvm.tbaa_tag<base_type = #tbaa_type_desc1, access_type = #tbaa_type_desc1, offset = 0>
#tbaa_tag1 = #llvm.tbaa_tag<base_type = #tbaa_type_desc2, access_type = #tbaa_type_desc2, offset = 0>
module attributes {dlti.dl_spec = #dlti.dl_spec<f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, i128 = dense<128> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.endianness" = "little">, llvm.ident = "clang version 20.1.7 (https://github.com/llvm/llvm-project.git 6146a88f60492b520a36f8f8f3231e15f3cc6082)"} {
  llvm.func local_unnamed_addr @_Z6kernelPfPi(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}) attributes {memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} {
    // Loop constants: start index, -MIN, BUCKET_LEN, MAX-MIN, increment 1,
    // unroll offset/step, and trip bound DATA_LEN (20).
    %0 = llvm.mlir.constant(0 : i64) : i64
    %1 = llvm.mlir.constant(-1.000000e+00 : f32) : f32
    %2 = llvm.mlir.constant(5.000000e+00 : f32) : f32
    %3 = llvm.mlir.constant(1.800000e+01 : f32) : f32
    %4 = llvm.mlir.constant(1 : i32) : i32
    %5 = llvm.mlir.constant(1 : i64) : i64
    %6 = llvm.mlir.constant(2 : i64) : i64
    %7 = llvm.mlir.constant(20 : i64) : i64
    llvm.br ^bb1(%0 : i64)
  ^bb1(%8: i64): // 2 preds: ^bb0, ^bb1
    // First unrolled iteration: bucket = (int)((input[%8] - 1) * 5 / 18),
    // then histogram[bucket]++.
    %9 = llvm.getelementptr inbounds %arg0[%8] : (!llvm.ptr, i64) -> !llvm.ptr, f32
    %10 = llvm.load %9 {alignment = 4 : i64, tbaa = [#tbaa_tag]} : !llvm.ptr -> f32
    %11 = llvm.fadd %10, %1 : f32
    %12 = llvm.fmul %11, %2 : f32
    %13 = llvm.fdiv %12, %3 : f32
    %14 = llvm.fptosi %13 : f32 to i32
    %15 = llvm.sext %14 : i32 to i64
    %16 = llvm.getelementptr inbounds %arg1[%15] : (!llvm.ptr, i64) -> !llvm.ptr, i32
    %17 = llvm.load %16 {alignment = 4 : i64, tbaa = [#tbaa_tag1]} : !llvm.ptr -> i32
    %18 = llvm.add %17, %4 overflow<nsw> : i32
    llvm.store %18, %16 {alignment = 4 : i64, tbaa = [#tbaa_tag1]} : i32, !llvm.ptr
    // Second unrolled iteration for element %8 | 1.
    %19 = llvm.or disjoint %8, %5 : i64
    %20 = llvm.getelementptr inbounds %arg0[%19] : (!llvm.ptr, i64) -> !llvm.ptr, f32
    %21 = llvm.load %20 {alignment = 4 : i64, tbaa = [#tbaa_tag]} : !llvm.ptr -> f32
    %22 = llvm.fadd %21, %1 : f32
    %23 = llvm.fmul %22, %2 : f32
    %24 = llvm.fdiv %23, %3 : f32
    %25 = llvm.fptosi %24 : f32 to i32
    %26 = llvm.sext %25 : i32 to i64
    %27 = llvm.getelementptr inbounds %arg1[%26] : (!llvm.ptr, i64) -> !llvm.ptr, i32
    %28 = llvm.load %27 {alignment = 4 : i64, tbaa = [#tbaa_tag1]} : !llvm.ptr -> i32
    %29 = llvm.add %28, %4 overflow<nsw> : i32
    llvm.store %29, %27 {alignment = 4 : i64, tbaa = [#tbaa_tag1]} : i32, !llvm.ptr
    // Advance by 2; exit once all 20 elements are processed.
    %30 = llvm.add %8, %6 overflow<nsw, nuw> : i64
    %31 = llvm.icmp "eq" %30, %7 : i64
    llvm.cond_br %31, ^bb2, ^bb1(%30 : i64) {loop_annotation = #loop_annotation}
  ^bb2: // pred: ^bb1
    llvm.return
  }
}
43 changes: 43 additions & 0 deletions test/testbench/histogram/histogram_kernel_neura.mlir
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You only need a histogram_test.mlir in this folder that includes a command starting with // RUN:, and then checks whatever you want to check. What do you think?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought we should have the entire cpp code. So just the kernel's mlir file would be fine?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You already have that, right?

Screenshot 2025-10-17 at 4 37 26 PM

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You already have that, right?

Screenshot 2025-10-17 at 4 37 26 PM

Yes, but I have some other testbenches that are not included in this repo. Should I upload those c++ files?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So only mlir file?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • For other .cc not included in the CGRA-Bench, you can create another folder like benchmarks, and then put them inside it, you can also put CGRA-Bench inside it. So it would look like:
    • dataflow/test/benchmarks/a.cpp
    • dataflow/test/benchmarks/b.cpp
    • dataflow/test/benchmarks/CGRA-Bench
  • for testing, you can create another folder, e.g., e2e, then mimic the test at https://github.com/coredac/dataflow/blob/main/test/neura/for_loop/relu_test.mlir, and provide your testing_a.mlir inside test/e2e

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename your testbench as e2e

Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Neura-dialect lowering of histogram_kernel.mlir: the same 2x-unrolled
// histogram loop, with most llvm.* ops rewritten as neura.* ops.
// NOTE(review): llvm.fdiv and llvm.fptosi below were NOT converted to
// neura.fdiv / neura.fptosi, unlike the surrounding ops — confirm whether
// the conversion pass is expected to handle them.
module attributes {dlti.dl_spec = #dlti.dl_spec<f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, i128 = dense<128> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.endianness" = "little">, llvm.ident = "clang version 20.1.7 (https://github.com/llvm/llvm-project.git 6146a88f60492b520a36f8f8f3231e15f3cc6082)"} {
func.func @_Z6kernelPfPi(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv<ccc>, accelerator = "neura", linkage = #llvm.linkage<external>, memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} {
// Loop constants (same values as the llvm-dialect version).
%0 = "neura.constant"() <{value = 0 : i64}> : () -> i64
%1 = "neura.constant"() <{value = -1.000000e+00 : f32}> : () -> f32
%2 = "neura.constant"() <{value = 5.000000e+00 : f32}> : () -> f32
%3 = "neura.constant"() <{value = 1.800000e+01 : f32}> : () -> f32
%4 = "neura.constant"() <{value = 1 : i32}> : () -> i32
%5 = "neura.constant"() <{value = 1 : i64}> : () -> i64
%6 = "neura.constant"() <{value = 2 : i64}> : () -> i64
%7 = "neura.constant"() <{value = 20 : i64}> : () -> i64
neura.br %0 : i64 to ^bb1
^bb1(%8: i64): // 2 preds: ^bb0, ^bb1
// First unrolled iteration: bucket = (int)((input[%8] - 1) * 5 / 18).
%9 = "neura.gep"(%arg0, %8) : (!llvm.ptr, i64) -> !llvm.ptr
%10 = "neura.load"(%9) : (!llvm.ptr) -> f32
%11 = "neura.fadd"(%10, %1) : (f32, f32) -> f32
%12 = "neura.fmul"(%11, %2) : (f32, f32) -> f32
%13 = llvm.fdiv %12, %3 : f32
%14 = llvm.fptosi %13 : f32 to i32
Comment on lines +17 to +18
Copy link

Copilot AI Oct 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These LLVM dialect operations should be converted to Neura dialect operations. The llvm.fdiv should be neura.fdiv and llvm.fptosi should be neura.fptosi to maintain consistency with the rest of the converted code.

Copilot uses AI. Check for mistakes.
// histogram[bucket]++ for the first element.
%15 = neura.sext %14 : i32 -> i64
%16 = "neura.gep"(%arg1, %15) : (!llvm.ptr, i64) -> !llvm.ptr
%17 = "neura.load"(%16) : (!llvm.ptr) -> i32
%18 = "neura.add"(%17, %4) : (i32, i32) -> i32
"neura.store"(%18, %16) : (i32, !llvm.ptr) -> ()
// Second unrolled iteration for element %8 | 1.
%19 = "neura.or"(%8, %5) : (i64, i64) -> i64
%20 = "neura.gep"(%arg0, %19) : (!llvm.ptr, i64) -> !llvm.ptr
%21 = "neura.load"(%20) : (!llvm.ptr) -> f32
%22 = "neura.fadd"(%21, %1) : (f32, f32) -> f32
%23 = "neura.fmul"(%22, %2) : (f32, f32) -> f32
%24 = llvm.fdiv %23, %3 : f32
%25 = llvm.fptosi %24 : f32 to i32
Comment on lines +29 to +30
Copy link

Copilot AI Oct 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These LLVM dialect operations should be converted to Neura dialect operations. The llvm.fdiv should be neura.fdiv and llvm.fptosi should be neura.fptosi to maintain consistency with the rest of the converted code.

Copilot uses AI. Check for mistakes.
// histogram[bucket]++ for the second element.
%26 = neura.sext %25 : i32 -> i64
%27 = "neura.gep"(%arg1, %26) : (!llvm.ptr, i64) -> !llvm.ptr
%28 = "neura.load"(%27) : (!llvm.ptr) -> i32
%29 = "neura.add"(%28, %4) : (i32, i32) -> i32
"neura.store"(%29, %27) : (i32, !llvm.ptr) -> ()
// Advance by 2; exit once all 20 elements are processed.
%30 = "neura.add"(%8, %6) : (i64, i64) -> i64
%31 = "neura.icmp"(%30, %7) <{cmpType = "eq"}> : (i64, i64) -> i1
neura.cond_br %31 : i1 then to ^bb2 else %30 : i64 to ^bb1
^bb2: // pred: ^bb1
"neura.return"() : () -> ()
}
}

Loading