Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions lib/NeuraDialect/Transforms/GenerateCodePass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ static std::string getConstantLiteral(Operation *op) {
return "#" + std::to_string(integer_attr.getInt());
if (auto float_attr = dyn_cast<FloatAttr>(value_attr))
return "#" + std::to_string(float_attr.getValueAsDouble());
//TODO: Issue #154: handle argument situations.
// if (auto string_attr = dyn_cast<StringAttr>(value_attr)) {
// std::string value = string_attr.getValue().str();
// return value;
// }
}
return "#0";
}
Expand All @@ -143,6 +148,9 @@ static std::string getConstantLiteral(Operation *op) {
return "#" + std::to_string(integer_attr.getInt());
if (auto float_attr = dyn_cast<FloatAttr>(constant_value_attr))
return "#" + std::to_string(float_attr.getValueAsDouble());
//TODO: Issue #154: handle argument situations.
// if (auto string_attr = dyn_cast<StringAttr>(constant_value_attr))
// return string_attr.getValue().str();
}

return "";
Expand Down
85 changes: 46 additions & 39 deletions lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ mlir::neura::BaseTopology parseTopologyString(const std::string& topology_str) {
}

// Helper function to parse architecture YAML configuration.
bool parseArchitectureYAML(llvm::yaml::Document &doc, int &width, int &height,
bool parseArchitectureYAML(llvm::yaml::Document &doc, int &width, int &height, int &max_ii,
mlir::neura::TileDefaults &tile_defaults,
std::vector<mlir::neura::TileOverride> &tile_overrides,
mlir::neura::LinkDefaults &link_defaults,
Expand All @@ -550,66 +550,71 @@ bool parseArchitectureYAML(llvm::yaml::Document &doc, int &width, int &height,
return false;
}

auto *rootMap = llvm::dyn_cast<llvm::yaml::MappingNode>(root);
if (!rootMap) {
auto *root_map = llvm::dyn_cast<llvm::yaml::MappingNode>(root);
if (!root_map) {
llvm::errs() << "[MapToAcceleratorPass] YAML root is not a mapping\n";
return false;
}

// Iterate root mapping ONCE; find 'architecture' and 'tile_defaults'.
for (auto &keyValuePair : *rootMap) {
auto *keyNode = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(keyValuePair.getKey());
if (!keyNode) continue;
for (auto &key_value_pair : *root_map) {
auto *key_node = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(key_value_pair.getKey());
if (!key_node) continue;

llvm::SmallString<64> keyString;
llvm::StringRef keyRef = keyNode->getValue(keyString);
llvm::SmallString<64> key_string;
llvm::StringRef key_ref = key_node->getValue(key_string);

if (keyRef == "architecture") {
auto *architectureMap = llvm::dyn_cast_or_null<llvm::yaml::MappingNode>(keyValuePair.getValue());
if (!architectureMap) continue;
if (key_ref == "architecture") {
auto *architecture_map = llvm::dyn_cast_or_null<llvm::yaml::MappingNode>(key_value_pair.getValue());
if (!architecture_map) continue;

// Iterate architecture mapping ONCE; read width/height in the same pass.
for (auto &architectureKeyValuePair : *architectureMap) {
auto *architectureKeyNode = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(architectureKeyValuePair.getKey());
if (!architectureKeyNode) continue;
for (auto &architecture_key_value_pair : *architecture_map) {
auto *architecture_key_node = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(architecture_key_value_pair.getKey());
if (!architecture_key_node) continue;

llvm::SmallString<64> architectureKeyString;
llvm::StringRef architectureKeyRef = architectureKeyNode->getValue(architectureKeyString);
if (architectureKeyRef != "width" && architectureKeyRef != "height") continue;

auto *architectureValueNode = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(architectureKeyValuePair.getValue());
if (!architectureValueNode) continue;

llvm::SmallString<64> architectureValueString;
llvm::StringRef architectureValueRef = architectureValueNode->getValue(architectureValueString);
long long tempValue = 0;
if (!architectureValueRef.getAsInteger(10, tempValue)) {
if (architectureKeyRef == "width") width = static_cast<int>(tempValue);
if (architectureKeyRef == "height") height = static_cast<int>(tempValue);
llvm::SmallString<64> architecture_key_string;
llvm::StringRef architecture_key_ref = architecture_key_node->getValue(architecture_key_string);
if (architecture_key_ref == "width" || architecture_key_ref == "height" || architecture_key_ref == "max_allowed_ii_by_hw") {
auto *architecture_value_node = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(architecture_key_value_pair.getValue());
if (!architecture_value_node) continue;

llvm::SmallString<64> architecture_value_string;
llvm::StringRef architecture_value_ref = architecture_value_node->getValue(architecture_value_string);
long long temp_value = 0;
if (!architecture_value_ref.getAsInteger(10, temp_value)) {
if (architecture_key_ref == "width") width = static_cast<int>(temp_value);
if (architecture_key_ref == "height") height = static_cast<int>(temp_value);
if (architecture_key_ref == "max_allowed_ii_by_hw") {
max_ii = static_cast<int>(temp_value);
}
}
} else {
continue;
}
}
} else if (keyRef == "tile_defaults") {
auto *tile_defaults_map = llvm::dyn_cast_or_null<llvm::yaml::MappingNode>(keyValuePair.getValue());
} else if (key_ref == "tile_defaults") {
auto *tile_defaults_map = llvm::dyn_cast_or_null<llvm::yaml::MappingNode>(key_value_pair.getValue());
if (tile_defaults_map) {
parseTileDefaults(tile_defaults_map, tile_defaults);
}
} else if (keyRef == "tile_overrides") {
auto *tile_overrides_seq = llvm::dyn_cast_or_null<llvm::yaml::SequenceNode>(keyValuePair.getValue());
} else if (key_ref == "tile_overrides") {
auto *tile_overrides_seq = llvm::dyn_cast_or_null<llvm::yaml::SequenceNode>(key_value_pair.getValue());
if (tile_overrides_seq) {
parseTileOverrides(tile_overrides_seq, tile_overrides);
}
} else if (keyRef == "link_defaults") {
auto *link_defaults_map = llvm::dyn_cast_or_null<llvm::yaml::MappingNode>(keyValuePair.getValue());
} else if (key_ref == "link_defaults") {
auto *link_defaults_map = llvm::dyn_cast_or_null<llvm::yaml::MappingNode>(key_value_pair.getValue());
if (link_defaults_map) {
parseLinkDefaults(link_defaults_map, link_defaults);
}
} else if (keyRef == "link_overrides") {
auto *link_overrides_seq = llvm::dyn_cast_or_null<llvm::yaml::SequenceNode>(keyValuePair.getValue());
} else if (key_ref == "link_overrides") {
auto *link_overrides_seq = llvm::dyn_cast_or_null<llvm::yaml::SequenceNode>(key_value_pair.getValue());
if (link_overrides_seq) {
parseLinkOverrides(link_overrides_seq, link_overrides);
}
} else if (keyRef == "base_topology") {
auto *topology_node = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(keyValuePair.getValue());
} else if (key_ref == "base_topology") {
auto *topology_node = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(key_value_pair.getValue());
if (topology_node) {
llvm::SmallString<64> topology_string;
llvm::StringRef topology_ref = topology_node->getValue(topology_string);
Expand Down Expand Up @@ -739,6 +744,7 @@ struct MapToAcceleratorPass
std::string architecture_spec_file = mlir::neura::getArchitectureSpecFile();
int yaml_width = -1;
int yaml_height = -1;
int yaml_max_ii = 20; // Default max_ii = 20
mlir::neura::TileDefaults yaml_tile_defaults;
std::vector<mlir::neura::TileOverride> tile_overrides;
mlir::neura::LinkDefaults yaml_link_defaults;
Expand Down Expand Up @@ -773,7 +779,7 @@ struct MapToAcceleratorPass
}

// Parse YAML configuration
if (!parseArchitectureYAML(firstDoc, yaml_width, yaml_height, yaml_tile_defaults, tile_overrides, yaml_link_defaults, link_overrides, base_topology)) {
if (!parseArchitectureYAML(firstDoc, yaml_width, yaml_height, yaml_max_ii, yaml_tile_defaults, tile_overrides, yaml_link_defaults, link_overrides, base_topology)) {
return;
}

Expand Down Expand Up @@ -845,7 +851,8 @@ struct MapToAcceleratorPass
int res_mii = calculateResMii(func, architecture);

const int possibleMinII = std::max(rec_mii, res_mii);
constexpr int maxII = 20;
const int maxII = yaml_max_ii; // Use YAML config (default 20 if not specified)

std::vector<Operation *> topologically_sorted_ops =
getTopologicallySortedOps(func);
if (topologically_sorted_ops.empty()) {
Expand Down
5 changes: 3 additions & 2 deletions test/arch_spec/arch_spec_example.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
architecture:
name: "NeuraCGRA"
version: "1.0"
width: 8
height: 8
width: 4
height: 4
max_allowed_ii_by_hw: 20

tile_defaults:
num_registers: 128
Expand Down
1 change: 1 addition & 0 deletions test/arch_spec/architecture.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ architecture:
version: "1.0"
width: 4
height: 4
max_allowed_ii_by_hw: 20

tile_defaults:
num_registers: 32
Expand Down
42 changes: 42 additions & 0 deletions test/testbench/fir/fir.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/* 32-tap FIR filter processing 1 point */
/* Modified to use arrays - SMP */

//#include "traps.h"

// Number of filter taps (also the length of every sample buffer below).
#define NTAPS 32

// Input sample buffer (file-scope, zero-initialized at program load).
float input[NTAPS];
// Output sample buffer; kernel() only ever writes output[0].
float output[NTAPS];
// Fixed filter coefficients: the same 8-value pattern repeated four times.
float coefficients[NTAPS] = {0.25, 1.50, 3.75, -2.25, 0.50, 0.75, -3.00, 1.25,
0.25, 1.50, 3.75, -2.25, 0.50, 0.75, -3.00, 1.25,
0.25, 1.50, 3.75, -2.25, 0.50, 0.75, -3.00, 1.25,
0.25, 1.50, 3.75, -2.25, 0.50, 0.75, -3.00, 1.25};

// Computes the dot product of input[] and coefficient[] over NTAPS elements
// and stores the result in output[0] (defined below).
void kernel(float input[], float output[], float coefficient[]);

// Test driver: runs the FIR kernel once over the file-scope buffers.
// The input_dsp/output_dsp harness calls are kept (disabled) from the
// original benchmark source.
int main()
{
    // input_dsp (input, NTAPS, 0);

    kernel(input, output, coefficients);

    // output_dsp (input, NTAPS, 0);
    // output_dsp (coefficients, NTAPS, 0);
    // output_dsp (output, NTAPS, 0);

    return 0;
}

/* One output point of an NTAPS-tap FIR filter: the dot product of
 * input[] and coefficient[], written to output[0].
 *   input       : input sample array (NTAPS elements read)
 *   output      : output sample array (only element 0 is written)
 *   coefficient : filter coefficient array (NTAPS elements read)
 */
void kernel(float input[], float output[], float coefficient[])
{
    float acc = 0.0f;
    // #pragma clang loop unroll_count(4) vectorize_width(4)
    for (int tap = 0; tap < NTAPS; ++tap)
        acc += input[tap] * coefficient[tap];
    output[0] = acc;
}
15 changes: 15 additions & 0 deletions test/testbench/fir/fir_kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Number of filter taps processed per call.
#define NTAPS 32

/* One output point of an NTAPS-tap FIR filter: accumulates the dot
 * product of input[] and coefficient[] and writes it to output[0].
 *   input       : input sample array (NTAPS elements read)
 *   output      : output sample array (only element 0 is written)
 *   coefficient : filter coefficient array (NTAPS elements read)
 */
void kernel(float input[], float output[], float coefficient[])
{
    float acc = 0.0f;
    // #pragma clang loop unroll_count(4) vectorize_width(4)
    for (int tap = 0; tap < NTAPS; ++tap)
        acc += input[tap] * coefficient[tap];
    output[0] = acc;
}
44 changes: 44 additions & 0 deletions test/testbench/fir/fir_kernel.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
; ModuleID = 'fir_kernel.cpp'
source_filename = "fir_kernel.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable
; Compiled form of kernel(float*, float*, float*): a 32-iteration dot-product
; loop over two readonly float arrays (%0 = input, %2 = coefficient), with the
; final sum stored through the writeonly pointer %1 (output[0]).
define dso_local void @_Z6kernelPfS_S_(ptr nocapture noundef readonly %0, ptr nocapture noundef writeonly %1, ptr nocapture noundef readonly %2) local_unnamed_addr #0 {
br label %4

; Loop block: %5 is the induction variable i, %6 the running float sum.
4: ; preds = %3, %4
%5 = phi i64 [ 0, %3 ], [ %13, %4 ]
%6 = phi float [ 0.000000e+00, %3 ], [ %12, %4 ]
%7 = getelementptr inbounds nuw float, ptr %0, i64 %5
%8 = load float, ptr %7, align 4, !tbaa !5
%9 = getelementptr inbounds nuw float, ptr %2, i64 %5
%10 = load float, ptr %9, align 4, !tbaa !5
; Multiply-accumulate with fast-math flags (reassociation allowed).
%11 = fmul fast float %10, %8
%12 = fadd fast float %11, %6
%13 = add nuw nsw i64 %5, 1
; Trip count is the constant 32 (NTAPS in the C++ source).
%14 = icmp eq i64 %13, 32
br i1 %14, label %15, label %4, !llvm.loop !9

; Exit block: store the accumulated sum to output[0] and return.
15: ; preds = %4
store float %12, ptr %1, align 4, !tbaa !5
ret void
}

attributes #0 = { mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "approx-func-fp-math"="true" "min-legal-vector-width"="0" "no-builtin-fma" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87,-amx-avx512,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-fma" "tune-cpu"="generic" "unsafe-fp-math"="true" }

!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.ident = !{!4}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{!"clang version 20.1.7 (https://github.com/llvm/llvm-project.git 6146a88f60492b520a36f8f8f3231e15f3cc6082)"}
!5 = !{!6, !6, i64 0}
!6 = !{!"float", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C++ TBAA"}
!9 = distinct !{!9, !10, !11}
!10 = !{!"llvm.loop.mustprogress"}
!11 = !{!"llvm.loop.unroll.disable"}
28 changes: 28 additions & 0 deletions test/testbench/fir/fir_kernel.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#loop_unroll = #llvm.loop_unroll<disable = true>
#tbaa_root = #llvm.tbaa_root<id = "Simple C++ TBAA">
#loop_annotation = #llvm.loop_annotation<unroll = #loop_unroll, mustProgress = true>
#tbaa_type_desc = #llvm.tbaa_type_desc<id = "omnipotent char", members = {<#tbaa_root, 0>}>
#tbaa_type_desc1 = #llvm.tbaa_type_desc<id = "float", members = {<#tbaa_type_desc, 0>}>
#tbaa_tag = #llvm.tbaa_tag<base_type = #tbaa_type_desc1, access_type = #tbaa_type_desc1, offset = 0>
module attributes {dlti.dl_spec = #dlti.dl_spec<i64 = dense<64> : vector<2xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, i32 = dense<32> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, f80 = dense<128> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64>, llvm.ident = "clang version 20.1.7 (https://github.com/llvm/llvm-project.git 6146a88f60492b520a36f8f8f3231e15f3cc6082)"} {
llvm.func local_unnamed_addr @_Z6kernelPfS_S_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.writeonly}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) attributes {approx_func_fp_math = true, memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_infs_fp_math = true, no_nans_fp_math = true, no_signed_zeros_fp_math = true, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], "no-builtin-fma", ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87", "-amx-avx512", "-avx10.1-256", "-avx10.1-512", "-avx10.2-256", "-avx10.2-512", "-avx512bf16", "-avx512bitalg", "-avx512bw", "-avx512cd", "-avx512dq", "-avx512f", "-avx512fp16", "-avx512ifma", "-avx512vbmi", "-avx512vbmi2", "-avx512vl", "-avx512vnni", "-avx512vp2intersect", "-avx512vpopcntdq", "-fma"]>, tune_cpu = "generic", unsafe_fp_math = true} {
%0 = llvm.mlir.constant(0 : i64) : i64
%1 = llvm.mlir.constant(0.000000e+00 : f32) : f32
%2 = llvm.mlir.constant(1 : i64) : i64
%3 = llvm.mlir.constant(32 : i64) : i64
llvm.br ^bb1(%0, %1 : i64, f32)
^bb1(%4: i64, %5: f32): // 2 preds: ^bb0, ^bb1
%6 = llvm.getelementptr inbounds %arg0[%4] : (!llvm.ptr, i64) -> !llvm.ptr, f32
%7 = llvm.load %6 {alignment = 4 : i64, tbaa = [#tbaa_tag]} : !llvm.ptr -> f32
%8 = llvm.getelementptr inbounds %arg2[%4] : (!llvm.ptr, i64) -> !llvm.ptr, f32
%9 = llvm.load %8 {alignment = 4 : i64, tbaa = [#tbaa_tag]} : !llvm.ptr -> f32
%10 = llvm.fmul %9, %7 {fastmathFlags = #llvm.fastmath<fast>} : f32
%11 = llvm.fadd %10, %5 {fastmathFlags = #llvm.fastmath<fast>} : f32
%12 = llvm.add %4, %2 overflow<nsw, nuw> : i64
%13 = llvm.icmp "eq" %12, %3 : i64
llvm.cond_br %13, ^bb2, ^bb1(%12, %11 : i64, f32) {loop_annotation = #loop_annotation}
^bb2: // pred: ^bb1
llvm.store %11, %arg1 {alignment = 4 : i64, tbaa = [#tbaa_tag]} : f32, !llvm.ptr
llvm.return
}
}
Loading
Loading