Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions lib/NeuraDialect/Transforms/GenerateCodePass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ static std::string getConstantLiteral(Operation *op) {
return "#" + std::to_string(integer_attr.getInt());
if (auto float_attr = dyn_cast<FloatAttr>(value_attr))
return "#" + std::to_string(float_attr.getValueAsDouble());
//TODO: Issue #154: handle argument situations.
// if (auto string_attr = dyn_cast<StringAttr>(value_attr)) {
// std::string value = string_attr.getValue().str();
// return value;
// }
}
return "#0";
}
Expand All @@ -143,6 +148,9 @@ static std::string getConstantLiteral(Operation *op) {
return "#" + std::to_string(integer_attr.getInt());
if (auto float_attr = dyn_cast<FloatAttr>(constant_value_attr))
return "#" + std::to_string(float_attr.getValueAsDouble());
//TODO: Issue #154: handle argument situations.
// if (auto string_attr = dyn_cast<StringAttr>(constant_value_attr))
// return string_attr.getValue().str();
}

return "";
Expand Down
85 changes: 46 additions & 39 deletions lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ mlir::neura::BaseTopology parseTopologyString(const std::string& topology_str) {
}

// Helper function to parse architecture YAML configuration.
bool parseArchitectureYAML(llvm::yaml::Document &doc, int &width, int &height,
bool parseArchitectureYAML(llvm::yaml::Document &doc, int &width, int &height, int &max_ii,
mlir::neura::TileDefaults &tile_defaults,
std::vector<mlir::neura::TileOverride> &tile_overrides,
mlir::neura::LinkDefaults &link_defaults,
Expand All @@ -550,66 +550,71 @@ bool parseArchitectureYAML(llvm::yaml::Document &doc, int &width, int &height,
return false;
}

auto *rootMap = llvm::dyn_cast<llvm::yaml::MappingNode>(root);
if (!rootMap) {
auto *root_map = llvm::dyn_cast<llvm::yaml::MappingNode>(root);
if (!root_map) {
llvm::errs() << "[MapToAcceleratorPass] YAML root is not a mapping\n";
return false;
}

// Iterate root mapping ONCE; find 'architecture' and 'tile_defaults'.
for (auto &keyValuePair : *rootMap) {
auto *keyNode = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(keyValuePair.getKey());
if (!keyNode) continue;
for (auto &key_value_pair : *root_map) {
auto *key_node = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(key_value_pair.getKey());
if (!key_node) continue;

llvm::SmallString<64> keyString;
llvm::StringRef keyRef = keyNode->getValue(keyString);
llvm::SmallString<64> key_string;
llvm::StringRef key_ref = key_node->getValue(key_string);

if (keyRef == "architecture") {
auto *architectureMap = llvm::dyn_cast_or_null<llvm::yaml::MappingNode>(keyValuePair.getValue());
if (!architectureMap) continue;
if (key_ref == "architecture") {
auto *architecture_map = llvm::dyn_cast_or_null<llvm::yaml::MappingNode>(key_value_pair.getValue());
if (!architecture_map) continue;

// Iterate architecture mapping ONCE; read width/height in the same pass.
for (auto &architectureKeyValuePair : *architectureMap) {
auto *architectureKeyNode = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(architectureKeyValuePair.getKey());
if (!architectureKeyNode) continue;
for (auto &architecture_key_value_pair : *architecture_map) {
auto *architecture_key_node = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(architecture_key_value_pair.getKey());
if (!architecture_key_node) continue;

llvm::SmallString<64> architectureKeyString;
llvm::StringRef architectureKeyRef = architectureKeyNode->getValue(architectureKeyString);
if (architectureKeyRef != "width" && architectureKeyRef != "height") continue;

auto *architectureValueNode = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(architectureKeyValuePair.getValue());
if (!architectureValueNode) continue;

llvm::SmallString<64> architectureValueString;
llvm::StringRef architectureValueRef = architectureValueNode->getValue(architectureValueString);
long long tempValue = 0;
if (!architectureValueRef.getAsInteger(10, tempValue)) {
if (architectureKeyRef == "width") width = static_cast<int>(tempValue);
if (architectureKeyRef == "height") height = static_cast<int>(tempValue);
llvm::SmallString<64> architecture_key_string;
llvm::StringRef architecture_key_ref = architecture_key_node->getValue(architecture_key_string);
if (architecture_key_ref == "width" || architecture_key_ref == "height" || architecture_key_ref == "max_allowed_ii_by_hw") {
auto *architecture_value_node = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(architecture_key_value_pair.getValue());
if (!architecture_value_node) continue;

llvm::SmallString<64> architecture_value_string;
llvm::StringRef architecture_value_ref = architecture_value_node->getValue(architecture_value_string);
long long temp_value = 0;
if (!architecture_value_ref.getAsInteger(10, temp_value)) {
if (architecture_key_ref == "width") width = static_cast<int>(temp_value);
if (architecture_key_ref == "height") height = static_cast<int>(temp_value);
if (architecture_key_ref == "max_allowed_ii_by_hw") {
max_ii = static_cast<int>(temp_value);
}
}
} else {
continue;
}
}
} else if (keyRef == "tile_defaults") {
auto *tile_defaults_map = llvm::dyn_cast_or_null<llvm::yaml::MappingNode>(keyValuePair.getValue());
} else if (key_ref == "tile_defaults") {
auto *tile_defaults_map = llvm::dyn_cast_or_null<llvm::yaml::MappingNode>(key_value_pair.getValue());
if (tile_defaults_map) {
parseTileDefaults(tile_defaults_map, tile_defaults);
}
} else if (keyRef == "tile_overrides") {
auto *tile_overrides_seq = llvm::dyn_cast_or_null<llvm::yaml::SequenceNode>(keyValuePair.getValue());
} else if (key_ref == "tile_overrides") {
auto *tile_overrides_seq = llvm::dyn_cast_or_null<llvm::yaml::SequenceNode>(key_value_pair.getValue());
if (tile_overrides_seq) {
parseTileOverrides(tile_overrides_seq, tile_overrides);
}
} else if (keyRef == "link_defaults") {
auto *link_defaults_map = llvm::dyn_cast_or_null<llvm::yaml::MappingNode>(keyValuePair.getValue());
} else if (key_ref == "link_defaults") {
auto *link_defaults_map = llvm::dyn_cast_or_null<llvm::yaml::MappingNode>(key_value_pair.getValue());
if (link_defaults_map) {
parseLinkDefaults(link_defaults_map, link_defaults);
}
} else if (keyRef == "link_overrides") {
auto *link_overrides_seq = llvm::dyn_cast_or_null<llvm::yaml::SequenceNode>(keyValuePair.getValue());
} else if (key_ref == "link_overrides") {
auto *link_overrides_seq = llvm::dyn_cast_or_null<llvm::yaml::SequenceNode>(key_value_pair.getValue());
if (link_overrides_seq) {
parseLinkOverrides(link_overrides_seq, link_overrides);
}
} else if (keyRef == "base_topology") {
auto *topology_node = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(keyValuePair.getValue());
} else if (key_ref == "base_topology") {
auto *topology_node = llvm::dyn_cast_or_null<llvm::yaml::ScalarNode>(key_value_pair.getValue());
if (topology_node) {
llvm::SmallString<64> topology_string;
llvm::StringRef topology_ref = topology_node->getValue(topology_string);
Expand Down Expand Up @@ -739,6 +744,7 @@ struct MapToAcceleratorPass
std::string architecture_spec_file = mlir::neura::getArchitectureSpecFile();
int yaml_width = -1;
int yaml_height = -1;
int yaml_max_ii = 20; // Default max_ii = 20
mlir::neura::TileDefaults yaml_tile_defaults;
std::vector<mlir::neura::TileOverride> tile_overrides;
mlir::neura::LinkDefaults yaml_link_defaults;
Expand Down Expand Up @@ -773,7 +779,7 @@ struct MapToAcceleratorPass
}

// Parse YAML configuration
if (!parseArchitectureYAML(firstDoc, yaml_width, yaml_height, yaml_tile_defaults, tile_overrides, yaml_link_defaults, link_overrides, base_topology)) {
if (!parseArchitectureYAML(firstDoc, yaml_width, yaml_height, yaml_max_ii, yaml_tile_defaults, tile_overrides, yaml_link_defaults, link_overrides, base_topology)) {
return;
}

Expand Down Expand Up @@ -845,7 +851,8 @@ struct MapToAcceleratorPass
int res_mii = calculateResMii(func, architecture);

const int possibleMinII = std::max(rec_mii, res_mii);
constexpr int maxII = 20;
const int maxII = yaml_max_ii; // Use YAML config (default 20 if not specified)

std::vector<Operation *> topologically_sorted_ops =
getTopologicallySortedOps(func);
if (topologically_sorted_ops.empty()) {
Expand Down
5 changes: 3 additions & 2 deletions test/arch_spec/arch_spec_example.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
architecture:
name: "NeuraCGRA"
version: "1.0"
width: 8
height: 8
width: 4
height: 4
max_allowed_ii_by_hw: 20

tile_defaults:
num_registers: 128
Expand Down
1 change: 1 addition & 0 deletions test/arch_spec/architecture.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ architecture:
version: "1.0"
width: 4
height: 4
max_allowed_ii_by_hw: 20

tile_defaults:
num_registers: 32
Expand Down
42 changes: 42 additions & 0 deletions test/testbench/fir/fir.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/* 32-tap FIR filter processing 1 point */
/* Modified to use arrays - SMP */

//#include "traps.h"

// Number of filter taps (also the length of every sample buffer below).
#define NTAPS 32

// Input sample buffer (file-scope, zero-initialized at program load).
float input[NTAPS];
// Output sample buffer; kernel() only ever writes output[0].
float output[NTAPS];
// Fixed filter coefficients: the same 8-value pattern repeated four times.
float coefficients[NTAPS] = {0.25, 1.50, 3.75, -2.25, 0.50, 0.75, -3.00, 1.25,
0.25, 1.50, 3.75, -2.25, 0.50, 0.75, -3.00, 1.25,
0.25, 1.50, 3.75, -2.25, 0.50, 0.75, -3.00, 1.25,
0.25, 1.50, 3.75, -2.25, 0.50, 0.75, -3.00, 1.25};

// Computes the dot product of input[] and coefficient[] over NTAPS elements
// and stores the result in output[0] (defined below).
void kernel(float input[], float output[], float coefficient[]);

// Test driver: runs the FIR kernel once over the file-scope buffers.
// The input_dsp/output_dsp harness calls are kept (disabled) from the
// original benchmark source.
int main()
{
    // input_dsp (input, NTAPS, 0);

    kernel(input, output, coefficients);

    // output_dsp (input, NTAPS, 0);
    // output_dsp (coefficients, NTAPS, 0);
    // output_dsp (output, NTAPS, 0);

    return 0;
}

/* One output point of an NTAPS-tap FIR filter: the dot product of
 * input[] and coefficient[], written to output[0].
 *   input       : input sample array (NTAPS elements read)
 *   output      : output sample array (only element 0 is written)
 *   coefficient : filter coefficient array (NTAPS elements read)
 */
void kernel(float input[], float output[], float coefficient[])
{
    float acc = 0.0f;
    // #pragma clang loop unroll_count(4) vectorize_width(4)
    for (int tap = 0; tap < NTAPS; ++tap)
        acc += input[tap] * coefficient[tap];
    output[0] = acc;
}
15 changes: 15 additions & 0 deletions test/testbench/fir/fir_kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Number of filter taps processed per call.
#define NTAPS 32

/* One output point of an NTAPS-tap FIR filter: accumulates the dot
 * product of input[] and coefficient[] and writes it to output[0].
 *   input       : input sample array (NTAPS elements read)
 *   output      : output sample array (only element 0 is written)
 *   coefficient : filter coefficient array (NTAPS elements read)
 */
void kernel(float input[], float output[], float coefficient[])
{
    float acc = 0.0f;
    // #pragma clang loop unroll_count(4) vectorize_width(4)
    for (int tap = 0; tap < NTAPS; ++tap)
        acc += input[tap] * coefficient[tap];
    output[0] = acc;
}
44 changes: 44 additions & 0 deletions test/testbench/fir/fir_kernel.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
; ModuleID = 'fir_kernel.cpp'
source_filename = "fir_kernel.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable
; Compiled form of kernel(float*, float*, float*): a 32-iteration dot-product
; loop over two readonly float arrays (%0 = input, %2 = coefficient), with the
; final sum stored through the writeonly pointer %1 (output[0]).
define dso_local void @_Z6kernelPfS_S_(ptr nocapture noundef readonly %0, ptr nocapture noundef writeonly %1, ptr nocapture noundef readonly %2) local_unnamed_addr #0 {
br label %4

; Loop block: %5 is the induction variable i, %6 the running float sum.
4: ; preds = %3, %4
%5 = phi i64 [ 0, %3 ], [ %13, %4 ]
%6 = phi float [ 0.000000e+00, %3 ], [ %12, %4 ]
%7 = getelementptr inbounds nuw float, ptr %0, i64 %5
%8 = load float, ptr %7, align 4, !tbaa !5
%9 = getelementptr inbounds nuw float, ptr %2, i64 %5
%10 = load float, ptr %9, align 4, !tbaa !5
; Multiply-accumulate with fast-math flags (reassociation allowed).
%11 = fmul fast float %10, %8
%12 = fadd fast float %11, %6
%13 = add nuw nsw i64 %5, 1
; Trip count is the constant 32 (NTAPS in the C++ source).
%14 = icmp eq i64 %13, 32
br i1 %14, label %15, label %4, !llvm.loop !9

; Exit block: store the accumulated sum to output[0] and return.
15: ; preds = %4
store float %12, ptr %1, align 4, !tbaa !5
ret void
}

attributes #0 = { mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "approx-func-fp-math"="true" "min-legal-vector-width"="0" "no-builtin-fma" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87,-amx-avx512,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-fma" "tune-cpu"="generic" "unsafe-fp-math"="true" }

!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.ident = !{!4}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{!"clang version 20.1.7 (https://github.com/llvm/llvm-project.git 6146a88f60492b520a36f8f8f3231e15f3cc6082)"}
!5 = !{!6, !6, i64 0}
!6 = !{!"float", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C++ TBAA"}
!9 = distinct !{!9, !10, !11}
!10 = !{!"llvm.loop.mustprogress"}
!11 = !{!"llvm.loop.unroll.disable"}
28 changes: 28 additions & 0 deletions test/testbench/fir/fir_kernel.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#loop_unroll = #llvm.loop_unroll<disable = true>
#tbaa_root = #llvm.tbaa_root<id = "Simple C++ TBAA">
#loop_annotation = #llvm.loop_annotation<unroll = #loop_unroll, mustProgress = true>
#tbaa_type_desc = #llvm.tbaa_type_desc<id = "omnipotent char", members = {<#tbaa_root, 0>}>
#tbaa_type_desc1 = #llvm.tbaa_type_desc<id = "float", members = {<#tbaa_type_desc, 0>}>
#tbaa_tag = #llvm.tbaa_tag<base_type = #tbaa_type_desc1, access_type = #tbaa_type_desc1, offset = 0>
module attributes {dlti.dl_spec = #dlti.dl_spec<i64 = dense<64> : vector<2xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, i32 = dense<32> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, f80 = dense<128> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64>, llvm.ident = "clang version 20.1.7 (https://github.com/llvm/llvm-project.git 6146a88f60492b520a36f8f8f3231e15f3cc6082)"} {
llvm.func local_unnamed_addr @_Z6kernelPfS_S_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.writeonly}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) attributes {approx_func_fp_math = true, memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_infs_fp_math = true, no_nans_fp_math = true, no_signed_zeros_fp_math = true, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], "no-builtin-fma", ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87", "-amx-avx512", "-avx10.1-256", "-avx10.1-512", "-avx10.2-256", "-avx10.2-512", "-avx512bf16", "-avx512bitalg", "-avx512bw", "-avx512cd", "-avx512dq", "-avx512f", "-avx512fp16", "-avx512ifma", "-avx512vbmi", "-avx512vbmi2", "-avx512vl", "-avx512vnni", "-avx512vp2intersect", "-avx512vpopcntdq", "-fma"]>, tune_cpu = "generic", unsafe_fp_math = true} {
%0 = llvm.mlir.constant(0 : i64) : i64
%1 = llvm.mlir.constant(0.000000e+00 : f32) : f32
%2 = llvm.mlir.constant(1 : i64) : i64
%3 = llvm.mlir.constant(32 : i64) : i64
llvm.br ^bb1(%0, %1 : i64, f32)
^bb1(%4: i64, %5: f32): // 2 preds: ^bb0, ^bb1
%6 = llvm.getelementptr inbounds %arg0[%4] : (!llvm.ptr, i64) -> !llvm.ptr, f32
%7 = llvm.load %6 {alignment = 4 : i64, tbaa = [#tbaa_tag]} : !llvm.ptr -> f32
%8 = llvm.getelementptr inbounds %arg2[%4] : (!llvm.ptr, i64) -> !llvm.ptr, f32
%9 = llvm.load %8 {alignment = 4 : i64, tbaa = [#tbaa_tag]} : !llvm.ptr -> f32
%10 = llvm.fmul %9, %7 {fastmathFlags = #llvm.fastmath<fast>} : f32
%11 = llvm.fadd %10, %5 {fastmathFlags = #llvm.fastmath<fast>} : f32
%12 = llvm.add %4, %2 overflow<nsw, nuw> : i64
%13 = llvm.icmp "eq" %12, %3 : i64
llvm.cond_br %13, ^bb2, ^bb1(%12, %11 : i64, f32) {loop_annotation = #loop_annotation}
^bb2: // pred: ^bb1
llvm.store %11, %arg1 {alignment = 4 : i64, tbaa = [#tbaa_tag]} : f32, !llvm.ptr
llvm.return
}
}
Loading
Loading