coredac · YanzhouTang · Oct 20, 2025 · Oct 16, 2025 · Oct 17, 2025 · Oct 17, 2025
diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td
@@ -145,7 +145,7 @@ def Neura_LoadOp : Op<NeuraDialect, "load"> {
 
 // Defines a store operation.
 def Neura_StoreOp : Op<NeuraDialect, "store"> {
-  let arguments = (ins AnyType:$value, AnyType:$addr);
+  let arguments = (ins AnyType:$value, Optional<AnyType>:$addr);
   let results = (outs);
   // let assemblyFormat = "$value `,` $addr `,` attr-dict";
 }
@@ -179,11 +179,11 @@ def Neura_StoreIndexedOp: Op<NeuraDialect, "store_indexed"> {
 }
 
 // Defines a pointer computation operation.
-def Neura_GEP : Op<NeuraDialect, "gep"> {
+def Neura_GEP : Op<NeuraDialect, "gep", [AttrSizedOperandSegments]> {
   let summary = "Pointer computation using offset indices";
-  let arguments = (ins AnyType:$base, Variadic<AnyType>:$indicesAndPredicate);
+  let arguments = (ins Optional<AnyType>:$base, Variadic<AnyType>:$indices);
   let results = (outs AnyType:$result);
-  // let assemblyFormat = "$base `[` $indicesAndPredicate `]` `,` $predicate attr-dict";
+  // let assemblyFormat = "$base `[` $indices `]` `,` $predicate attr-dict";
 }
 
 // Defines a conditional branch operation.

diff --git a/lib/NeuraDialect/Transforms/FusePatternPass.cpp b/lib/NeuraDialect/Transforms/FusePatternPass.cpp
@@ -129,7 +129,7 @@ struct FuseGepLoadPattern : public OpRewritePattern<neura::LoadOp> {
 
     // Creates the fused operation with base and indices from gep.
     SmallVector<Value> indexValues;
-    for (auto gepIndex : gep_op.getIndicesAndPredicate()) {
+    for (auto gepIndex : gep_op.getIndices()) {
       indexValues.push_back(gepIndex);
     }
 
@@ -161,7 +161,7 @@ struct FuseGepStorePattern : public OpRewritePattern<neura::StoreOp> {
 
     // Creates the fused operation with base and indices from gep.
     SmallVector<Value> indexValues;
-    for (auto gepIndex : gep_op.getIndicesAndPredicate()) {
+    for (auto gepIndex : gep_op.getIndices()) {
       indexValues.push_back(gepIndex);
     }
 

diff --git a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp
@@ -108,12 +108,12 @@ struct FuseRhsConstantPattern : public OpRewritePattern<OpType> {
 
   virtual Operation *
   createOpWithFusedRhsConstant(OpType op, Value non_const_operand,
-                               Attribute rhs_const_value,
+                               Attribute rhs_value,
                                PatternRewriter &rewriter) const = 0;
 
   LogicalResult matchAndRewrite(OpType op,
                                 PatternRewriter &rewriter) const override {
-    if (op->hasAttr("rhs_const_value")) {
+    if (op->hasAttr("rhs_value")) {
       // Already fused with a constant on the right-hand side.
       return failure();
     }
@@ -127,9 +127,9 @@ struct FuseRhsConstantPattern : public OpRewritePattern<OpType> {
     if (rhs_is_const) {
       auto constant_op = dyn_cast<neura::ConstantOp>(rhs.getDefiningOp());
 
-      Attribute rhs_const_value = getOriginConstantValue(rhs);
+      Attribute rhs_value = getOriginConstantValue(rhs);
       Operation *fused_op =
-          createOpWithFusedRhsConstant(op, lhs, rhs_const_value, rewriter);
+          createOpWithFusedRhsConstant(op, lhs, rhs_value, rewriter);
 
       rewriter.replaceOp(op, fused_op->getResults());
       if (constant_op->use_empty()) {
@@ -141,9 +141,9 @@ struct FuseRhsConstantPattern : public OpRewritePattern<OpType> {
     if (lhs_is_const && !rhs_is_const && isCommutative()) {
       auto constant_op = dyn_cast<neura::ConstantOp>(lhs.getDefiningOp());
 
-      Attribute lhs_const_value = getOriginConstantValue(lhs);
+      Attribute lhs_value = getOriginConstantValue(lhs);
       Operation *fused_op =
-          createOpWithFusedRhsConstant(op, rhs, lhs_const_value, rewriter);
+          createOpWithFusedRhsConstant(op, rhs, lhs_value, rewriter);
 
       rewriter.replaceOp(op, fused_op->getResults());
       if (constant_op->use_empty()) {
@@ -163,12 +163,12 @@ struct FuseAddRhsConstantPattern : public FuseRhsConstantPattern<neura::AddOp> {
 
   Operation *
   createOpWithFusedRhsConstant(neura::AddOp op, Value non_const_operand,
-                               Attribute rhs_const_value,
+                               Attribute rhs_value,
                                PatternRewriter &rewriter) const override {
     auto fused_op = rewriter.create<neura::AddOp>(
         op.getLoc(), op.getResult().getType(), non_const_operand,
         /*rhs=*/nullptr);
-    addConstantAttribute(fused_op, "rhs_const_value", rhs_const_value);
+    addConstantAttribute(fused_op, "rhs_value", rhs_value);
     return fused_op;
   }
 };
@@ -178,12 +178,12 @@ struct FuseSubRhsConstantPattern : public FuseRhsConstantPattern<neura::SubOp> {
 
   Operation *
   createOpWithFusedRhsConstant(neura::SubOp op, Value non_const_operand,
-                               Attribute rhs_const_value,
+                               Attribute rhs_value,
                                PatternRewriter &rewriter) const override {
     auto fused_op = rewriter.create<neura::SubOp>(
         op.getLoc(), op.getResult().getType(), non_const_operand,
         /*rhs=*/nullptr);
-    addConstantAttribute(fused_op, "rhs_const_value", rhs_const_value);
+    addConstantAttribute(fused_op, "rhs_value", rhs_value);
     return fused_op;
   }
 };
@@ -195,12 +195,12 @@ struct FuseMulRhsConstantPattern : public FuseRhsConstantPattern<neura::MulOp> {
 
   Operation *
   createOpWithFusedRhsConstant(neura::MulOp op, Value non_const_operand,
-                               Attribute rhs_const_value,
+                               Attribute rhs_value,
                                PatternRewriter &rewriter) const override {
     auto fused_op = rewriter.create<neura::MulOp>(
         op.getLoc(), op.getResult().getType(), non_const_operand,
         /*rhs=*/nullptr);
-    addConstantAttribute(fused_op, "rhs_const_value", rhs_const_value);
+    addConstantAttribute(fused_op, "rhs_value", rhs_value);
     return fused_op;
   }
 };
@@ -211,12 +211,12 @@ struct FuseICmpRhsConstantPattern
 
   Operation *
   createOpWithFusedRhsConstant(neura::ICmpOp op, Value non_const_operand,
-                               Attribute rhs_const_value,
+                               Attribute rhs_value,
                                PatternRewriter &rewriter) const override {
     auto fused_op = rewriter.create<neura::ICmpOp>(
         op.getLoc(), op.getResult().getType(), non_const_operand,
         /*rhs=*/nullptr, op.getCmpType());
-    addConstantAttribute(fused_op, "rhs_const_value", rhs_const_value);
+    addConstantAttribute(fused_op, "rhs_value", rhs_value);
     return fused_op;
   }
 };
@@ -229,12 +229,12 @@ struct FuseFAddRhsConstantPattern
 
   Operation *
   createOpWithFusedRhsConstant(neura::FAddOp op, Value non_const_operand,
-                               Attribute rhs_const_value,
+                               Attribute rhs_value,
                                PatternRewriter &rewriter) const override {
     auto fused_op = rewriter.create<neura::FAddOp>(
         op.getLoc(), op.getResult().getType(), non_const_operand,
         /*rhs=*/nullptr);
-    addConstantAttribute(fused_op, "rhs_const_value", rhs_const_value);
+    addConstantAttribute(fused_op, "rhs_value", rhs_value);
     return fused_op;
   }
 };
@@ -244,12 +244,12 @@ struct FuseDivRhsConstantPattern : public FuseRhsConstantPattern<neura::DivOp> {
 
   Operation *
   createOpWithFusedRhsConstant(neura::DivOp op, Value non_const_operand,
-                               Attribute rhs_const_value,
+                               Attribute rhs_value,
                                PatternRewriter &rewriter) const override {
     auto fused_op = rewriter.create<neura::DivOp>(
         op.getLoc(), op.getResult().getType(), non_const_operand,
         /*rhs=*/nullptr);
-    addConstantAttribute(fused_op, "rhs_const_value", rhs_const_value);
+    addConstantAttribute(fused_op, "rhs_value", rhs_value);
     return fused_op;
   }
 };
@@ -259,16 +259,100 @@ struct FuseRemRhsConstantPattern : public FuseRhsConstantPattern<neura::RemOp> {
 
   Operation *
   createOpWithFusedRhsConstant(neura::RemOp op, Value non_const_operand,
-                               Attribute rhs_const_value,
+                               Attribute rhs_value,
                                PatternRewriter &rewriter) const override {
     auto fused_op = rewriter.create<neura::RemOp>(
         op.getLoc(), op.getResult().getType(), non_const_operand,
         /*rhs=*/nullptr);
-    addConstantAttribute(fused_op, "rhs_const_value", rhs_const_value);
+    addConstantAttribute(fused_op, "rhs_value", rhs_value);
     return fused_op;
   }
 };
 
+// =========================================
+// FuseGepBaseConstantPattern
+// Folds a constant base pointer of a GEP operation: the base operand is
+// dropped and its constant value is recorded in the "lhs_value" attribute.
+// =========================================
+struct FuseGepBaseConstantPattern : public OpRewritePattern<neura::GEP> {
+  using OpRewritePattern<neura::GEP>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(neura::GEP gep_op,
+                                PatternRewriter &rewriter) const override {
+    Value base = gep_op.getBase();
+
+    // Bails out unless the optional base operand is present and is a constant.
+    if (!base || !isOriginConstantOp(base)) {
+      return failure();
+    }
+
+    auto constant_op = dyn_cast<neura::ConstantOp>(base.getDefiningOp());
+    Attribute base_value = getOriginConstantValue(base);
+
+    // Gets all indices (everything after base).
+    SmallVector<Value> indices;
+    for (Value operand : gep_op.getIndices()) {
+      indices.push_back(operand);
+    }
+
+    // Creates new GEP with no base but with lhs_value attribute.
+    auto fused_gep = rewriter.create<neura::GEP>(
+        gep_op.getLoc(), gep_op.getResult().getType(), /*base=*/nullptr,
+        indices);
+    // TODO: Gather all the attribute -- https://github.com/coredac/dataflow/issues/145
+    addConstantAttribute(fused_gep, "lhs_value", base_value);
+
+    // Replaces the original GEP.
+    rewriter.replaceOp(gep_op, fused_gep);
+
+    // Cleans up constant if no longer used. dyn_cast yields null when the
+    // defining op is not a neura::ConstantOp, so guard before dereferencing.
+    if (constant_op && constant_op->use_empty()) {
+      rewriter.eraseOp(constant_op);
+    }
+
+    return success();
+  }
+};
+
+// =========================================
+// FuseStoreAddrConstantPattern
+// Folds a constant destination pointer of a Store operation: the addr
+// operand is dropped and its constant value is stored as "rhs_value".
+// =========================================
+struct FuseStoreAddrConstantPattern : public OpRewritePattern<neura::StoreOp> {
+  using OpRewritePattern<neura::StoreOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(neura::StoreOp store_op,
+                                PatternRewriter &rewriter) const override {
+    Value addr = store_op.getAddr();
+
+    // Bails out unless the optional addr operand is present and is a constant.
+    if (!addr || !isOriginConstantOp(addr)) {
+      return failure();
+    }
+
+    auto constant_op = dyn_cast<neura::ConstantOp>(addr.getDefiningOp());
+    Attribute addr_value = getOriginConstantValue(addr);
+
+    // Creates new Store with the value operand only; addr becomes rhs_value.
+    auto fused_store = rewriter.create<neura::StoreOp>(
+        store_op.getLoc(), store_op.getValue(), /*addr=*/nullptr);
+    addConstantAttribute(fused_store, "rhs_value", addr_value);
+
+    // Replaces the original Store.
+    rewriter.replaceOp(store_op, fused_store);
+
+    // Cleans up constant if no longer used. dyn_cast yields null when the
+    // defining op is not a neura::ConstantOp, so guard before dereferencing.
+    if (constant_op && constant_op->use_empty()) {
+      rewriter.eraseOp(constant_op);
+    }
+
+    return success();
+  }
+};
+
 // =========================================
 // FoldConstantPass Implementation
 // =========================================
@@ -294,6 +378,8 @@ struct FoldConstantPass
     patterns.add<FuseRemRhsConstantPattern>(&getContext());
 
     patterns.add<FuseConstantAndGrantPattern>(&getContext());
+    patterns.add<FuseGepBaseConstantPattern>(&getContext());
+    patterns.add<FuseStoreAddrConstantPattern>(&getContext());
     FrozenRewritePatternSet frozen(std::move(patterns));
 
     // Applies to every region inside the module (regardless of func type,

diff --git a/lib/NeuraDialect/Transforms/Optimizations/HwSpecificOpt/FuseLoopControlPass.cpp b/lib/NeuraDialect/Transforms/Optimizations/HwSpecificOpt/FuseLoopControlPass.cpp
@@ -106,8 +106,8 @@ class LoopInfo {
 // Finds the constant attribute for a value.
 Attribute findConstantAttribute(Operation *op) {
   // Checks if the operation has a constant attribute.
-  if (op && op->hasAttr("rhs_const_value")) {
-    return op->getAttr("rhs_const_value");
+  if (op && op->hasAttr("rhs_value")) {
+    return op->getAttr("rhs_value");
   }
 
   // If the value is already a constant, return it.

diff --git a/test/c2llvm2mlir/simple_loop/test.mlir b/test/c2llvm2mlir/simple_loop/test.mlir
@@ -1,7 +1,7 @@
 // Compiles the original kernel.
 // RUN: clang++ kernel.cpp -o %t-kernel.out
 
-// Compiles the original kernel to mlir, then lower back to llvm, eventually binary.
+// Compiles the original kernel to mlir, then lowers back to llvm, eventually binary.
 // RUN: clang++ -S -emit-llvm -o %t-kernel.ll kernel.cpp
 // RUN: mlir-translate --import-llvm %t-kernel.ll -o %t-kernel.mlir
 // RUN: mlir-opt %t-kernel.mlir | mlir-translate -mlir-to-llvmir -o %t-kernel_back.ll
@@ -15,3 +15,56 @@
 // Verifies the output values are the same for the original and re-compiled kernel.
 // CHECK: output: [[OUTPUT:[0-9]+\.[0-9]+]]
 // CHECK: output: [[OUTPUT]]
+
+// Tests LLVM to NEURA lowering.
+// RUN: clang++ -S -emit-llvm -O3 -fno-unroll-loops -fno-vectorize -ffp-contract=off kernel.cpp -o %t-kernel.ll
+// RUN: mlir-translate --import-llvm %t-kernel.ll -o %t-kernel.mlir
+
+// RUN: mlir-neura-opt --assign-accelerator \
+// RUN:   --lower-llvm-to-neura \
+// RUN:   --promote-func-arg-to-const \
+// RUN:   --fold-constant \
+// RUN:   --canonicalize-live-in \
+// RUN:   --leverage-predicated-value \
+// RUN:   --transform-ctrl-to-data-flow \
+// RUN:   --view-op-graph \
+// RUN:   --architecture-spec=../../arch_spec/architecture.yaml \
+// RUN:   --insert-data-mov %t-kernel.mlir -o %t-kernel-neura.mlir
+// RUN: FileCheck %s --check-prefix=CHECK-LLVM2NEURA < %t-kernel-neura.mlir
+
+// RUN: mlir-neura-opt --assign-accelerator \
+// RUN:   --lower-llvm-to-neura \
+// RUN:   --promote-func-arg-to-const \
+// RUN:   --fold-constant \
+// RUN:   --canonicalize-live-in \
+// RUN:   --leverage-predicated-value \
+// RUN:   --transform-ctrl-to-data-flow \
+// RUN:   --view-op-graph \
+// RUN:   --architecture-spec=../../arch_spec/architecture.yaml \
+// RUN:   --insert-data-mov \
+// RUN:   --map-to-accelerator="mapping-strategy=heuristic backtrack-config=customized=5,3" %t-kernel.mlir -o %t-kernel-mapped.mlir
+// RUN: FileCheck %s --check-prefix=CHECK-LLVM2NEURA-MAP < %t-kernel-mapped.mlir
+
+// CHECK-LLVM2NEURA: accelerator = "neura"
+// CHECK-LLVM2NEURA: dataflow_mode = "predicate"
+// CHECK-LLVM2NEURA: neura.phi
+// CHECK-LLVM2NEURA: neura.gep
+// CHECK-LLVM2NEURA-SAME: operandSegmentSizes = array<i32: 0, 1>
+// CHECK-LLVM2NEURA-SAME: lhs_value
+// CHECK-LLVM2NEURA: neura.load
+// CHECK-LLVM2NEURA: neura.fmul
+// CHECK-LLVM2NEURA: neura.fadd
+// CHECK-LLVM2NEURA: neura.store
+// CHECK-LLVM2NEURA-SAME: rhs_value
+
+// CHECK-LLVM2NEURA-MAP:      func.func @
+// CHECK-LLVM2NEURA-MAP-SAME:  accelerator = "neura"
+// CHECK-LLVM2NEURA-MAP-SAME:  dataflow_mode = "predicate"
+// CHECK-LLVM2NEURA-MAP-SAME:  mapping_info = {
+// CHECK-LLVM2NEURA-MAP-SAME:   compiled_ii = 5 : i32, 
+// CHECK-LLVM2NEURA-MAP-SAME:   mapping_mode = "spatial-temporal"
+// CHECK-LLVM2NEURA-MAP-SAME:   mapping_strategy = "heuristic"
+// CHECK-LLVM2NEURA-MAP-SAME:   rec_mii = 5 : i32
+// CHECK-LLVM2NEURA-MAP-SAME:   res_mii = 2 : i32
+// CHECK-LLVM2NEURA-MAP-SAME:   x_tiles = 4 : i32
+// CHECK-LLVM2NEURA-MAP-SAME:   y_tiles = 4 : i32}
diff --git a/test/controflow_fuse/simple_loop/simple_loop.mlir b/test/controflow_fuse/simple_loop/simple_loop.mlir
@@ -196,8 +196,8 @@ module attributes {} {
 // FUSE-NEXT:     %7 = neura.grant_predicate %5, %valid : !neura.data<memref<?xi32>, i1>, !neura.data<i1, i1> -> !neura.data<memref<?xi32>, i1>
 // FUSE-NEXT:     %8 = neura.grant_predicate %3, %valid : !neura.data<memref<?xi32>, i1>, !neura.data<i1, i1> -> !neura.data<memref<?xi32>, i1>
 // FUSE-NEXT:     %9 = neura.load_indexed %7[%nextindex : !neura.data<i64, i1>] !neura.data<memref<?xi32>, i1> : !neura.data<i32, i1>
-// FUSE-NEXT:     %10 = "neura.mul"(%9) {rhs_const_value = 2 : i32} : (!neura.data<i32, i1>) -> !neura.data<i32, i1>
-// FUSE-NEXT:     %11 = "neura.add"(%10) {rhs_const_value = 1 : i32} : (!neura.data<i32, i1>) -> !neura.data<i32, i1>
+// FUSE-NEXT:     %10 = "neura.mul"(%9) {rhs_value = 2 : i32} : (!neura.data<i32, i1>) -> !neura.data<i32, i1>
+// FUSE-NEXT:     %11 = "neura.add"(%10) {rhs_value = 1 : i32} : (!neura.data<i32, i1>) -> !neura.data<i32, i1>
 // FUSE-NEXT:     neura.store_indexed %11 to %8[%nextindex : !neura.data<i64, i1>] !neura.data<memref<?xi32>, i1> : !neura.data<i32, i1>
 // FUSE-NEXT:     neura.ctrl_mov %7 -> %4 : !neura.data<memref<?xi32>, i1> !neura.data<memref<?xi32>, i1>
 // FUSE-NEXT:     neura.ctrl_mov %8 -> %2 : !neura.data<memref<?xi32>, i1> !neura.data<memref<?xi32>, i1>