Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 33 additions & 5 deletions include/NeuraDialect/NeuraOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,26 @@ def Neura_MulOp : Op<NeuraDialect, "mul"> {

def Neura_DivOp : Op<NeuraDialect, "div"> {
let summary = "Integer division operation";
let arguments = (ins AnyType:$lhs, AnyType:$rhs, Optional<AnyType>:$predicate);
let arguments = (ins AnyType:$lhs, Optional<AnyType>:$rhs);
let results = (outs AnyType:$result);
// let assemblyFormat = "$lhs `,` $rhs `,` $predicate attr-dict `:` type($result)";
let traits = [SameOperandsAndResultElementType];
}

// Defines an integer remainder operation.
def Neura_RemOp : Op<NeuraDialect, "rem">{
let summary = "Integer remainder operation";
let description = [{
Performs an integer remainder operation, computing the result of
a % b, where % is the remainder operator.

Example:
%result = neura.rem %a, %b : i32
}];
// NOTE(review): $rhs is declared Optional — confirm this is intended. A
// remainder needs both operands; presumably the Optional slot follows the
// dialect-wide predicated-operand convention used by neura.div/neura.fsub,
// but verify against the op builders before relying on a missing $rhs.
let arguments = (ins AnyType:$lhs, Optional<AnyType>:$rhs);
let results = (outs AnyType:$result);
// Operands and result must share the same element type (matches neura.div).
let traits = [SameOperandsAndResultElementType];
}

// Defines a floating-point addition operation.
def Neura_FAddOp : Op<NeuraDialect, "fadd"> {
let summary = "Floating addition operation";
Expand All @@ -63,8 +77,8 @@ def Neura_FAddOp : Op<NeuraDialect, "fadd"> {
def Neura_FSubOp: Op<NeuraDialect, "fsub"> {
let summary = "Floating subtraction operation";
let opName = "fsub";
let arguments = (ins AnyFloat:$lhs, AnyFloat:$rhs, Optional<AnyType>:$predicate);
let results = (outs AnyFloat:$result);
let arguments = (ins AnyType:$lhs, Optional<AnyType>:$rhs);
let results = (outs AnyType:$result);
// let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($result)";
let traits = [SameOperandsAndResultElementType];
}
Expand Down Expand Up @@ -222,9 +236,9 @@ def Neura_AllocaOp : Op<NeuraDialect, "alloca"> {
%ptr = neura.alloca %size : !neura.data<i32, i1> -> !llvm.ptr
}];

let arguments = (ins AnyType:$size);
let arguments = (ins Optional<AnyType>:$size);
let results = (outs AnyType:$result);
let assemblyFormat = "$size attr-dict `:` type($size) `->` type($result)";
let assemblyFormat = "($size^ `:` type($size))? attr-dict `->` type($result)";
}

// Defines a sign extension operation.
Expand Down Expand Up @@ -259,6 +273,20 @@ def Neura_ZExtOp : Op<NeuraDialect, "zext"> {
let assemblyFormat = "$value attr-dict `:` type($value) `->` type($result)";
}

// Defines a logical shift left operation.
def Neura_ShlOp : Op<NeuraDialect, "shl"> {
let summary = "Logical shift left operation";
let description = [{
Performs a logical left shift on an integer value.
Similar to llvm.shl, but works with predicated values.

Example:
%shifted = neura.shl %value, %shiftAmount : !neura.data<i32, i1> -> !neura.data<i32, i1>
}];
// NOTE(review): $shiftAmount is Optional — confirm this is intended; a shift
// with no amount has no obvious semantics. Also note this op declares no
// SameOperandsAndResultElementType trait and no assemblyFormat, unlike the
// sibling arithmetic ops — verify whether that asymmetry is deliberate.
let arguments = (ins AnyType:$value, Optional<AnyType>:$shiftAmount);
let results = (outs AnyType:$result);
}

// ----------------------------------------------------
// Defines vector operations.

Expand Down
9 changes: 3 additions & 6 deletions lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,7 @@ struct ArithSubFToNeuraFSub : public OpRewritePattern<mlir::arith::SubFOp> {
Type result_type = op.getType();

// Optional predicate: default to null.
rewriter.replaceOpWithNewOp<neura::FSubOp>(op, result_type, lhs, rhs,
nullptr);
rewriter.replaceOpWithNewOp<neura::FSubOp>(op, result_type, lhs, rhs);
return success();
}
};
Expand Down Expand Up @@ -144,8 +143,7 @@ struct ArithDivSIToNeuraDiv : public OpRewritePattern<mlir::arith::DivSIOp> {
Type result_type = op.getType();
// Converts arith DivSIOp to Neura DivOp.
// Optional predicate: default to null.
rewriter.replaceOpWithNewOp<neura::DivOp>(op, result_type, lhs, rhs,
nullptr);
rewriter.replaceOpWithNewOp<neura::DivOp>(op, result_type, lhs, rhs);
return success();
}
};
Expand Down Expand Up @@ -176,8 +174,7 @@ struct ArithRemSIToNeuraOp : public OpRewritePattern<mlir::arith::RemSIOp> {
Location loc = op.getLoc();
// Converts arith RemSIOp to basic Neura Op.
// Optional predicate: default to null.
Value div =
rewriter.create<neura::DivOp>(loc, result_type, lhs, rhs, nullptr);
Value div = rewriter.create<neura::DivOp>(loc, result_type, lhs, rhs);
Value mul = rewriter.create<neura::MulOp>(loc, result_type, rhs, div);
Value rem = rewriter.create<neura::SubOp>(loc, result_type, lhs, mul);

Expand Down
89 changes: 66 additions & 23 deletions lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,7 @@ struct LlvmFSubToNeuraFSub : public OpRewritePattern<mlir::LLVM::FSubOp> {
}

// Sets optional predicate: default to 'none'.
rewriter.replaceOpWithNewOp<neura::FSubOp>(op, result_type, lhs, rhs,
Value());
rewriter.replaceOpWithNewOp<neura::FSubOp>(op, result_type, lhs, rhs);
return success();
}
};
Expand Down Expand Up @@ -109,6 +108,35 @@ struct LlvmFMulToNeuraFMul : public OpRewritePattern<mlir::LLVM::FMulOp> {
}
};

// Lowers llvm.sdiv to neura.div, forwarding both operands unchanged.
struct LlvmSDivToNeuraDiv : public OpRewritePattern<LLVM::SDivOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::SDivOp op,
                                PatternRewriter &rewriter) const override {
    // Replaces the op in place; the result type is carried over verbatim.
    rewriter.replaceOpWithNewOp<neura::DivOp>(op, op.getType(), op.getLhs(),
                                              op.getRhs());
    return success();
  }
};

// Lowers llvm.srem to neura.rem, forwarding both operands unchanged.
struct LlvmSRemToNeuraRem : public OpRewritePattern<LLVM::SRemOp> {
  using OpRewritePattern<LLVM::SRemOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::SRemOp op,
                                PatternRewriter &rewriter) const override {
    Value dividend = op.getLhs();
    Value divisor = op.getRhs();

    // Rewrites llvm.srem into neura.rem with the original result type.
    rewriter.replaceOpWithNewOp<neura::RemOp>(op, op.getType(), dividend,
                                              divisor);
    return success();
  }
};

struct LlvmVFMulToNeuraVFMul : public OpRewritePattern<mlir::LLVM::FMulOp> {
using OpRewritePattern::OpRewritePattern;

Expand Down Expand Up @@ -311,11 +339,11 @@ struct LlvmAllocaToNeuraAlloca : public OpRewritePattern<LLVM::AllocaOp> {
PatternRewriter &rewriter) const override {
Value size = op.getArraySize();
Type resultType = op.getType();

// Converts the size to neura.data<i32, i1> if it's not already.
// Assumes the size is already in the right format.
// Handles type conversion here.

rewriter.replaceOpWithNewOp<neura::AllocaOp>(op, resultType, size);
return success();
}
Expand All @@ -328,7 +356,7 @@ struct LlvmSExtToNeuraSExt : public OpRewritePattern<LLVM::SExtOp> {
PatternRewriter &rewriter) const override {
Value input = op.getArg();
Type resultType = op.getType();

rewriter.replaceOpWithNewOp<neura::SExtOp>(op, resultType, input);
return success();
}
Expand All @@ -341,7 +369,7 @@ struct LlvmZExtToNeuraZExt : public OpRewritePattern<LLVM::ZExtOp> {
PatternRewriter &rewriter) const override {
Value input = op.getArg();
Type resultType = op.getType();

rewriter.replaceOpWithNewOp<neura::ZExtOp>(op, resultType, input);
return success();
}
Expand All @@ -355,36 +383,48 @@ struct LlvmMulToNeuraMul : public OpRewritePattern<LLVM::MulOp> {
Value lhs = op.getLhs();
Value rhs = op.getRhs();
Type resultType = op.getType();

rewriter.replaceOpWithNewOp<neura::MulOp>(op, resultType, lhs, rhs);
return success();
}
};

// Lowers llvm.shl to neura.shl, forwarding value and shift amount.
struct LlvmShlToNeuraShl : public OpRewritePattern<LLVM::ShlOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::ShlOp op,
                                PatternRewriter &rewriter) const override {
    Value shifted = op.getLhs();
    Value amount = op.getRhs();

    // The result keeps the source op's type.
    rewriter.replaceOpWithNewOp<neura::ShlOp>(op, op.getType(), shifted,
                                              amount);
    return success();
  }
};

struct LlvmFuncToNeuraFunc : public OpRewritePattern<LLVM::LLVMFuncOp> {
using OpRewritePattern::OpRewritePattern;

LogicalResult matchAndRewrite(LLVM::LLVMFuncOp op,
PatternRewriter &rewriter) const override {


auto target = op->getAttrOfType<StringAttr>(mlir::accel::kAcceleratorAttr);
if (!target || target.getValue() != mlir::accel::kNeuraTarget) {
return failure();
}

// Converts LLVMFunctionType to FunctionType.
auto llvmFuncType = op.getFunctionType();
auto funcType = rewriter.getFunctionType(
llvmFuncType.getParams(),
llvmFuncType.getReturnType()
);
auto funcType = rewriter.getFunctionType(llvmFuncType.getParams(),
llvmFuncType.getReturnType());

// Creates the new func.func operation using OperationState to have full control.
// Creates the new func.func operation using OperationState to have full
// control.
OperationState state(op.getLoc(), func::FuncOp::getOperationName());
state.addAttribute("sym_name", rewriter.getStringAttr(op.getName()));
state.addAttribute("function_type", TypeAttr::get(funcType));

// Copies ALL attributes from the original llvm.func exactly as they are.
// Skips function type and name attributes as they are handled separately.
SmallVector<NamedAttribute> attrs;
Expand All @@ -395,15 +435,16 @@ struct LlvmFuncToNeuraFunc : public OpRewritePattern<LLVM::LLVMFuncOp> {
attrs.push_back(attr);
}
state.addAttributes(attrs);

// Adds the function body region.
state.addRegion();

auto newFunc = cast<func::FuncOp>(rewriter.create(state));

// Moves the function body.
rewriter.inlineRegionBefore(op.getBody(), newFunc.getBody(), newFunc.getBody().end());

rewriter.inlineRegionBefore(op.getBody(), newFunc.getBody(),
newFunc.getBody().end());

// Replaces the old function.
rewriter.replaceOp(op, newFunc);
return success();
Expand Down Expand Up @@ -435,20 +476,19 @@ struct LlvmCallToFuncCall : public OpRewritePattern<LLVM::CallOp> {

// Gets the result types from the function signature.
auto resultTypes = funcOp.getFunctionType().getResults();

// Converts the call to func.call.
auto newCall = rewriter.create<func::CallOp>(
op.getLoc(), resultTypes, callee.value(), op.getArgOperands()
);

op.getLoc(), resultTypes, callee.value(), op.getArgOperands());

// Replaces the old call with the new one.
// Handles both cases: calls with results and calls without results.
if (op.getNumResults() == 0) {
rewriter.eraseOp(op);
} else {
rewriter.replaceOp(op, newCall->getResults());
}

return success();
}
};
Expand Down Expand Up @@ -494,6 +534,9 @@ struct LowerLlvmToNeuraPass
patterns.add<LlvmMulToNeuraMul>(&getContext());
patterns.add<LlvmFuncToNeuraFunc>(&getContext());
patterns.add<LlvmCallToFuncCall>(&getContext());
patterns.add<LlvmShlToNeuraShl>(&getContext());
patterns.add<LlvmSDivToNeuraDiv>(&getContext());
patterns.add<LlvmSRemToNeuraRem>(&getContext());

FrozenRewritePatternSet frozen(std::move(patterns));

Expand Down
28 changes: 28 additions & 0 deletions lib/Conversion/MemRefToNeura/MemRefToNeuraPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "mlir/IR/PatternMatch.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/Support/LogicalResult.h"

using namespace mlir;
using namespace mlir::neura;
Expand Down Expand Up @@ -46,6 +47,30 @@ struct MemRefStoreLowering : public OpRewritePattern<memref::StoreOp> {
}
};

// Lowers memref.alloca to neura.alloca, passing dynamic-size operands through
// when present and omitting the size operand for fully static allocations.
struct MemRefAllocaToNeuraAlloca : public OpRewritePattern<memref::AllocaOp> {
using OpRewritePattern<memref::AllocaOp>::OpRewritePattern;

LogicalResult matchAndRewrite(memref::AllocaOp alloca_op,
PatternRewriter &rewriter) const override {
// Gets the result type.
Type result_type = alloca_op.getType();

// Checks if we have dynamic dimensions.
if (!alloca_op.getDynamicSizes().empty()) {
// For dynamic dimensions, we need to create the alloca with the size
// arguments.
// NOTE(review): neura.alloca declares a single Optional<AnyType> $size, but
// getDynamicSizes() is a ValueRange that may hold several values for a
// multi-dim memref — confirm the builder overload accepts this, or that
// callers only produce rank-1 dynamic memrefs.
rewriter.replaceOpWithNewOp<neura::AllocaOp>(alloca_op, result_type,
alloca_op.getDynamicSizes());
} else {
// For static dimensions, we can create the alloca without size arguments.
// Value() supplies a null Value for the optional $size operand.
rewriter.replaceOpWithNewOp<neura::AllocaOp>(alloca_op, result_type,
Value());
}

return success();
}
};

struct LowerMemRefToNeuraPass
: public PassWrapper<LowerMemRefToNeuraPass, OperationPass<ModuleOp>> {

Expand All @@ -64,8 +89,11 @@ struct LowerMemRefToNeuraPass
ModuleOp module_op = getOperation();
MLIRContext *context = &getContext();
RewritePatternSet patterns(&getContext());

patterns.add<MemRefLoadLowering>(context);
patterns.add<MemRefStoreLowering>(context);
patterns.add<MemRefAllocaToNeuraAlloca>(context);

module_op.walk([&](func::FuncOp func_op) {
if (func_op->hasAttr(mlir::accel::kAcceleratorAttr)) {
auto target =
Expand Down
4 changes: 2 additions & 2 deletions lib/NeuraDialect/Architecture/Architecture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ Architecture::Architecture(int width, int height) {
for (int x = 0; x < width; ++x) {
// Gets the tile by coordinates.
Tile *tile = getTile(x, y);
const int kNUM_REGS_PER_REGFILE = 4;
const int kNUM_REGFILES_PER_CLUSTER = 2;
const int kNUM_REGS_PER_REGFILE = 8;
const int kNUM_REGFILES_PER_CLUSTER = 4;

// Assembles register files into a cluster.
RegisterFileCluster *register_file_cluster =
Expand Down
2 changes: 1 addition & 1 deletion lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ struct MapToAcceleratorPass
int res_mii = calculateResMii(func, architecture);

const int possibleMinII = std::max(rec_mii, res_mii);
constexpr int maxII = 15;
constexpr int maxII = 20;
std::vector<Operation *> topologically_sorted_ops =
getTopologicallySortedOps(func);
if (topologically_sorted_ops.empty()) {
Expand Down
Loading