Skip to content
52 changes: 50 additions & 2 deletions include/NeuraDialect/NeuraOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ def Neura_FMulOp : Op<NeuraDialect, "fmul"> {

def Neura_FDivOp : Op<NeuraDialect, "fdiv"> {
  let summary = "Floating division operation";
  // Division is a binary operation: both operands are required. AnyType
  // (rather than AnyFloat) is used so predicated wrapper types such as
  // !neura.data<f32, i1> are accepted, consistent with the surrounding ops.
  // NOTE(review): the previous revision declared $rhs as Optional<AnyType>,
  // but the lowering in ArithToNeuraPass always supplies both operands and a
  // divide with no divisor is meaningless, so $rhs is kept required here.
  let arguments = (ins AnyType:$lhs, AnyType:$rhs);
  let results = (outs AnyType:$result);
  // let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($result)";
}

Expand Down Expand Up @@ -211,6 +211,54 @@ def Neura_CastOp : Op<NeuraDialect, "cast">{
// let assemblyFormat = "$input type($input) `->` type($output) `,` $predicate attr-dict";
}

// Defines an alloca operation for memory allocation.
def Neura_AllocaOp : Op<NeuraDialect, "alloca"> {
  let summary = "Memory allocation operation";
  let description = [{
    Allocates memory on the stack, similar to llvm.alloca.
    Takes a predicated size value and returns a pointer to the allocated memory.

    Example:
    %ptr = neura.alloca %size : !neura.data<i32, i1> -> !llvm.ptr
  }];

  // AnyType (not an integer constraint) so the size may arrive wrapped in a
  // predicated type such as !neura.data<i32, i1>.
  let arguments = (ins AnyType:$size);
  // NOTE(review): no memory-effect interface is declared, so this op is
  // treated as pure by CSE/DCE — confirm that is intended for an allocation.
  let results = (outs AnyType:$result);
  let assemblyFormat = "$size attr-dict `:` type($size) `->` type($result)";
}

// Defines a sign extension operation.
def Neura_SExtOp : Op<NeuraDialect, "sext"> {
  let summary = "Sign extension operation";
  let description = [{
    Sign extends a value from a smaller integer type to a larger integer type.
    Similar to llvm.sext, but works with predicated values.

    Example:
    %extended = neura.sext %value : !neura.data<i8, i1> -> !neura.data<i32, i1>
  }];

  // AnyType on both sides so predicated wrappers (!neura.data<...>) pass the
  // ODS check; no verifier enforces that the result type is actually wider.
  let arguments = (ins AnyType:$value);
  let results = (outs AnyType:$result);
  let assemblyFormat = "$value attr-dict `:` type($value) `->` type($result)";
}

// Defines a zero extension operation.
def Neura_ZExtOp : Op<NeuraDialect, "zext"> {
  let summary = "Zero extension operation";
  let description = [{
    Zero extends a value from a smaller integer type to a larger integer type.
    Similar to llvm.zext, but works with predicated values.

    Example:
    %extended = neura.zext %value : !neura.data<i8, i1> -> !neura.data<i32, i1>
  }];

  // Mirrors Neura_SExtOp: loose AnyType constraints so predicated wrapper
  // types are accepted; widening is not verified at the ODS level.
  let arguments = (ins AnyType:$value);
  let results = (outs AnyType:$result);
  let assemblyFormat = "$value attr-dict `:` type($value) `->` type($result)";
}

// ----------------------------------------------------
// Defines vector operations.

Expand Down
3 changes: 1 addition & 2 deletions lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,7 @@ struct ArithFDivToNeuraFDiv : public OpRewritePattern<mlir::arith::DivFOp> {
Type result_type = op.getType();

// Optional predicate: default to null.
rewriter.replaceOpWithNewOp<neura::FDivOp>(op, result_type, lhs, rhs,
nullptr);
rewriter.replaceOpWithNewOp<neura::FDivOp>(op, result_type, lhs, rhs);
return success();
}
};
Expand Down
163 changes: 163 additions & 0 deletions lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,155 @@ struct LlvmConstantToNeuraConstant : public OpRewritePattern<LLVM::ConstantOp> {
}
};

// Rewrites llvm.alloca into neura.alloca, forwarding the array-size operand
// and keeping the original (pointer) result type unchanged.
struct LlvmAllocaToNeuraAlloca : public OpRewritePattern<LLVM::AllocaOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::AllocaOp op,
                                PatternRewriter &rewriter) const override {
    // NOTE(review): the size operand is forwarded as-is; any conversion into
    // a predicated !neura.data wrapper is assumed to happen in a later pass.
    rewriter.replaceOpWithNewOp<neura::AllocaOp>(op, op.getType(),
                                                 op.getArraySize());
    return success();
  }
};

// Lowers llvm.sext to neura.sext, preserving operand and destination type.
struct LlvmSExtToNeuraSExt : public OpRewritePattern<LLVM::SExtOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::SExtOp op,
                                PatternRewriter &rewriter) const override {
    rewriter.replaceOpWithNewOp<neura::SExtOp>(op, op.getType(), op.getArg());
    return success();
  }
};

// Lowers llvm.zext to neura.zext, preserving operand and destination type.
struct LlvmZExtToNeuraZExt : public OpRewritePattern<LLVM::ZExtOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::ZExtOp op,
                                PatternRewriter &rewriter) const override {
    rewriter.replaceOpWithNewOp<neura::ZExtOp>(op, op.getType(), op.getArg());
    return success();
  }
};

// Lowers llvm.mul to neura.mul with identical operands and result type.
struct LlvmMulToNeuraMul : public OpRewritePattern<LLVM::MulOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::MulOp op,
                                PatternRewriter &rewriter) const override {
    rewriter.replaceOpWithNewOp<neura::MulOp>(op, op.getType(), op.getLhs(),
                                              op.getRhs());
    return success();
  }
};

// Converts an llvm.func that is tagged for the neura accelerator into a
// func.func with the same name, signature, and attributes, moving the body
// region over unchanged.
struct LlvmFuncToNeuraFunc : public OpRewritePattern<LLVM::LLVMFuncOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::LLVMFuncOp op,
                                PatternRewriter &rewriter) const override {

    // Only functions explicitly targeted at the neura accelerator are
    // converted; all other llvm.func ops are left untouched.
    auto target = op->getAttrOfType<StringAttr>(mlir::accel::kAcceleratorAttr);
    if (!target || target.getValue() != mlir::accel::kNeuraTarget) {
      return failure();
    }

    // Convert LLVMFunctionType to FunctionType.
    // NOTE(review): an LLVM void return is carried over as a one-result
    // FunctionType of !llvm.void rather than a zero-result signature
    // (visible in the CHECK lines of this PR's tests) — confirm intended.
    auto llvmFuncType = op.getFunctionType();
    auto funcType = rewriter.getFunctionType(
        llvmFuncType.getParams(),
        llvmFuncType.getReturnType()
    );

    // Create the new func.func operation using OperationState to have full
    // control over which attributes end up on the result.
    OperationState state(op.getLoc(), func::FuncOp::getOperationName());
    state.addAttribute("sym_name", rewriter.getStringAttr(op.getName()));
    state.addAttribute("function_type", TypeAttr::get(funcType));

    // Copy ALL attributes from the original llvm.func exactly as they are.
    // Skip function type and name attributes as they are handled above;
    // everything else (accelerator tag, target features, ...) is preserved.
    SmallVector<NamedAttribute> attrs;
    for (auto attr : op->getAttrs()) {
      if (attr.getName() == "function_type" || attr.getName() == "sym_name") {
        continue;
      }
      attrs.push_back(attr);
    }
    state.addAttributes(attrs);

    // Add the (initially empty) function body region.
    state.addRegion();

    auto newFunc = cast<func::FuncOp>(rewriter.create(state));

    // Move the function body from the llvm.func into the new func.func.
    rewriter.inlineRegionBefore(op.getBody(), newFunc.getBody(), newFunc.getBody().end());

    // Replace the old function (0 results, so this just erases op and keeps
    // newFunc in its place).
    rewriter.replaceOp(op, newFunc);
    return success();
  }
};

// Rewrites a direct llvm.call into func.call, but only once the callee has
// been materialized as a func.func in the enclosing module (i.e. after
// LlvmFuncToNeuraFunc has fired for it).
struct LlvmCallToFuncCall : public OpRewritePattern<LLVM::CallOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::CallOp op,
                                PatternRewriter &rewriter) const override {
    // Indirect calls carry no symbol; leave them alone.
    auto calleeName = op.getCallee();
    if (!calleeName) {
      return failure();
    }

    auto module = op->getParentOfType<ModuleOp>();
    if (!module) {
      return failure();
    }

    // Bail out until a func.func with the same symbol name exists.
    auto callee = module.lookupSymbol<func::FuncOp>(calleeName.value());
    if (!callee) {
      return failure();
    }

    // Build the func.call, taking the result types from the callee's
    // signature rather than from the llvm.call itself.
    auto funcCall = rewriter.create<func::CallOp>(
        op.getLoc(), callee.getFunctionType().getResults(), calleeName.value(),
        op.getArgOperands());

    // A zero-result llvm.call cannot be replaced by values; erase it and let
    // the freshly created func.call stand on its own.
    if (op.getNumResults() != 0) {
      rewriter.replaceOp(op, funcCall->getResults());
    } else {
      rewriter.eraseOp(op);
    }

    return success();
  }
};

struct LowerLlvmToNeuraPass
: public PassWrapper<LowerLlvmToNeuraPass, OperationPass<ModuleOp>> {

Expand All @@ -316,6 +465,7 @@ struct LowerLlvmToNeuraPass

void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<mlir::neura::NeuraDialect>();
registry.insert<mlir::func::FuncDialect>();
}

void runOnOperation() override {
Expand All @@ -338,11 +488,24 @@ struct LowerLlvmToNeuraPass
patterns.add<LlvmReturnToNeuraReturn>(&getContext());
patterns.add<FuncReturnToNeuraReturn>(&getContext());
patterns.add<LlvmFSubToNeuraFSub>(&getContext());
patterns.add<LlvmAllocaToNeuraAlloca>(&getContext());
patterns.add<LlvmSExtToNeuraSExt>(&getContext());
patterns.add<LlvmZExtToNeuraZExt>(&getContext());
patterns.add<LlvmMulToNeuraMul>(&getContext());
patterns.add<LlvmFuncToNeuraFunc>(&getContext());
patterns.add<LlvmCallToFuncCall>(&getContext());

FrozenRewritePatternSet frozen(std::move(patterns));

ModuleOp module_op = getOperation();

// function-level conversions
if (failed(applyPatternsGreedily(module_op, frozen))) {
signalPassFailure();
return;
}

// operation-level conversions
// Applies to every region inside the module (regardless of func type,
// e.g., mlir func or llvm func).
module_op.walk([&](FunctionOpInterface func) {
Expand Down
46 changes: 46 additions & 0 deletions test/c2llvm2mlir/nested_loop/kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// RUN: mlir-neura-opt %s | FileCheck %s

#include <stdio.h>

// Number of filter taps; also the length of every array below.
#define NTAPS 32

// All-ones input signal so the expected output is easy to reason about.
int input[NTAPS] = {
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1
};
// Zero-initialized accumulator written by kernel().
int output[NTAPS];
// Fixed tap coefficients (same 8-value pattern repeated four times).
int coefficients[NTAPS] = {25, 150, 375, -225, 50, 75, -300, 125,
                           25, 150, 375, -225, 50, 75, -300, 125,
                           25, 150, 375, -225, 50, 75, -300, 125,
                           25, 150, 375, -225, 50, 75, -300, 125};

// Forward declaration; definition is at the bottom of this file.
void kernel(int input[], int output[], int coefficient[]);

// Test driver: runs the nested-loop kernel once over the global arrays and
// prints a single element so the computation cannot be discarded.
int main()
{

    // input_dsp (input, NTAPS, 0);

    kernel(input, output, coefficients);

    // output_dsp (input, NTAPS, 0);
    // output_dsp (coefficients, NTAPS, 0);
    // output_dsp (output, NTAPS, 0);
    printf("output: %d\n", output[0]);
    return 0;
}

/* input : input sample array */
/* output: output sample array */
/* coefficient: coefficient array */
// Nested-loop accumulation: every output[j] gains the full sum over i of
// input[i] * coefficient[i]. Deliberately left in this exact shape — the
// FileCheck expectations in test.mlir match the IR clang emits for it
// (neura.mul inside the inner loop, sext for indexing, etc.).
void kernel(int input[], int output[], int coefficient[]) {
    int i, j;

    for (i = 0; i < NTAPS; ++i) {
        for (j = 0; j < NTAPS; ++j) {
            output[j] += input[i] * coefficient[i];
        }
    }
}
27 changes: 27 additions & 0 deletions test/c2llvm2mlir/nested_loop/test.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// RUN: clang++ -S -emit-llvm kernel.cpp -o kernel.ll
// RUN: mlir-translate --import-llvm kernel.ll -o kernel.mlir

// RUN: mlir-neura-opt --assign-accelerator \
// RUN: --lower-llvm-to-neura \
// RUN: --canonicalize-live-in \
// RUN: --leverage-predicated-value \
// RUN: --transform-ctrl-to-data-flow \
// RUN: --fold-constant \
// RUN: --insert-data-mov kernel.mlir | FileCheck %s --check-prefix=CHECK-LLVM2NEURA

// RUN: mlir-neura-opt --assign-accelerator \
// RUN: --lower-llvm-to-neura \
// RUN: --canonicalize-live-in \
// RUN: --leverage-predicated-value \
// RUN: --transform-ctrl-to-data-flow \
// RUN: --fold-constant \
// RUN: --insert-data-mov \
// RUN: --map-to-accelerator="mapping-strategy=heuristic backtrack-config=simple" kernel.mlir | FileCheck %s --check-prefix=CHECK-LLVM2NEURA-MAP

// CHECK-LLVM2NEURA: accelerator = "neura"
// CHECK-LLVM2NEURA: %25 = neura.alloca %24 : !neura.data<i32, i1> -> !neura.data<!llvm.ptr, i1>
// CHECK-LLVM2NEURA: %38 = "neura.phi"(%36, %37) : (!neura.data<i32, i1>, !neura.data<i32, i1>) -> !neura.data<i32, i1>
// CHECK-LLVM2NEURA: %175 = neura.sext %174 : !neura.data<i32, i1> -> !neura.data<i64, i1>
// CHECK-LLVM2NEURA: %194 = "neura.mul"(%192, %193) : (!neura.data<i32, i1>, !neura.data<i32, i1>) -> !neura.data<i32, i1>

// CHECK-LLVM2NEURA-MAP: func.func @_Z6kernelPiS_S_(%arg0: !llvm.ptr {llvm.noundef}, %arg1: !llvm.ptr {llvm.noundef}, %arg2: !llvm.ptr {llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv<ccc>, accelerator = "neura", frame_pointer = #llvm.framePointerKind<all>, linkage = #llvm.linkage<external>, no_inline, no_unwind, optimize_none, passthrough = ["mustprogress", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 0 : i64, visibility_ = 0 : i64} {
File renamed without changes.
File renamed without changes.
9 changes: 6 additions & 3 deletions test/neura/for_loop/test.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
// RUN: --insert-data-mov \
// RUN: | FileCheck %s --check-prefix=CHECK-MOV

// CHECK: llvm.func local_unnamed_addr @_Z6kernelPfS_S_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) attributes {accelerator = "neura", memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} {
// CHECK: func.func
// CHECK: accelerator = "neura"
// CHECK-NEXT: %0 = "neura.constant"() <{predicate = true, value = "%arg0"}> : () -> !neura.data<!llvm.ptr, i1>
// CHECK-NEXT: %1 = "neura.constant"() <{predicate = true, value = "%arg1"}> : () -> !neura.data<!llvm.ptr, i1>
// CHECK-NEXT: %2 = "neura.constant"() <{predicate = true, value = "%arg2"}> : () -> !neura.data<!llvm.ptr, i1>
Expand All @@ -57,7 +58,8 @@
// CHECK-NEXT: }

// Verifies the neura ops are generated. And fusion happens.
// CHECK-FUSED: llvm.func local_unnamed_addr @_Z6kernelPfS_S_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) attributes {accelerator = "neura", memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} {
// CHECK-FUSED: func.func
// CHECK-FUSED: accelerator = "neura"
// CHECK-FUSED-NEXT: %0 = "neura.grant_once"() <{constant_value = "%arg0"}> : () -> !neura.data<!llvm.ptr, i1>
// CHECK-FUSED-NEXT: %1 = "neura.grant_once"() <{constant_value = "%arg1"}> : () -> !neura.data<!llvm.ptr, i1>
// CHECK-FUSED-NEXT: %2 = "neura.constant"() <{predicate = true, value = "%arg1"}> : () -> !neura.data<!llvm.ptr, i1>
Expand Down Expand Up @@ -107,7 +109,8 @@
// CHECK-FUSED-NEXT: "neura.return"() : () -> ()
// CHECK-FUSED-NEXT: }

// CHECK-MOV: llvm.func local_unnamed_addr @_Z6kernelPfS_S_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) attributes {accelerator = "neura", memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} {
// CHECK-MOV: func.func
// CHECK-MOV: accelerator = "neura"
// CHECK-MOV-NEXT: %0 = "neura.grant_once"() <{constant_value = "%arg0"}> : () -> !neura.data<!llvm.ptr, i1>
// CHECK-MOV-NEXT: %1 = "neura.grant_once"() <{constant_value = "%arg1"}> : () -> !neura.data<!llvm.ptr, i1>
// CHECK-MOV-NEXT: %2 = "neura.constant"() <{predicate = true, value = "%arg1"}> : () -> !neura.data<!llvm.ptr, i1>
Expand Down
Loading