Skip to content
52 changes: 50 additions & 2 deletions include/NeuraDialect/NeuraOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ def Neura_FMulOp : Op<NeuraDialect, "fmul"> {

def Neura_FDivOp : Op<NeuraDialect, "fdiv"> {
  let summary = "Floating division operation";
  // Division is a binary operation: both operands are required. AnyType
  // (rather than AnyFloat) is used so predicated wrapper types such as
  // !neura.data<f32, i1> are accepted, consistent with the surrounding ops.
  // NOTE(review): the previous revision declared $rhs as Optional<AnyType>,
  // but the lowering in ArithToNeuraPass always supplies both operands and a
  // divide with no divisor is meaningless, so $rhs is kept required here.
  let arguments = (ins AnyType:$lhs, AnyType:$rhs);
  let results = (outs AnyType:$result);
  // let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($result)";
}

Expand Down Expand Up @@ -211,6 +211,54 @@ def Neura_CastOp : Op<NeuraDialect, "cast">{
// let assemblyFormat = "$input type($input) `->` type($output) `,` $predicate attr-dict";
}

// Defines an alloca operation for memory allocation.
def Neura_AllocaOp : Op<NeuraDialect, "alloca"> {
  let summary = "Memory allocation operation";
  let description = [{
    Allocates memory on the stack, similar to llvm.alloca.
    Takes a predicated size value and returns a pointer to the allocated memory.

    Example:
    %ptr = neura.alloca %size : !neura.data<i32, i1> -> !llvm.ptr
  }];

  // AnyType (not an integer constraint) so the size may arrive wrapped in a
  // predicated type such as !neura.data<i32, i1>.
  let arguments = (ins AnyType:$size);
  // NOTE(review): no memory-effect interface is declared, so this op is
  // treated as pure by CSE/DCE — confirm that is intended for an allocation.
  let results = (outs AnyType:$result);
  let assemblyFormat = "$size attr-dict `:` type($size) `->` type($result)";
}

// Defines a sign extension operation.
def Neura_SExtOp : Op<NeuraDialect, "sext"> {
  let summary = "Sign extension operation";
  let description = [{
    Sign extends a value from a smaller integer type to a larger integer type.
    Similar to llvm.sext, but works with predicated values.

    Example:
    %extended = neura.sext %value : !neura.data<i8, i1> -> !neura.data<i32, i1>
  }];

  // AnyType on both sides so predicated wrappers (!neura.data<...>) pass the
  // ODS check; no verifier enforces that the result type is actually wider.
  let arguments = (ins AnyType:$value);
  let results = (outs AnyType:$result);
  let assemblyFormat = "$value attr-dict `:` type($value) `->` type($result)";
}

// Defines a zero extension operation.
def Neura_ZExtOp : Op<NeuraDialect, "zext"> {
  let summary = "Zero extension operation";
  let description = [{
    Zero extends a value from a smaller integer type to a larger integer type.
    Similar to llvm.zext, but works with predicated values.

    Example:
    %extended = neura.zext %value : !neura.data<i8, i1> -> !neura.data<i32, i1>
  }];

  // Mirrors Neura_SExtOp: loose AnyType constraints so predicated wrapper
  // types are accepted; widening is not verified at the ODS level.
  let arguments = (ins AnyType:$value);
  let results = (outs AnyType:$result);
  let assemblyFormat = "$value attr-dict `:` type($value) `->` type($result)";
}

// ----------------------------------------------------
// Defines vector operations.

Expand Down
3 changes: 1 addition & 2 deletions lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,7 @@ struct ArithFDivToNeuraFDiv : public OpRewritePattern<mlir::arith::DivFOp> {
Type result_type = op.getType();

// Optional predicate: default to null.
rewriter.replaceOpWithNewOp<neura::FDivOp>(op, result_type, lhs, rhs,
nullptr);
rewriter.replaceOpWithNewOp<neura::FDivOp>(op, result_type, lhs, rhs);
return success();
}
};
Expand Down
163 changes: 163 additions & 0 deletions lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,155 @@ struct LlvmConstantToNeuraConstant : public OpRewritePattern<LLVM::ConstantOp> {
}
};

// Rewrites llvm.alloca into neura.alloca, forwarding the array-size operand
// and keeping the original (pointer) result type unchanged.
struct LlvmAllocaToNeuraAlloca : public OpRewritePattern<LLVM::AllocaOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::AllocaOp op,
                                PatternRewriter &rewriter) const override {
    // NOTE(review): the size operand is forwarded as-is; any conversion into
    // a predicated !neura.data wrapper is assumed to happen in a later pass.
    rewriter.replaceOpWithNewOp<neura::AllocaOp>(op, op.getType(),
                                                 op.getArraySize());
    return success();
  }
};

// Lowers llvm.sext to neura.sext, preserving operand and destination type.
struct LlvmSExtToNeuraSExt : public OpRewritePattern<LLVM::SExtOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::SExtOp op,
                                PatternRewriter &rewriter) const override {
    rewriter.replaceOpWithNewOp<neura::SExtOp>(op, op.getType(), op.getArg());
    return success();
  }
};

// Lowers llvm.zext to neura.zext, preserving operand and destination type.
struct LlvmZExtToNeuraZExt : public OpRewritePattern<LLVM::ZExtOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::ZExtOp op,
                                PatternRewriter &rewriter) const override {
    rewriter.replaceOpWithNewOp<neura::ZExtOp>(op, op.getType(), op.getArg());
    return success();
  }
};

// Lowers llvm.mul to neura.mul with identical operands and result type.
struct LlvmMulToNeuraMul : public OpRewritePattern<LLVM::MulOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::MulOp op,
                                PatternRewriter &rewriter) const override {
    rewriter.replaceOpWithNewOp<neura::MulOp>(op, op.getType(), op.getLhs(),
                                              op.getRhs());
    return success();
  }
};

// Converts an llvm.func that is tagged for the neura accelerator into a
// func.func with the same name, signature, and attributes, moving the body
// region over unchanged.
struct LlvmFuncToNeuraFunc : public OpRewritePattern<LLVM::LLVMFuncOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::LLVMFuncOp op,
                                PatternRewriter &rewriter) const override {

    // Only functions explicitly targeted at the neura accelerator are
    // converted; all other llvm.func ops are left untouched.
    auto target = op->getAttrOfType<StringAttr>(mlir::accel::kAcceleratorAttr);
    if (!target || target.getValue() != mlir::accel::kNeuraTarget) {
      return failure();
    }

    // Convert LLVMFunctionType to FunctionType.
    // NOTE(review): an LLVM void return is carried over as a one-result
    // FunctionType of !llvm.void rather than a zero-result signature
    // (visible in the CHECK lines of this PR's tests) — confirm intended.
    auto llvmFuncType = op.getFunctionType();
    auto funcType = rewriter.getFunctionType(
        llvmFuncType.getParams(),
        llvmFuncType.getReturnType()
    );

    // Create the new func.func operation using OperationState to have full
    // control over which attributes end up on the result.
    OperationState state(op.getLoc(), func::FuncOp::getOperationName());
    state.addAttribute("sym_name", rewriter.getStringAttr(op.getName()));
    state.addAttribute("function_type", TypeAttr::get(funcType));

    // Copy ALL attributes from the original llvm.func exactly as they are.
    // Skip function type and name attributes as they are handled above;
    // everything else (accelerator tag, target features, ...) is preserved.
    SmallVector<NamedAttribute> attrs;
    for (auto attr : op->getAttrs()) {
      if (attr.getName() == "function_type" || attr.getName() == "sym_name") {
        continue;
      }
      attrs.push_back(attr);
    }
    state.addAttributes(attrs);

    // Add the (initially empty) function body region.
    state.addRegion();

    auto newFunc = cast<func::FuncOp>(rewriter.create(state));

    // Move the function body from the llvm.func into the new func.func.
    rewriter.inlineRegionBefore(op.getBody(), newFunc.getBody(), newFunc.getBody().end());

    // Replace the old function (0 results, so this just erases op and keeps
    // newFunc in its place).
    rewriter.replaceOp(op, newFunc);
    return success();
  }
};

// Rewrites a direct llvm.call into func.call, but only once the callee has
// been materialized as a func.func in the enclosing module (i.e. after
// LlvmFuncToNeuraFunc has fired for it).
struct LlvmCallToFuncCall : public OpRewritePattern<LLVM::CallOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(LLVM::CallOp op,
                                PatternRewriter &rewriter) const override {
    // Indirect calls carry no symbol; leave them alone.
    auto calleeName = op.getCallee();
    if (!calleeName) {
      return failure();
    }

    auto module = op->getParentOfType<ModuleOp>();
    if (!module) {
      return failure();
    }

    // Bail out until a func.func with the same symbol name exists.
    auto callee = module.lookupSymbol<func::FuncOp>(calleeName.value());
    if (!callee) {
      return failure();
    }

    // Build the func.call, taking the result types from the callee's
    // signature rather than from the llvm.call itself.
    auto funcCall = rewriter.create<func::CallOp>(
        op.getLoc(), callee.getFunctionType().getResults(), calleeName.value(),
        op.getArgOperands());

    // A zero-result llvm.call cannot be replaced by values; erase it and let
    // the freshly created func.call stand on its own.
    if (op.getNumResults() != 0) {
      rewriter.replaceOp(op, funcCall->getResults());
    } else {
      rewriter.eraseOp(op);
    }

    return success();
  }
};

struct LowerLlvmToNeuraPass
: public PassWrapper<LowerLlvmToNeuraPass, OperationPass<ModuleOp>> {

Expand All @@ -316,6 +465,7 @@ struct LowerLlvmToNeuraPass

void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<mlir::neura::NeuraDialect>();
registry.insert<mlir::func::FuncDialect>();
}

void runOnOperation() override {
Expand All @@ -338,11 +488,24 @@ struct LowerLlvmToNeuraPass
patterns.add<LlvmReturnToNeuraReturn>(&getContext());
patterns.add<FuncReturnToNeuraReturn>(&getContext());
patterns.add<LlvmFSubToNeuraFSub>(&getContext());
patterns.add<LlvmAllocaToNeuraAlloca>(&getContext());
patterns.add<LlvmSExtToNeuraSExt>(&getContext());
patterns.add<LlvmZExtToNeuraZExt>(&getContext());
patterns.add<LlvmMulToNeuraMul>(&getContext());
patterns.add<LlvmFuncToNeuraFunc>(&getContext());
patterns.add<LlvmCallToFuncCall>(&getContext());

FrozenRewritePatternSet frozen(std::move(patterns));

ModuleOp module_op = getOperation();

// function-level conversions
if (failed(applyPatternsGreedily(module_op, frozen))) {
signalPassFailure();
return;
}

// operation-level conversions
// Applies to every region inside the module (regardless of func type,
// e.g., mlir func or llvm func).
module_op.walk([&](FunctionOpInterface func) {
Expand Down
46 changes: 46 additions & 0 deletions test/c2llvm2mlir/nested_loop/kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// RUN: mlir-neura-opt %s | FileCheck %s

#include <stdio.h>

// Number of filter taps; also the length of every array below.
#define NTAPS 32

// All-ones input signal so the expected output is easy to reason about.
int input[NTAPS] = {
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1
};
// Zero-initialized accumulator written by kernel().
int output[NTAPS];
// Fixed tap coefficients (same 8-value pattern repeated four times).
int coefficients[NTAPS] = {25, 150, 375, -225, 50, 75, -300, 125,
                           25, 150, 375, -225, 50, 75, -300, 125,
                           25, 150, 375, -225, 50, 75, -300, 125,
                           25, 150, 375, -225, 50, 75, -300, 125};

// Forward declaration; definition is at the bottom of this file.
void kernel(int input[], int output[], int coefficient[]);

// Test driver: runs the nested-loop kernel once over the global arrays and
// prints a single element so the computation cannot be discarded.
int main()
{

    // input_dsp (input, NTAPS, 0);

    kernel(input, output, coefficients);

    // output_dsp (input, NTAPS, 0);
    // output_dsp (coefficients, NTAPS, 0);
    // output_dsp (output, NTAPS, 0);
    printf("output: %d\n", output[0]);
    return 0;
}

/* input : input sample array */
/* output: output sample array */
/* coefficient: coefficient array */
// Nested-loop accumulation: every output[j] gains the full sum over i of
// input[i] * coefficient[i]. Deliberately left in this exact shape — the
// FileCheck expectations in test.mlir match the IR clang emits for it
// (neura.mul inside the inner loop, sext for indexing, etc.).
void kernel(int input[], int output[], int coefficient[]) {
    int i, j;

    for (i = 0; i < NTAPS; ++i) {
        for (j = 0; j < NTAPS; ++j) {
            output[j] += input[i] * coefficient[i];
        }
    }
}
27 changes: 27 additions & 0 deletions test/c2llvm2mlir/nested_loop/test.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// RUN: clang++ -S -emit-llvm kernel.cpp -o kernel.ll
// RUN: mlir-translate --import-llvm kernel.ll -o kernel.mlir

// RUN: mlir-neura-opt --assign-accelerator \
// RUN: --lower-llvm-to-neura \
// RUN: --canonicalize-live-in \
// RUN: --leverage-predicated-value \
// RUN: --transform-ctrl-to-data-flow \
// RUN: --fold-constant \
// RUN: --insert-data-mov kernel.mlir | FileCheck %s --check-prefix=CHECK-LLVM2NEURA

// RUN: mlir-neura-opt --assign-accelerator \
// RUN: --lower-llvm-to-neura \
// RUN: --canonicalize-live-in \
// RUN: --leverage-predicated-value \
// RUN: --transform-ctrl-to-data-flow \
// RUN: --fold-constant \
// RUN: --insert-data-mov \
// RUN: --map-to-accelerator="mapping-strategy=heuristic backtrack-config=simple" kernel.mlir | FileCheck %s --check-prefix=CHECK-LLVM2NEURA-MAP

// CHECK-LLVM2NEURA: accelerator = "neura"
// CHECK-LLVM2NEURA: %25 = neura.alloca %24 : !neura.data<i32, i1> -> !neura.data<!llvm.ptr, i1>
// CHECK-LLVM2NEURA: %38 = "neura.phi"(%36, %37) : (!neura.data<i32, i1>, !neura.data<i32, i1>) -> !neura.data<i32, i1>
// CHECK-LLVM2NEURA: %175 = neura.sext %174 : !neura.data<i32, i1> -> !neura.data<i64, i1>
// CHECK-LLVM2NEURA: %194 = "neura.mul"(%192, %193) : (!neura.data<i32, i1>, !neura.data<i32, i1>) -> !neura.data<i32, i1>

// CHECK-LLVM2NEURA-MAP: func.func @_Z6kernelPiS_S_(%arg0: !llvm.ptr {llvm.noundef}, %arg1: !llvm.ptr {llvm.noundef}, %arg2: !llvm.ptr {llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv<ccc>, accelerator = "neura", frame_pointer = #llvm.framePointerKind<all>, linkage = #llvm.linkage<external>, no_inline, no_unwind, optimize_none, passthrough = ["mustprogress", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 0 : i64, visibility_ = 0 : i64} {
File renamed without changes.
File renamed without changes.
9 changes: 6 additions & 3 deletions test/neura/for_loop/test.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
// RUN: --insert-data-mov \
// RUN: | FileCheck %s --check-prefix=CHECK-MOV

// CHECK: llvm.func local_unnamed_addr @_Z6kernelPfS_S_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) attributes {accelerator = "neura", memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} {
// CHECK: func.func
// CHECK: accelerator = "neura"
// CHECK-NEXT: %0 = "neura.constant"() <{predicate = true, value = "%arg0"}> : () -> !neura.data<!llvm.ptr, i1>
// CHECK-NEXT: %1 = "neura.constant"() <{predicate = true, value = "%arg1"}> : () -> !neura.data<!llvm.ptr, i1>
// CHECK-NEXT: %2 = "neura.constant"() <{predicate = true, value = "%arg2"}> : () -> !neura.data<!llvm.ptr, i1>
Expand All @@ -57,7 +58,8 @@
// CHECK-NEXT: }

// Verifies the neura ops are generated. And fusion happens.
// CHECK-FUSED: llvm.func local_unnamed_addr @_Z6kernelPfS_S_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) attributes {accelerator = "neura", memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} {
// CHECK-FUSED: func.func
// CHECK-FUSED: accelerator = "neura"
// CHECK-FUSED-NEXT: %0 = "neura.grant_once"() <{constant_value = "%arg0"}> : () -> !neura.data<!llvm.ptr, i1>
// CHECK-FUSED-NEXT: %1 = "neura.grant_once"() <{constant_value = "%arg1"}> : () -> !neura.data<!llvm.ptr, i1>
// CHECK-FUSED-NEXT: %2 = "neura.constant"() <{predicate = true, value = "%arg1"}> : () -> !neura.data<!llvm.ptr, i1>
Expand Down Expand Up @@ -107,7 +109,8 @@
// CHECK-FUSED-NEXT: "neura.return"() : () -> ()
// CHECK-FUSED-NEXT: }

// CHECK-MOV: llvm.func local_unnamed_addr @_Z6kernelPfS_S_(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) attributes {accelerator = "neura", memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["mustprogress", "nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic"} {
// CHECK-MOV: func.func
// CHECK-MOV: accelerator = "neura"
// CHECK-MOV-NEXT: %0 = "neura.grant_once"() <{constant_value = "%arg0"}> : () -> !neura.data<!llvm.ptr, i1>
// CHECK-MOV-NEXT: %1 = "neura.grant_once"() <{constant_value = "%arg1"}> : () -> !neura.data<!llvm.ptr, i1>
// CHECK-MOV-NEXT: %2 = "neura.constant"() <{predicate = true, value = "%arg1"}> : () -> !neura.data<!llvm.ptr, i1>
Expand Down
Loading