Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 53 additions & 2 deletions include/NeuraDialect/NeuraOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -344,16 +344,67 @@ def VectorOfAnyFloat :
"vector of floats"
>;

// Type constraint: a builtin VectorType whose element type is an IntegerType.
// Mirrors VectorOfAnyFloat above. NOTE(review): currently unused — the integer
// vector ops below (vmul/vadd) take AnyType instead; consider wiring this in
// if strict verification of integer vectors is desired.
def VectorOfAnyInt :
TypeConstraint<
CPred<
"mlir::isa<::mlir::VectorType>($_self) && "
"mlir::isa<::mlir::IntegerType>(mlir::cast<::mlir::VectorType>($_self).getElementType())"
>,
"vector of integers"
>;

// Defines a vector multiplication operation.
def Neura_VFMulOp : Op<NeuraDialect, "vfmul"> {
let summary = "Vector floating multiplication operation";
let opName = "vfmul";
// Exactly one definition of arguments/results: TableGen rejects redefining a
// record field, so the stale VectorOfAnyFloat duplicates are removed.
// NOTE(review): AnyType matches the sibling vector ops (vmul/vadd/vfadd) —
// presumably loosened to admit non-vector wrapper types later in the
// pipeline; confirm, and tighten to VectorOfAnyFloat if that is not needed.
let arguments = (ins AnyType:$lhs, AnyType:$rhs);
let results = (outs AnyType:$result);
// let assemblyFormat = "$lhs `,` $rhs `,` attr-dict `:` type($result)";
// Operands and result must all share one element type.
let traits = [SameOperandsAndResultElementType];
}

// Defines a vector integer multiplication operation.
def Neura_VMulOp : Op<NeuraDialect, "vmul"> {
let summary = "Vector integer multiplication operation";
let opName = "vmul";
// NOTE(review): operands/results are AnyType rather than the VectorOfAnyInt
// constraint defined above — presumably to admit wrapper types later in the
// pipeline; confirm, otherwise tighten for verifier-level type safety.
let arguments = (ins AnyType:$lhs, AnyType:$rhs);
let results = (outs AnyType:$result);
// Operands and result must all share one element type.
let traits = [SameOperandsAndResultElementType];
}

// Defines a vector integer addition operation.
def Neura_VAddOp : Op<NeuraDialect, "vadd"> {
let summary = "Vector integer addition operation";
let opName = "vadd";
// NOTE(review): AnyType rather than VectorOfAnyInt — same looseness as
// vmul above; confirm whether non-vector types must be admitted here.
let arguments = (ins AnyType:$lhs, AnyType:$rhs);
let results = (outs AnyType:$result);
// Operands and result must all share one element type.
let traits = [SameOperandsAndResultElementType];
}

// Defines a vector floating addition operation.
def Neura_VFAddOp : Op<NeuraDialect, "vfadd"> {
let summary = "Vector floating addition operation";
let opName = "vfadd";
// NOTE(review): AnyType rather than VectorOfAnyFloat — same looseness as
// the other vector ops; confirm whether non-vector types must be admitted.
let arguments = (ins AnyType:$lhs, AnyType:$rhs);
let results = (outs AnyType:$result);
// Operands and result must all share one element type.
let traits = [SameOperandsAndResultElementType];
}

// Defines a vector reduction add operation (reduces vector to scalar).
def Neura_VectorReduceAddOp : Op<NeuraDialect, "vector.reduce.add"> {
let summary = "Vector reduction add operation - reduces vector to scalar by summing elements";
let description = [{
Reduces a vector to a scalar by summing all elements.
Similar to llvm.intr.vector.reduce.add.

Example:
%sum = neura.vector.reduce.add %vec : vector<4xi32> -> i32
}];
let opName = "vector.reduce.add";
// NOTE(review): both sides are AnyType, so the verifier cannot enforce that
// $input is a vector or that $result equals its element type — the lowering
// pattern is currently the only guard. Consider a custom verifier.
let arguments = (ins AnyType:$input);
let results = (outs AnyType:$result);
}

// ----------------------------------------------------
// Defines fused operations.

Expand Down
86 changes: 85 additions & 1 deletion lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,83 @@ struct LlvmVFMulToNeuraVFMul : public OpRewritePattern<mlir::LLVM::FMulOp> {
}
};

struct LlvmVMulToNeuraVMul : public OpRewritePattern<mlir::LLVM::MulOp> {
  using OpRewritePattern::OpRewritePattern;

  // Rewrites llvm.mul on integer vectors into neura.vmul; any other mul
  // (scalars, non-integer elements) is left untouched for other patterns.
  LogicalResult matchAndRewrite(mlir::LLVM::MulOp op,
                                PatternRewriter &rewriter) const override {
    Type result_type = op->getResult(0).getType();

    // Bail out unless the result is a vector of integers.
    auto vector_type = mlir::dyn_cast<VectorType>(result_type);
    if (!vector_type)
      return failure();
    if (!mlir::isa<IntegerType>(vector_type.getElementType()))
      return failure();

    rewriter.replaceOpWithNewOp<neura::VMulOp>(op, result_type,
                                               op->getOperand(0),
                                               op->getOperand(1));
    return success();
  }
};

struct LlvmVAddToNeuraVAdd : public OpRewritePattern<mlir::LLVM::AddOp> {
  using OpRewritePattern::OpRewritePattern;

  // Rewrites llvm.add on integer vectors into neura.vadd. Scalar adds are
  // handled by the scalar pattern (LlvmAddToNeuraAdd).
  LogicalResult matchAndRewrite(mlir::LLVM::AddOp op,
                                PatternRewriter &rewriter) const override {
    Value left = op->getOperand(0);
    Value right = op->getOperand(1);
    Type result_type = op->getResult(0).getType();

    // Only vector-of-integer results are eligible for this rewrite.
    auto vector_type = mlir::dyn_cast<VectorType>(result_type);
    bool is_int_vector =
        vector_type && mlir::isa<IntegerType>(vector_type.getElementType());
    if (!is_int_vector)
      return failure();

    rewriter.replaceOpWithNewOp<neura::VAddOp>(op, result_type, left, right);
    return success();
  }
};

struct LlvmVFAddToNeuraVFAdd : public OpRewritePattern<mlir::LLVM::FAddOp> {
  using OpRewritePattern::OpRewritePattern;

  // Rewrites llvm.fadd on float vectors into neura.vfadd.
  LogicalResult matchAndRewrite(mlir::LLVM::FAddOp op,
                                PatternRewriter &rewriter) const override {
    Value lhs = op->getOperand(0);
    Value rhs = op->getOperand(1);
    Type result_type = op->getResult(0).getType();

    // Only matches vectors with a floating-point element type. Note this is
    // any float width (f16/f32/f64/...), not just vector<Nxf32>; scalar
    // fadds are left for the scalar pattern (LlvmFAddToNeuraFAdd).
    auto vec_ty = mlir::dyn_cast<VectorType>(result_type);
    if (!vec_ty || !mlir::isa<FloatType>(vec_ty.getElementType()))
      return failure();

    rewriter.replaceOpWithNewOp<neura::VFAddOp>(op, result_type, lhs, rhs);
    return success();
  }
};

// Handles the llvm.intr.vector.reduce.add intrinsic. It is matched by
// operation name because the intrinsic is a generic op, not a dedicated
// ODS-generated op class we could use with OpRewritePattern.
struct LlvmVectorReduceAddToNeuraVectorReduceAdd : public RewritePattern {
  LlvmVectorReduceAddToNeuraVectorReduceAdd(MLIRContext *context)
      : RewritePattern("llvm.intr.vector.reduce.add", /*benefit=*/1, context) {}

  LogicalResult matchAndRewrite(Operation *op,
                                PatternRewriter &rewriter) const override {
    // The intrinsic takes exactly one operand and produces one result.
    if (op->getNumOperands() != 1 || op->getNumResults() != 1)
      return failure();

    Value input = op->getOperand(0);
    Type result_type = op->getResult(0).getType();

    // Enforce the intrinsic's contract before rewriting: the operand must be
    // a vector whose element type equals the scalar result type. The neura
    // op declares AnyType operands, so nothing downstream would catch a
    // mismatch — bail out (rather than assert) to stay safe on odd input.
    auto vec_ty = mlir::dyn_cast<VectorType>(input.getType());
    if (!vec_ty || vec_ty.getElementType() != result_type)
      return failure();

    rewriter.replaceOpWithNewOp<neura::VectorReduceAddOp>(op, result_type,
                                                          input);
    return success();
  }
};

struct LlvmICmpToNeuraICmp : public OpRewritePattern<LLVM::ICmpOp> {
using OpRewritePattern::OpRewritePattern;

Expand Down Expand Up @@ -637,11 +714,18 @@ struct LowerLlvmToNeuraPass
// Adds DRR patterns.
mlir::neura::llvm2neura::populateWithGenerated(patterns);
patterns.add<LlvmConstantToNeuraConstant>(&getContext());
// Vector patterns. NOTE(review): registration order does not by itself give
// these priority in the greedy driver — they win only because each bails out
// on non-vector types. Use explicit pattern benefits if true priority is
// ever needed.
patterns.add<LlvmVMulToNeuraVMul>(&getContext());
patterns.add<LlvmVAddToNeuraVAdd>(&getContext());
// Registered exactly once (a duplicate registration of this pattern was
// removed; re-adding it is redundant work for the driver).
patterns.add<LlvmVFMulToNeuraVFMul>(&getContext());
patterns.add<LlvmVFAddToNeuraVFAdd>(&getContext());
// Uses add() like the rest of the file (insert() is the legacy spelling).
patterns.add<LlvmVectorReduceAddToNeuraVectorReduceAdd>(&getContext());
// Scalar patterns.
patterns.add<LlvmAddToNeuraAdd>(&getContext());
patterns.add<LlvmOrToNeuraOr>(&getContext());
patterns.add<LlvmFAddToNeuraFAdd>(&getContext());
patterns.add<LlvmFMulToNeuraFMul>(&getContext());
patterns.add<LlvmICmpToNeuraICmp>(&getContext());
patterns.add<LlvmFCmpToNeuraFCmp>(&getContext());
patterns.add<LlvmGEPToNeuraGEP>(&getContext());
Expand Down
53 changes: 53 additions & 0 deletions test/e2e/fir/fir_kernel_vec.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Compiles the original C kernel to mlir with vectorization enabled, then lowers it via Neura.
// RUN: clang++ -S -emit-llvm -O3 -fno-unroll-loops -o %t-kernel-full.ll %S/../../benchmark/CGRA-Bench/kernels/fir/fir_int.cpp
// RUN: llvm-extract --rfunc=".*kernel.*" %t-kernel-full.ll -o %t-kernel-only.ll
// RUN: mlir-translate --import-llvm %t-kernel-only.ll -o %t-kernel.mlir

// RUN: mlir-neura-opt %t-kernel.mlir \
// RUN: --assign-accelerator \
// RUN: -o %t-1-assign-accelerator.mlir

// RUN: mlir-neura-opt %t-1-assign-accelerator.mlir \
// RUN: --lower-llvm-to-neura \
// RUN: -o %t-2-lower-llvm-to-neura.mlir

// RUN: mlir-neura-opt %t-2-lower-llvm-to-neura.mlir \
// RUN: --promote-func-arg-to-const \
// RUN: -o %t-3-promote-func-arg-to-const.mlir

// RUN: mlir-neura-opt %t-3-promote-func-arg-to-const.mlir \
// RUN: --fold-constant \
// RUN: -o %t-4-fold-constant.mlir

// RUN: mlir-neura-opt %t-4-fold-constant.mlir \
// RUN: --canonicalize-live-in \
// RUN: -o %t-5-canonicalize-live-in.mlir

// RUN: mlir-neura-opt %t-5-canonicalize-live-in.mlir \
// RUN: --leverage-predicated-value \
// RUN: -o %t-6-leverage-predicated-value.mlir

// RUN: mlir-neura-opt %t-6-leverage-predicated-value.mlir \
// RUN: --transform-ctrl-to-data-flow \
// RUN: -o %t-7-transform-ctrl-to-data-flow.mlir

// RUN: mlir-neura-opt %t-7-transform-ctrl-to-data-flow.mlir \
// RUN: --fold-constant \
// RUN: -o %t-8-fold-constant-2.mlir

// RUN: mlir-neura-opt %t-8-fold-constant-2.mlir \
// RUN: --insert-data-mov \
// RUN: -o %t-9-insert-data-mov.mlir

// RUN: mlir-neura-opt %t-9-insert-data-mov.mlir \
// RUN: --map-to-accelerator="mapping-strategy=heuristic" \
// RUN: --architecture-spec=%S/../../arch_spec/architecture.yaml \
// RUN: --generate-code -o %t-mapping.mlir

// RUN: FileCheck %s --input-file=%t-mapping.mlir -check-prefix=MAPPING

// MAPPING: module
// MAPPING: func.func
// MAPPING-SAME: mapping_mode = "spatial-temporal"
// MAPPING-SAME: mapping_strategy = "heuristic"