diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td index e0b01888..ccf407db 100644 --- a/include/NeuraDialect/NeuraOps.td +++ b/include/NeuraDialect/NeuraOps.td @@ -22,6 +22,35 @@ def Neura_FAddOp : Op { let traits = [SameOperandsAndResultElementType]; } +// Defines a multiplication operation. +def Neura_FMulOp : Op { + let summary = "Floating multiplication operation"; + let opName = "fmul"; + let arguments = (ins AnyFloat:$lhs, AnyFloat:$rhs); + let results = (outs AnyFloat:$result); + // let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($result)"; + let traits = [SameOperandsAndResultElementType]; +} + +def VectorOfAnyFloat : + TypeConstraint< + CPred< + "mlir::isa<::mlir::VectorType>($_self) && " + "mlir::isa<::mlir::FloatType>(mlir::cast<::mlir::VectorType>($_self).getElementType())" + >, + "vector of floats" + >; + +// Defines a vector multiplication operation. +def Neura_VFMulOp : Op { + let summary = "Vector floating multiplication operation"; + let opName = "vfmul"; + let arguments = (ins VectorOfAnyFloat:$lhs, VectorOfAnyFloat:$rhs); + let results = (outs VectorOfAnyFloat:$result); + // let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($result)"; + let traits = [SameOperandsAndResultElementType]; +} + def Neura_FAddFAddOp : Op { let summary = "Fused fadd(fadd(a, b), c)"; let arguments = (ins AnyFloat:$a, AnyFloat:$b, AnyFloat:$c); @@ -30,6 +59,13 @@ def Neura_FAddFAddOp : Op { let traits = [SameOperandsAndResultElementType]; } +def Neura_FMulFAddOp : Op { + let summary = "Fused fadd(fmul(a, b), c)"; + let arguments = (ins AnyFloat:$a, AnyFloat:$b, AnyFloat:$c); + let results = (outs AnyFloat:$result); + // let assemblyFormat = "$a `,` $b `,` $c attr-dict `:` type($result)"; + let traits = [SameOperandsAndResultElementType]; +} // Defines a move operation for data communication. 
def Neura_MovOp : Op { diff --git a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp index 0ad72602..39b8d6f8 100644 --- a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp +++ b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp @@ -24,8 +24,59 @@ using namespace mlir; namespace { +// Lowers integer add from mlir.llvm.add to neura.add. We provide the lowering +// here instead of tablegen because mlir.llvm.add uses an EnumProperty +// (IntegerOverflowFlags) defined via MLIR interfaces — which DRR cannot match +// on or extract from. +struct LlvmAddToNeuraAdd : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(mlir::LLVM::AddOp op, + PatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, op.getType(), op.getLhs(), op.getRhs()); + return success(); + } +}; + +struct LlvmFMulToNeuraFMul : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(mlir::LLVM::FMulOp op, + PatternRewriter &rewriter) const override { + Value lhs = op->getOperand(0); + Value rhs = op->getOperand(1); + Type result_type = op->getResult(0).getType(); + + // Only matches scalar float. + if (!mlir::isa(result_type)) + return failure(); + + rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs); + return success(); + } +}; + +struct LlvmVFMulToNeuraVFMul: public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(mlir::LLVM::FMulOp op, + PatternRewriter &rewriter) const override { + Value lhs = op->getOperand(0); + Value rhs = op->getOperand(1); + Type result_type = op->getResult(0).getType(); + + // Only matches vector. 
+ auto vecTy = mlir::dyn_cast(result_type); + if (!vecTy || !mlir::isa(vecTy.getElementType())) + return failure(); + + rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs); + return success(); + } +}; + struct LowerLlvmToNeuraPass - : public PassWrapper> { + : public PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LowerLlvmToNeuraPass) @@ -40,10 +91,26 @@ struct LowerLlvmToNeuraPass void runOnOperation() override { RewritePatternSet patterns(&getContext()); + // Adds DRR patterns. mlir::neura::llvm2neura::populateWithGenerated(patterns); - if (failed(applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)))) { - signalPassFailure(); - } + patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); + FrozenRewritePatternSet frozen(std::move(patterns)); + + ModuleOp module_op = getOperation(); + + // Applies to every region inside the module (regardless of func type, + // e.g., mlir func or llvm func). + module_op.walk([&](Operation *op) { + if (!op->getRegions().empty()) { + for (Region ®ion : op->getRegions()) { + if (failed(applyPatternsAndFoldGreedily(region, frozen))) { + signalPassFailure(); + } + } + } + }); } }; } // namespace diff --git a/lib/Conversion/LlvmToNeura/LlvmToNeuraPatterns.td b/lib/Conversion/LlvmToNeura/LlvmToNeuraPatterns.td index 9b1f0035..1053ae91 100644 --- a/lib/Conversion/LlvmToNeura/LlvmToNeuraPatterns.td +++ b/lib/Conversion/LlvmToNeura/LlvmToNeuraPatterns.td @@ -7,3 +7,4 @@ def : Pat< (LLVM_FAddOp $lhs, $rhs, $_fastmath), (Neura_FAddOp $lhs, $rhs) >; + diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt index 1635fc21..1e99239b 100644 --- a/lib/Transforms/CMakeLists.txt +++ b/lib/Transforms/CMakeLists.txt @@ -16,6 +16,7 @@ add_mlir_library(NeuraTransforms LINK_LIBS PUBLIC MLIRIR MLIRFuncDialect + MLIRLLVMDialect MLIRPass MLIRSupport MLIRTransformUtils diff --git a/lib/Transforms/FusePatternsPass.cpp b/lib/Transforms/FusePatternsPass.cpp index 
b7975bb1..6e19e866 100644 --- a/lib/Transforms/FusePatternsPass.cpp +++ b/lib/Transforms/FusePatternsPass.cpp @@ -8,30 +8,92 @@ using namespace mlir; namespace { -struct FuseFAddFAddPattern : public RewritePattern { - FuseFAddFAddPattern(MLIRContext *ctx) - : RewritePattern("neura.fadd", /*benefit=*/1, ctx) {} +struct FuseFAddFAddPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) const override { - auto first = dyn_cast(op); - if (!first || !first->hasOneUse()) return failure(); + LogicalResult matchAndRewrite(neura::FAddOp second, + PatternRewriter &rewriter) const override { + Value lhs = second.getLhs(); + Value rhs = second.getRhs(); - auto user = dyn_cast(*first->getUsers().begin()); - if (!user) return failure(); + auto lhs_op = lhs.getDefiningOp(); + auto rhs_op = rhs.getDefiningOp(); - Location loc = user.getLoc(); - Type type = user.getType(); + neura::FAddOp first = nullptr; + Value tail; - auto fused = rewriter.create(loc, type, - first.getLhs(), first.getRhs(), user.getRhs()); + // Case 1: LHS is another fadd. + if (lhs_op && second.getRhs()) { + first = lhs_op; + tail = second.getRhs(); + } + // Case 2: RHS is another fadd. + else if (rhs_op && second.getLhs()) { + first = rhs_op; + tail = second.getLhs(); + } - rewriter.replaceOp(user, fused.getResult()); + if (!first) + return failure(); + + // Only fuses if the first fadd is not reused elsewhere. 
+ if (!first->hasOneUse()) + return failure(); + + Location loc = second.getLoc(); + Type type = second.getType(); + + auto fused = rewriter.create( + loc, type, first.getLhs(), first.getRhs(), tail); + + rewriter.replaceOp(second, fused.getResult()); rewriter.eraseOp(first); return success(); } }; -struct FusePatternsPass : public PassWrapper> { +struct FuseFMulFAddPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(neura::FAddOp add, + PatternRewriter &rewriter) const override { + auto lhs_op = add.getLhs().getDefiningOp(); + auto rhs_op = add.getRhs().getDefiningOp(); + + neura::FMulOp fmul = nullptr; + Value other; + + // Case 1: fmul is on the LHS. + if (lhs_op && add.getRhs()) { + fmul = lhs_op; + other = add.getRhs(); + } + // Case 2: fmul is on the RHS. + else if (rhs_op && add.getLhs()) { + fmul = rhs_op; + other = add.getLhs(); + } + + if (!fmul) + return failure(); + + // Optional: only fuses if fmul has a single use. + if (!fmul->hasOneUse()) + return failure(); + + Location loc = add.getLoc(); + Type type = add.getType(); + + auto fused = rewriter.create( + loc, type, fmul.getLhs(), fmul.getRhs(), other); + + rewriter.replaceOp(add, fused.getResult()); + rewriter.eraseOp(fmul); + return success(); + } +}; + +struct FusePatternsPass : public PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(FusePatternsPass) StringRef getArgument() const override { return "fuse-patterns"; } @@ -39,9 +101,23 @@ struct FusePatternsPass : public PassWrapper(&getContext()); - if (failed(applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)))) - signalPassFailure(); + patterns.add(&getContext(), 2); + patterns.add(&getContext(), 3); + FrozenRewritePatternSet frozen(std::move(patterns)); + + ModuleOp module_op = getOperation(); + + // Applies to every region inside the module (regardless of func type, + // e.g., mlir func or llvm func). 
+ module_op.walk([&](Operation *op) { + if (!op->getRegions().empty()) { + for (Region ®ion : op->getRegions()) { + if (failed(applyPatternsAndFoldGreedily(region, frozen))) { + signalPassFailure(); + } + } + } + }); } }; diff --git a/lib/Transforms/InsertMovPass.cpp b/lib/Transforms/InsertMovPass.cpp index 0ff05656..5778a381 100644 --- a/lib/Transforms/InsertMovPass.cpp +++ b/lib/Transforms/InsertMovPass.cpp @@ -1,9 +1,10 @@ #include "NeuraDialect/NeuraDialect.h" #include "NeuraDialect/NeuraOps.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" using namespace mlir; @@ -54,7 +55,7 @@ struct InsertMovForNeuraOps : public RewritePattern { }; struct InsertMovPass - : public PassWrapper> { + : public PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(InsertMovPass) StringRef getArgument() const override { return "insert-mov"; } @@ -69,8 +70,21 @@ struct InsertMovPass void runOnOperation() override { RewritePatternSet patterns(&getContext()); patterns.add(&getContext()); - if (failed(applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)))) - signalPassFailure(); + FrozenRewritePatternSet frozen(std::move(patterns)); + + ModuleOp module_op = getOperation(); + + // Applies to every region inside the module (regardless of func type, + // e.g., mlir func or llvm func). + module_op.walk([&](Operation *op) { + if (!op->getRegions().empty()) { + for (Region ®ion : op->getRegions()) { + if (failed(applyPatternsAndFoldGreedily(region, frozen))) { + signalPassFailure(); + } + } + } + }); } }; } // namespace diff --git a/test/neura/fadd_fadd.mlir b/test/neura/fadd_fadd.mlir index 63e2b5e0..fe5aa9de 100644 --- a/test/neura/fadd_fadd.mlir +++ b/test/neura/fadd_fadd.mlir @@ -1,3 +1,4 @@ +// Applies pattern fusion before mov insertion. 
// RUN: mlir-neura-opt --lower-arith-to-neura --fuse-patterns --insert-mov %s | FileCheck %s func.func @test(%a: f32, %b: f32) -> f32 { diff --git a/test/neura/for_loop/kernel.cpp b/test/neura/for_loop/kernel.cpp new file mode 100644 index 00000000..a2a0aec3 --- /dev/null +++ b/test/neura/for_loop/kernel.cpp @@ -0,0 +1,46 @@ +// RUN: mlir-neura-opt %s | FileCheck %s + +#include + +#define NTAPS 32 + +float input[NTAPS] = { +1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, +1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, +1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, +1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 +}; +float output[NTAPS]; +float coefficients[NTAPS] = {0.25, 1.50, 3.75, -2.25, 0.50, 0.75, -3.00, 1.25, +0.25, 1.50, 3.75, -2.25, 0.50, 0.75, -3.00, 1.25, +0.25, 1.50, 3.75, -2.25, 0.50, 0.75, -3.00, 1.25, +0.25, 1.50, 3.75, -2.25, 0.50, 0.75, -3.00, 1.25}; + +void kernel(float input[], float output[], float coefficient[]); + +int main() +{ + +// input_dsp (input, NTAPS, 0); + + kernel(input, output, coefficients); + +// output_dsp (input, NTAPS, 0); +// output_dsp (coefficients, NTAPS, 0); +// output_dsp (output, NTAPS, 0); + printf("output: %f\n", output[0]); + return 0; +} + +/* input : input sample array */ +/* output: output sample array */ +/* coefficient: coefficient array */ +void kernel(float input[], float output[], float coefficient[]) { + int i; + int j = 0; + + for (i = 0; i < NTAPS; ++i) { + float tmp = input[i] * coefficient[i]; + output[j] += tmp; + } +} diff --git a/test/neura/for_loop/test.mlir b/test/neura/for_loop/test.mlir new file mode 100644 index 00000000..c2c7506c --- /dev/null +++ b/test/neura/for_loop/test.mlir @@ -0,0 +1,18 @@ +// Compiles the original kernel to mlir, then lower back to llvm, eventually binary. +// RUN: clang++ -S -emit-llvm -O2 -o %t-kernel.ll kernel.cpp +// RUN: mlir-translate --import-llvm %t-kernel.ll -o %t-kernel.mlir + +// Lowers to neura. 
+// RUN: mlir-neura-opt \ +// RUN: --lower-llvm-to-neura \ +// RUN: --fuse-patterns \ +// RUN: --insert-mov \ +// RUN: %t-kernel.mlir | FileCheck %s + +// Verifies the neura ops are generated. And fusion happens. +// CHECK: "neura.vfmul" +// CHECK: "neura.add" +// CHECK: "neura.fmul_fadd" +// CHECK: [[LHS:%.*]] = neura.mov %{{.*}} +// CHECK-NEXT: [[RHS:%.*]] = neura.mov %{{.*}} +// CHECK-NEXT: [[RES:%.*]] = "neura.add"([[LHS]], [[RHS]]) diff --git a/tools/mlir-neura-opt/CMakeLists.txt b/tools/mlir-neura-opt/CMakeLists.txt index 5c774f2f..756124f0 100644 --- a/tools/mlir-neura-opt/CMakeLists.txt +++ b/tools/mlir-neura-opt/CMakeLists.txt @@ -8,6 +8,7 @@ add_executable(mlir-neura-opt target_link_libraries(mlir-neura-opt PRIVATE MLIRDialect # MLIR Dialect MLIRIR # MLIR Core IR + MLIRDLTIDialect MLIRLLVMDialect MLIROptLib # MLIR optimizer library MLIRSupport # MLIR Support utilities diff --git a/tools/mlir-neura-opt/mlir-neura-opt.cpp b/tools/mlir-neura-opt/mlir-neura-opt.cpp index b331313e..1de2c2d7 100644 --- a/tools/mlir-neura-opt/mlir-neura-opt.cpp +++ b/tools/mlir-neura-opt/mlir-neura-opt.cpp @@ -1,5 +1,6 @@ // tools/mlir-neura-opt/mlir-neura-opt.cpp +#include "mlir/Dialect/DLTI/DLTI.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/InitAllDialects.h" #include "mlir/InitAllPasses.h" @@ -18,6 +19,7 @@ int main(int argc, char **argv) { registry.insert(); registry.insert(); registry.insert(); + registry.insert(); registry.insert(); mlir::registerPass([]() -> std::unique_ptr { @@ -29,7 +31,6 @@ int main(int argc, char **argv) { mlir::registerPass([]() -> std::unique_ptr { return mlir::neura::createInsertMovPass(); }); - mlir::registerPass([]() -> std::unique_ptr { return mlir::neura::createFusePatternsPass(); });