diff --git a/include/Conversion/ConversionPasses.h b/include/Conversion/ConversionPasses.h index 2d871868..78f6c3ce 100644 --- a/include/Conversion/ConversionPasses.h +++ b/include/Conversion/ConversionPasses.h @@ -20,6 +20,7 @@ namespace mlir { std::unique_ptr createLowerArithToNeuraPass(); std::unique_ptr createLowerLlvmToNeuraPass(); std::unique_ptr createLowerMemRefToNeuraPass(); +std::unique_ptr createLowerBuiltinToNeuraPass(); #define GEN_PASS_REGISTRATION #include "Conversion/ConversionPasses.h.inc" diff --git a/include/Conversion/ConversionPasses.td b/include/Conversion/ConversionPasses.td index 8f2db985..2e79dd96 100644 --- a/include/Conversion/ConversionPasses.td +++ b/include/Conversion/ConversionPasses.td @@ -26,4 +26,10 @@ def LowerMemRefToNeura : Pass<"lower-memref-to-neura", "ModuleOp">{ let constructor = "mlir::createLowerMemRefToNeuraPass()"; } +def LowerBuiltinToNeura : Pass<"lower-builtin-to-neura", "ModuleOp">{ + let summary = "Lower Builtin to Neura dialect"; + let description = [{Lower Builtin operations to Neura dialect operations.}]; + let constructor = "mlir::createLowerBuiltinToNeuraPass()"; +} + #endif // CONVERSION_PASSES_TD \ No newline at end of file diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td index e988774a..2c2a8758 100644 --- a/include/NeuraDialect/NeuraOps.td +++ b/include/NeuraDialect/NeuraOps.td @@ -116,6 +116,34 @@ def Neura_StoreOp : Op { // let assemblyFormat = "$value `,` $addr `,` $predicate attr-dict"; } +// Defines a load operation with integrated address calculation. +def Neura_LoadIndexedOp: Op{ + let summary = "Load with integrated address calculation for multi-dimensional arrays"; + let description = [{ + Calculates the address using the base address and indices. + Load the value at the calculated address. 
+ Example: + %value = neura.load_indexed %base[%i, %j : index, index] memref<?x?xf32> : f32 + }]; + let arguments = (ins Arg:$base, Variadic:$indices, Optional:$predicate); + let results = (outs AnyType:$result); + let assemblyFormat = "$base `[` $indices `:` type($indices) `]` type($base) ($predicate^ `:` type($predicate))? attr-dict `:` type($result)"; +} + +// Defines a store operation with integrated address calculation. +def Neura_StoreIndexedOp: Op { + let summary = "Store with integrated address calculation for multi-dimensional arrays"; + let description = [{ + Calculates the address using the base address and indices. + Store the value at the calculated address. + Example: + neura.store_indexed %value to %base[%i, %j : index, index] memref<?x?xf32> : f32 + }]; + let arguments = (ins AnyType:$value, Arg:$base, Variadic:$indices, Optional:$predicate); + let results = (outs); + let assemblyFormat = "$value `to` $base `[` $indices `:` type($indices) `]` type($base) ($predicate^ `:` type($predicate))? attr-dict `:` type($value)"; +} + // Defines a pointer computation operation. def Neura_GEP : Op { let summary = "Pointer computation using offset indices"; @@ -131,14 +159,14 @@ def Neura_CondBr : Op:$trueArgs, Variadic:$falseArgs); let successors = (successor AnySuccessor:$trueDest, AnySuccessor:$falseDest); - let assemblyFormat = "$condition `:` type($condition) ($predicate^ `:` type($predicate))? `then` ($trueArgs^)? `:` type($trueArgs) `to` $trueDest `else` ($falseArgs^)? `:` type($falseArgs) `to` $falseDest attr-dict"; + let assemblyFormat = "$condition `:` type($condition) ($predicate^ `:` type($predicate))? `then` ($trueArgs^ `:` type($trueArgs))? `to` $trueDest `else` ($falseArgs^ `:` type($falseArgs))? `to` $falseDest attr-dict"; } // Defines an unconditional branch operation. def Neura_Br : Op { let arguments = (ins Variadic:$args); let successors = (successor AnySuccessor:$dest); - let assemblyFormat = "($args^)? `:` type($args) `to` $dest attr-dict"; + let assemblyFormat = "($args^ `:` type($args))? 
`to` $dest attr-dict"; } def Neura_SelOp : Op { diff --git a/include/NeuraDialect/NeuraPasses.td b/include/NeuraDialect/NeuraPasses.td index 8808a452..426fe6d0 100644 --- a/include/NeuraDialect/NeuraPasses.td +++ b/include/NeuraDialect/NeuraPasses.td @@ -57,5 +57,4 @@ def MapToAccelerator : Pass<"map-to-accelerator", "ModuleOp"> { }]; let constructor = "neura::createMapToAcceleratorPass()"; } - #endif // NEURA_PASSES_TD \ No newline at end of file diff --git a/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp b/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp index 72c83c6b..e1960b66 100644 --- a/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp +++ b/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp @@ -1,3 +1,4 @@ +#include "Common/AcceleratorAttrs.h" #include "Conversion/ConversionPasses.h" #include "NeuraDialect/NeuraDialect.h" #include "NeuraDialect/NeuraOps.h" @@ -8,6 +9,7 @@ #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/StringRef.h" namespace mlir { namespace neura { @@ -24,9 +26,6 @@ using namespace mlir; using namespace mlir::func; using namespace mlir::neura; -#define GEN_PASS_DEF_LOWERARITHTONEURA -#include "NeuraDialect/NeuraPasses.h.inc" - namespace { struct ArithConstantToNeuraConstant @@ -35,10 +34,10 @@ struct ArithConstantToNeuraConstant LogicalResult matchAndRewrite(arith::ConstantOp op, PatternRewriter &rewriter) const override { - // Converts arith constant to Neura constant + // Converts arith constant to Neura constant. Type result_type = op.getType(); Attribute value = op.getValue(); - // Optional predicate parameter can be null + // Optional predicate parameter can be null. 
rewriter.replaceOpWithNewOp(op, result_type, value, nullptr); return success(); @@ -54,7 +53,7 @@ struct ArithAddIToNeuraAdd : public OpRewritePattern { Value rhs = op.getRhs(); Type result_type = op.getType(); - // Optional predicate: default to null + // Optional predicate: default to null. rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, nullptr); return success(); @@ -70,7 +69,7 @@ struct ArithFAddToNeuraFAdd : public OpRewritePattern { Value rhs = op.getRhs(); Type result_type = op.getType(); - // Optional predicate: default to null + // Optional predicate: default to null. rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, nullptr); return success(); @@ -86,7 +85,7 @@ struct ArithSubIToNeuraSub : public OpRewritePattern { Value rhs = op.getRhs(); Type result_type = op.getType(); - // Optional predicate: default to null + // Optional predicate: default to null. rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, nullptr); return success(); @@ -102,7 +101,7 @@ struct ArithSubFToNeuraFSub : public OpRewritePattern { Value rhs = op.getRhs(); Type result_type = op.getType(); - // Optional predicate: default to null + // Optional predicate: default to null. rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, nullptr); return success(); @@ -118,7 +117,7 @@ struct ArithMulFToNeuraFMul : public OpRewritePattern { Value rhs = op.getRhs(); Type result_type = op.getType(); - // Optional predicate: default to null + // Optional predicate: default to null. rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, nullptr); return success(); @@ -134,7 +133,7 @@ struct ArithFDivToNeuraFDiv : public OpRewritePattern { Value rhs = op.getRhs(); Type result_type = op.getType(); - // Optional predicate: default to null + // Optional predicate: default to null. 
rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs, nullptr); return success(); @@ -185,8 +184,8 @@ struct ArithCmpiToNeuraICmp : public OpRewritePattern { return rewriter.notifyMatchFailure(op, "Unsupported arith CmpIOp type"); } - // Convert arith CmpIOp to Neura ICmpOp - // Optional predicate: default to null + // Converts arith CmpIOp to Neura ICmpOp. + // Optional predicate: default to null. rewriter.replaceOpWithNewOp( op, result_type, lhs, rhs, nullptr, rewriter.getStringAttr(cmp_type)); return success(); @@ -203,7 +202,7 @@ struct ArithSelectToNeuraSel : public OpRewritePattern { Value false_value = op.getFalseValue(); Type result_type = op.getType(); - // Convert arith SelectOp to Neura SelOp + // Converts arith SelectOp to Neura SelOp. rewriter.replaceOpWithNewOp(op, result_type, true_value, false_value, condition); return success(); @@ -218,8 +217,8 @@ struct ArithExtUIToNeuraCast : public OpRewritePattern { Value input = op.getIn(); Type result_type = op.getType(); - // Convert arith ExtUIOp to Neura cast operation - // Optional predicate: default to null + // Converts arith ExtUIOp to Neura cast operation. + // Optional predicate: default to null. rewriter.replaceOpWithNewOp( op, result_type, input, rewriter.getStringAttr("extui"), nullptr); return success(); @@ -234,8 +233,8 @@ struct ArithExtfToNeuraCast : public OpRewritePattern { Value input = op.getIn(); Type result_type = op.getType(); - // Convert arith ExtFOp to Neura cast operation - // Optional predicate: default to null + // Converts arith ExtFOp to Neura cast operation. + // Optional predicate: default to null. 
rewriter.replaceOpWithNewOp( op, result_type, input, rewriter.getStringAttr("extf"), nullptr); return success(); @@ -250,11 +249,23 @@ struct ArithIndexCastToNeuraCast PatternRewriter &rewriter) const override { Value input = op.getIn(); Type result_type = op.getType(); + Type in_type = input.getType(); + StringRef cast_string; + + // The isa check is generic and handles any integer bit width. + // (e.g., i32, i64). + if (in_type.isIndex() && isa(result_type)) { + cast_string = "index_to_int"; + } else if (isa(in_type) && result_type.isIndex()) { + cast_string = "int_to_index"; + } else { + return rewriter.notifyMatchFailure(op, "index_cast"); + } - // Convert arith IndexCastOp to Neura cast operation - // Optional predicate: default to null + // Converts arith IndexCastOp to Neura cast operation. + // Optional predicate: default to null. rewriter.replaceOpWithNewOp( - op, result_type, input, rewriter.getStringAttr("indexCast"), nullptr); + op, result_type, input, rewriter.getStringAttr(cast_string), nullptr); return success(); } }; @@ -274,16 +285,29 @@ struct LowerArithToNeuraPass } void runOnOperation() override { - RewritePatternSet patterns(&getContext()); - mlir::neura::arith2neura::populateWithGenerated(patterns); - patterns - .add(&getContext()); - if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) { - signalPassFailure(); - } + ModuleOp module_op = getOperation(); + MLIRContext *context = &getContext(); + module_op.walk([&](func::FuncOp func_op) { + if (func_op->hasAttr(mlir::accel::kAcceleratorAttr)) { + auto target = + func_op->getAttrOfType(mlir::accel::kAcceleratorAttr); + if (target && target.getValue() == mlir::accel::kNeuraTarget) { + RewritePatternSet patterns(&getContext()); + mlir::neura::arith2neura::populateWithGenerated(patterns); + patterns.add(context); + // Rewrites only this function so functions not targeting Neura stay untouched. + if (failed( + applyPatternsGreedily(func_op, std::move(patterns)))) { + signalPassFailure(); + } + } + } + }); } }; } // namespace diff --git 
a/lib/Conversion/BuiltinToNeura/BuiltinToNeuraPass.cpp b/lib/Conversion/BuiltinToNeura/BuiltinToNeuraPass.cpp new file mode 100644 index 00000000..260d8d90 --- /dev/null +++ b/lib/Conversion/BuiltinToNeura/BuiltinToNeuraPass.cpp @@ -0,0 +1,91 @@ +#include "Common/AcceleratorAttrs.h" +#include "Conversion/ConversionPasses.h" +#include "NeuraDialect/NeuraDialect.h" +#include "NeuraDialect/NeuraOps.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/Support/raw_ostream.h" + +using namespace mlir; +using namespace mlir::neura; + +namespace { + +struct BuiltinUnrealizedConversionCastToNeuraCast + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(mlir::UnrealizedConversionCastOp op, + PatternRewriter &rewriter) const override { + // Only handles simple 1:1 casts. + // TODO: Handle more complex casts if needed. + if (op.getInputs().size() == 1 && op.getResults().size() == 1) { + Value input = op.getInputs()[0]; + Type result_type = op.getResults()[0].getType(); + Type input_type = input.getType(); + + StringRef cast_type; + if (input_type.isIndex() && isa(result_type)) { + cast_type = "index_to_int"; + } else if (isa(input_type) && result_type.isIndex()) { + cast_type = "int_to_index"; + } else { + return rewriter.notifyMatchFailure(op, "unsupported cast"); + } + + // Optional predicate: default to null. 
+ rewriter.replaceOpWithNewOp( + op, result_type, input, rewriter.getStringAttr(cast_type), nullptr); + return success(); + } + return failure(); + } +}; + +struct LowerBuiltinToNeuraPass + : public PassWrapper> { + + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LowerBuiltinToNeuraPass) + + StringRef getArgument() const override { return "lower-builtin-to-neura"; } + StringRef getDescription() const override { + return "Lower Builtin operations to Neura dialect operations"; + } + + void getDependentDialects(DialectRegistry &registry) const override { + registry.insert(); + } + + void runOnOperation() override { + ModuleOp module_op = getOperation(); + MLIRContext *context = &getContext(); + RewritePatternSet patterns(&getContext()); + patterns.add(context); + // Freezes the patterns once so every matching function reuses the same set; + // a RewritePatternSet is left empty after the first std::move. + FrozenRewritePatternSet frozen_patterns(std::move(patterns)); + module_op.walk([&](func::FuncOp func_op) { + if (func_op->hasAttr(mlir::accel::kAcceleratorAttr)) { + auto target = + func_op->getAttrOfType(mlir::accel::kAcceleratorAttr); + if (target && target.getValue() == mlir::accel::kNeuraTarget) { + if (failed(applyPatternsGreedily(func_op, frozen_patterns))) { + return signalPassFailure(); + } + } + } + }); + } +}; +} // namespace + +std::unique_ptr mlir::createLowerBuiltinToNeuraPass() { + return std::make_unique(); +} diff --git a/lib/Conversion/BuiltinToNeura/CMakeLists.txt b/lib/Conversion/BuiltinToNeura/CMakeLists.txt new file mode 100644 index 00000000..094aa44d --- /dev/null +++ b/lib/Conversion/BuiltinToNeura/CMakeLists.txt @@ -0,0 +1,18 @@ +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +add_mlir_conversion_library(MLIRNeuraBuiltinToNeuraPass + BuiltinToNeuraPass.cpp + + DEPENDS + MLIRConversionIncGen + + LINK_LIBS PUBLIC + MLIRArithDialect + MLIRFuncDialect + MLIRLLVMDialect + MLIRIR + MLIRPass + MLIRTransforms + MLIRNeura + MLIRSupport +) diff --git a/lib/Conversion/CMakeLists.txt b/lib/Conversion/CMakeLists.txt index af5bb68a..ee851744 100644 --- a/lib/Conversion/CMakeLists.txt +++ b/lib/Conversion/CMakeLists.txt @@ -3,6 +3,7 @@ 
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) add_subdirectory(ArithToNeura) add_subdirectory(LlvmToNeura) add_subdirectory(MemRefToNeura) +add_subdirectory(BuiltinToNeura) # add_mlir_library( # MLIRNeuraConversion @@ -32,5 +33,7 @@ target_link_libraries(MLIRConversion INTERFACE MLIRNeura MLIRNeuraArithToNeuraPass MLIRNeuraLlvmToNeuraPass + MLIRNeuraMemRefToNeuraPass + MLIRNeuraBuiltinToNeuraPass ${dialect_libs} ) \ No newline at end of file diff --git a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp index 6bc815b3..758c3fca 100644 --- a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp +++ b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp @@ -25,10 +25,6 @@ namespace llvm2neura { using namespace mlir; using namespace mlir::neura; -#define GEN_PASS_DEF_LOWERLLVMTONEURA -#include "NeuraDialect/NeuraPasses.h.inc" - - namespace { // Lowers integer add from mlir.llvm.add to nuera.add. We provide the lowering // here instead of tablegen due to that mlir.llvm.add uses an EnumProperty diff --git a/lib/Conversion/LlvmToNeura/LlvmToNeuraPatterns.td b/lib/Conversion/LlvmToNeura/LlvmToNeuraPatterns.td index 3aef67d8..1b99a47c 100644 --- a/lib/Conversion/LlvmToNeura/LlvmToNeuraPatterns.td +++ b/lib/Conversion/LlvmToNeura/LlvmToNeuraPatterns.td @@ -2,11 +2,3 @@ include "mlir/IR/OpBase.td" include "mlir/IR/PatternBase.td" include "mlir/Dialect/LLVMIR/LLVMOps.td" include "NeuraDialect/NeuraOps.td" - -// Floating point binary operations. 
-// Deprecated Pattern: Because we need the predicate bit to be set to null initially -// def : Pat< -// (LLVM_FSubOp $lhs, $rhs, $_fastmath), -// (Neura_FSubOp $lhs, $rhs) -// >; - diff --git a/lib/Conversion/MemRefToNeura/MemRefToNeuraPass.cpp b/lib/Conversion/MemRefToNeura/MemRefToNeuraPass.cpp index 3d3b543c..312797e4 100644 --- a/lib/Conversion/MemRefToNeura/MemRefToNeuraPass.cpp +++ b/lib/Conversion/MemRefToNeura/MemRefToNeuraPass.cpp @@ -1,23 +1,54 @@ #include "Common/AcceleratorAttrs.h" +#include "Conversion/ConversionPasses.h" #include "NeuraDialect/NeuraDialect.h" #include "NeuraDialect/NeuraOps.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/MLIRContext.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "Conversion/ConversionPasses.h" +#include "llvm/Support/raw_ostream.h" using namespace mlir; using namespace mlir::neura; -#define GEN_PASS_DEF_LOWERLLVMTONEURA -#include "NeuraDialect/NeuraPasses.h.inc" +namespace { +struct MemRefLoadLowering : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; -namespace { + LogicalResult matchAndRewrite(memref::LoadOp load_op, + PatternRewriter &rewriter) const override { + // Creates a Neura LoadIndexedOp from the MemRef LoadOp. 
+ Type result_type = load_op.getType(); + Value memref = load_op.getMemRef(); + ValueRange indices = load_op.getIndices(); + // Optional predicate: default to null. + rewriter.replaceOpWithNewOp(load_op, result_type, + memref, indices, nullptr); + return success(); + } +}; + +struct MemRefStoreLowering : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(memref::StoreOp store_op, + PatternRewriter &rewriter) const override { + // Creates a Neura StoreIndexedOp from the MemRef StoreOp. + Value value = store_op.getValueToStore(); + Value memref = store_op.getMemRef(); + ValueRange indices = store_op.getIndices(); + // Optional predicate: default to null. + rewriter.replaceOpWithNewOp(store_op, value, memref, + indices, nullptr); + return success(); + } +}; struct LowerMemRefToNeuraPass : public PassWrapper> { @@ -34,7 +65,25 @@ struct LowerMemRefToNeuraPass } void runOnOperation() override { + ModuleOp module_op = getOperation(); + MLIRContext *context = &getContext(); RewritePatternSet patterns(&getContext()); + patterns.add(context); + patterns.add(context); + // Freezes the patterns once so every matching function reuses the same set; + // a RewritePatternSet is left empty after the first std::move. + FrozenRewritePatternSet frozen_patterns(std::move(patterns)); + module_op.walk([&](func::FuncOp func_op) { + if (func_op->hasAttr(mlir::accel::kAcceleratorAttr)) { + auto target = + func_op->getAttrOfType(mlir::accel::kAcceleratorAttr); + if (target && target.getValue() == mlir::accel::kNeuraTarget) { + if (failed(applyPatternsGreedily(func_op, frozen_patterns))) { + return signalPassFailure(); + } + } + } + }); } }; } // namespace diff --git a/lib/NeuraDialect/NeuraPasses.cpp b/lib/NeuraDialect/NeuraPasses.cpp index 6a88abee..a346ddb0 100644 --- a/lib/NeuraDialect/NeuraPasses.cpp +++ b/lib/NeuraDialect/NeuraPasses.cpp @@ -2,19 +2,20 @@ #include "mlir/Pass/PassRegistry.h" #include "mlir/Transforms/Passes.h" +#include "Conversion/ConversionPasses.h" #include "NeuraDialect/NeuraDialect.h" #include "NeuraDialect/NeuraOps.h" #include "NeuraDialect/NeuraPasses.h" #include "NeuraDialect/NeuraTypes.h" -#include 
"Conversion/ConversionPasses.h" // This pass pipeline can convert all the other dialects into the Neura dialect void mlir::neura::registerNeuraConversionPassPipeline() { - PassPipelineRegistration<>("neura-conversion", - "Convert all dialects to Neura dialect", - [](OpPassManager &pm) { - // Convert all the other dialects into the Neura dialect - pm.addPass(mlir::createLowerArithToNeuraPass()); - pm.addPass(mlir::createLowerLlvmToNeuraPass()); - }); + PassPipelineRegistration<>( + "neura-conversion", "Convert all dialects to Neura dialect", + [](OpPassManager &pm) { + pm.addPass(mlir::neura::createAssignAcceleratorPass()); + // Convert all the other dialects into the Neura dialect + pm.addPass(mlir::createLowerArithToNeuraPass()); + pm.addPass(mlir::createLowerLlvmToNeuraPass()); + }); } \ No newline at end of file diff --git a/test/affine2neura/bert/bert_node0/bert_node0.mlir b/test/affine2neura/bert/bert_node0/bert_node0.mlir index 4c1eef85..ba82071e 100644 --- a/test/affine2neura/bert/bert_node0/bert_node0.mlir +++ b/test/affine2neura/bert/bert_node0/bert_node0.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir -// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura | FileCheck %s +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s module attributes {} { func.func @_Z10bert_node0PA128_KiPA128_b(%arg0: memref, %arg1: memref) attributes {} { @@ -15,25 +15,23 @@ module attributes {} { } // CHECK: func.func @_Z10bert_node0PA128_KiPA128_b(%arg0: memref, %arg1: memref) attributes {accelerator = "neura"} { -// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index -// CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index -// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : i32}> : () -> i32 -// CHECK-NEXT: %3 = "neura.constant"() 
<{value = 0 : index}> : () -> index -// CHECK-NEXT: %4 = builtin.unrealized_conversion_cast %3 : index to i64 -// CHECK-NEXT: llvm.br ^bb1(%4 : i64) +// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index +// CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index +// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : i32}> : () -> i32 +// CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : index}> : () -> index +// CHECK-NEXT: %4 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %4 : i64 to ^bb1 // CHECK-NEXT: ^bb1(%5: i64): // 2 preds: ^bb0, ^bb2 -// CHECK-NEXT: %6 = builtin.unrealized_conversion_cast %5 : i64 to index -// CHECK-NEXT: %7 = "neura.icmp"(%6, %1) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: llvm.cond_br %7, ^bb2, ^bb3 +// CHECK-NEXT: %6 = "neura.cast"(%5) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %7 = "neura.icmp"(%6, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %7 : i1 then to ^bb2 else to ^bb3 // CHECK-NEXT: ^bb2: // pred: ^bb1 -// CHECK-NEXT: %8 = memref.load %arg0[%3, %6] : memref -// CHECK-NEXT: %9 = "neura.icmp"(%8, %2) <{cmpType = "sgt"}> : (i32, i32) -> i1 -// CHECK-NEXT: %10 = "neura.cast"(%9) <{cast_type = "extui"}> : (i1) -> i8 -// CHECK-NEXT: memref.store %10, %arg1[%3, %6] : memref -// CHECK-NEXT: %11 = "neura.add"(%6, %0) : (index, index) -> index -// CHECK-NEXT: %12 = builtin.unrealized_conversion_cast %11 : index to i64 -// CHECK-NEXT: llvm.br ^bb1(%12 : i64) +// CHECK-NEXT: %8 = neura.load_indexed %arg0[%3, %6 : index, index] memref : i32 +// CHECK-NEXT: %9 = "neura.icmp"(%8, %2) <{cmpType = "sgt"}> : (i32, i32) -> i1 +// CHECK-NEXT: %10 = "neura.cast"(%9) <{cast_type = "extui"}> : (i1) -> i8 +// CHECK-NEXT: neura.store_indexed %10 to %arg1[%3, %6 : index, index] memref : i8 +// CHECK-NEXT: %11 = "neura.add"(%6, %0) : (index, index) -> index +// CHECK-NEXT: %12 = "neura.cast"(%11) 
<{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %12 : i64 to ^bb1 // CHECK-NEXT: ^bb3: // pred: ^bb1 -// CHECK-NEXT: return -// CHECK-NEXT: } -// CHECK-NEXT: } +// CHECK-NEXT: "neura.return"() : () -> () diff --git a/test/affine2neura/bert/bert_node1/bert_node1.mlir b/test/affine2neura/bert/bert_node1/bert_node1.mlir index 0280d7c3..f79959a2 100644 --- a/test/affine2neura/bert/bert_node1/bert_node1.mlir +++ b/test/affine2neura/bert/bert_node1/bert_node1.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir -// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura | FileCheck %s +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s module attributes {} { func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {} { affine.for %arg2 = 0 to 128 { @@ -13,32 +13,32 @@ module attributes {} { } // CHECK: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura"} { -// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index -// CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index -// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : index}> : () -> index -// CHECK-NEXT: %3 = builtin.unrealized_conversion_cast %2 : index to i64 -// CHECK-NEXT: llvm.br ^bb1(%3 : i64) -// CHECK-NEXT: ^bb1(%4: i64): // 2 preds: ^bb0, ^bb5 -// CHECK-NEXT: %5 = builtin.unrealized_conversion_cast %4 : i64 to index -// CHECK-NEXT: %6 = "neura.icmp"(%5, %1) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: llvm.cond_br %6, ^bb2, ^bb6 -// CHECK-NEXT: ^bb2: // pred: ^bb1 -// CHECK-NEXT: %7 = builtin.unrealized_conversion_cast %2 : index to i64 -// CHECK-NEXT: llvm.br ^bb3(%7 : i64) -// CHECK-NEXT: ^bb3(%8: i64): // 2 preds: ^bb2, ^bb4 
-// CHECK-NEXT: %9 = builtin.unrealized_conversion_cast %8 : i64 to index -// CHECK-NEXT: %10 = "neura.icmp"(%9, %1) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: llvm.cond_br %10, ^bb4, ^bb5 -// CHECK-NEXT: ^bb4: // pred: ^bb3 -// CHECK-NEXT: %11 = memref.load %arg0[%2, %2, %2, %2, %2, %9] : memref -// CHECK-NEXT: memref.store %11, %arg1[%2, %2, %5, %2, %2, %9] : memref -// CHECK-NEXT: %12 = "neura.add"(%9, %0) : (index, index) -> index -// CHECK-NEXT: %13 = builtin.unrealized_conversion_cast %12 : index to i64 -// CHECK-NEXT: llvm.br ^bb3(%13 : i64) -// CHECK-NEXT: ^bb5: // pred: ^bb3 -// CHECK-NEXT: %14 = "neura.add"(%5, %0) : (index, index) -> index -// CHECK-NEXT: %15 = builtin.unrealized_conversion_cast %14 : index to i64 -// CHECK-NEXT: llvm.br ^bb1(%15 : i64) -// CHECK-NEXT: ^bb6: // pred: ^bb1 -// CHECK-NEXT: return -// CHECK-NEXT: } \ No newline at end of file +// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index +// CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index +// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : index}> : () -> index +// CHECK-NEXT: %3 = "neura.cast"(%2) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %3 : i64 to ^bb1 +// CHECK-NEXT: ^bb1(%4: i64): // 2 preds: ^bb0, ^bb5 +// CHECK-NEXT: %5 = "neura.cast"(%4) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %6 = "neura.icmp"(%5, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %6 : i1 then to ^bb2 else to ^bb6 +// CHECK-NEXT: ^bb2: // pred: ^bb1 +// CHECK-NEXT: %7 = "neura.cast"(%2) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %7 : i64 to ^bb3 +// CHECK-NEXT: ^bb3(%8: i64): // 2 preds: ^bb2, ^bb4 +// CHECK-NEXT: %9 = "neura.cast"(%8) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %10 = "neura.icmp"(%9, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %10 : i1 then to ^bb4 else to ^bb5 
+// CHECK-NEXT: ^bb4: // pred: ^bb3 +// CHECK-NEXT: %11 = neura.load_indexed %arg0[%2, %2, %2, %2, %2, %9 : index, index, index, index, index, index] memref : i8 +// CHECK-NEXT: neura.store_indexed %11 to %arg1[%2, %2, %5, %2, %2, %9 : index, index, index, index, index, index] memref : i8 +// CHECK-NEXT: %12 = "neura.add"(%9, %0) : (index, index) -> index +// CHECK-NEXT: %13 = "neura.cast"(%12) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %13 : i64 to ^bb3 +// CHECK-NEXT: ^bb5: // pred: ^bb3 +// CHECK-NEXT: %14 = "neura.add"(%5, %0) : (index, index) -> index +// CHECK-NEXT: %15 = "neura.cast"(%14) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %15 : i64 to ^bb1 +// CHECK-NEXT: ^bb6: // pred: ^bb1 +// CHECK-NEXT: "neura.return"() : () -> () +// CHECK-NEXT: } diff --git a/test/affine2neura/bert/bert_node2/bert_node2.mlir b/test/affine2neura/bert/bert_node2/bert_node2.mlir index 6b70666a..0bc0a274 100644 --- a/test/affine2neura/bert/bert_node2/bert_node2.mlir +++ b/test/affine2neura/bert/bert_node2/bert_node2.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir -// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura | FileCheck %s +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s module attributes {} { func.func @_Z10bert_node2PA128_KiPA768_KfPA128_A768_f(%arg0: memref, %arg1: memref, %arg2: memref) attributes {} { %false = arith.constant false @@ -28,51 +28,51 @@ module attributes {} { } // CHECK: func.func @_Z10bert_node2PA128_KiPA768_KfPA128_A768_f(%arg0: memref, %arg1: memref, %arg2: memref) attributes {accelerator = "neura"} { -// CHECK-NEXT: %0 = "neura.constant"() <{value = 768 : index}> : () -> index -// CHECK-NEXT: %1 = "neura.constant"() <{value = 1 : index}> : () -> index -// CHECK-NEXT: %2 = 
"neura.constant"() <{value = 128 : index}> : () -> index -// CHECK-NEXT: %3 = "neura.constant"() <{value = false}> : () -> i1 -// CHECK-NEXT: %4 = "neura.constant"() <{value = 30521 : i32}> : () -> i32 -// CHECK-NEXT: %5 = "neura.constant"() <{value = 0 : i32}> : () -> i32 -// CHECK-NEXT: %6 = "neura.constant"() <{value = 30522 : i32}> : () -> i32 -// CHECK-NEXT: %7 = "neura.constant"() <{value = 0 : index}> : () -> index -// CHECK-NEXT: %8 = builtin.unrealized_conversion_cast %7 : index to i64 -// CHECK-NEXT: llvm.br ^bb1(%8 : i64) +// CHECK-NEXT: %0 = "neura.constant"() <{value = 768 : index}> : () -> index +// CHECK-NEXT: %1 = "neura.constant"() <{value = 1 : index}> : () -> index +// CHECK-NEXT: %2 = "neura.constant"() <{value = 128 : index}> : () -> index +// CHECK-NEXT: %3 = "neura.constant"() <{value = false}> : () -> i1 +// CHECK-NEXT: %4 = "neura.constant"() <{value = 30521 : i32}> : () -> i32 +// CHECK-NEXT: %5 = "neura.constant"() <{value = 0 : i32}> : () -> i32 +// CHECK-NEXT: %6 = "neura.constant"() <{value = 30522 : i32}> : () -> i32 +// CHECK-NEXT: %7 = "neura.constant"() <{value = 0 : index}> : () -> index +// CHECK-NEXT: %8 = "neura.cast"(%7) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %8 : i64 to ^bb1 // CHECK-NEXT: ^bb1(%9: i64): // 2 preds: ^bb0, ^bb9 -// CHECK-NEXT: %10 = builtin.unrealized_conversion_cast %9 : i64 to index -// CHECK-NEXT: %11 = "neura.icmp"(%10, %2) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: llvm.cond_br %11, ^bb2, ^bb10 +// CHECK-NEXT: %10 = "neura.cast"(%9) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %11 = "neura.icmp"(%10, %2) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %11 : i1 then to ^bb2 else to ^bb10 // CHECK-NEXT: ^bb2: // pred: ^bb1 -// CHECK-NEXT: %12 = builtin.unrealized_conversion_cast %7 : index to i64 -// CHECK-NEXT: llvm.br ^bb3(%12 : i64) +// CHECK-NEXT: %12 = "neura.cast"(%7) <{cast_type = "index_to_int"}> : (index) 
-> i64 +// CHECK-NEXT: neura.br %12 : i64 to ^bb3 // CHECK-NEXT: ^bb3(%13: i64): // 2 preds: ^bb2, ^bb8 -// CHECK-NEXT: %14 = builtin.unrealized_conversion_cast %13 : i64 to index -// CHECK-NEXT: %15 = "neura.icmp"(%14, %0) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: llvm.cond_br %15, ^bb4, ^bb9 +// CHECK-NEXT: %14 = "neura.cast"(%13) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %15 = "neura.icmp"(%14, %0) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %15 : i1 then to ^bb4 else to ^bb9 // CHECK-NEXT: ^bb4: // pred: ^bb3 -// CHECK-NEXT: %16 = memref.load %arg0[%7, %10] : memref -// CHECK-NEXT: %17 = "neura.icmp"(%16, %6) <{cmpType = "sge"}> : (i32, i32) -> i1 -// CHECK-NEXT: %18 = "neura.sel"(%4, %16, %17) : (i32, i32, i1) -> i32 -// CHECK-NEXT: llvm.cond_br %17, ^bb5, ^bb6 +// CHECK-NEXT: %16 = neura.load_indexed %arg0[%7, %10 : index, index] memref : i32 +// CHECK-NEXT: %17 = "neura.icmp"(%16, %6) <{cmpType = "sge"}> : (i32, i32) -> i1 +// CHECK-NEXT: %18 = "neura.sel"(%4, %16, %17) : (i32, i32, i1) -> i32 +// CHECK-NEXT: neura.cond_br %17 : i1 then to ^bb5 else to ^bb6 // CHECK-NEXT: ^bb5: // pred: ^bb4 -// CHECK-NEXT: llvm.br ^bb7(%3 : i1) +// CHECK-NEXT: neura.br %3 : i1 to ^bb7 // CHECK-NEXT: ^bb6: // pred: ^bb4 -// CHECK-NEXT: %19 = "neura.icmp"(%16, %5) <{cmpType = "slt"}> : (i32, i32) -> i1 -// CHECK-NEXT: llvm.br ^bb7(%19 : i1) +// CHECK-NEXT: %19 = "neura.icmp"(%16, %5) <{cmpType = "slt"}> : (i32, i32) -> i1 +// CHECK-NEXT: neura.br %19 : i1 to ^bb7 // CHECK-NEXT: ^bb7(%20: i1): // 2 preds: ^bb5, ^bb6 -// CHECK-NEXT: llvm.br ^bb8 +// CHECK-NEXT: neura.br to ^bb8 // CHECK-NEXT: ^bb8: // pred: ^bb7 -// CHECK-NEXT: %21 = "neura.sel"(%5, %18, %20) : (i32, i32, i1) -> i32 -// CHECK-NEXT: %22 = "neura.cast"(%21) <{cast_type = "indexCast"}> : (i32) -> index -// CHECK-NEXT: %23 = memref.load %arg1[%22, %14] : memref -// CHECK-NEXT: memref.store %23, %arg2[%7, %10, %14] : memref -// CHECK-NEXT: %24 = 
"neura.add"(%14, %1) : (index, index) -> index -// CHECK-NEXT: %25 = builtin.unrealized_conversion_cast %24 : index to i64 -// CHECK-NEXT: llvm.br ^bb3(%25 : i64) +// CHECK-NEXT: %21 = "neura.sel"(%5, %18, %20) : (i32, i32, i1) -> i32 +// CHECK-NEXT: %22 = "neura.cast"(%21) <{cast_type = "int_to_index"}> : (i32) -> index +// CHECK-NEXT: %23 = neura.load_indexed %arg1[%22, %14 : index, index] memref : f32 +// CHECK-NEXT: neura.store_indexed %23 to %arg2[%7, %10, %14 : index, index, index] memref : f32 +// CHECK-NEXT: %24 = "neura.add"(%14, %1) : (index, index) -> index +// CHECK-NEXT: %25 = "neura.cast"(%24) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %25 : i64 to ^bb3 // CHECK-NEXT: ^bb9: // pred: ^bb3 -// CHECK-NEXT: %26 = "neura.add"(%10, %1) : (index, index) -> index -// CHECK-NEXT: %27 = builtin.unrealized_conversion_cast %26 : index to i64 -// CHECK-NEXT: llvm.br ^bb1(%27 : i64) +// CHECK-NEXT: %26 = "neura.add"(%10, %1) : (index, index) -> index +// CHECK-NEXT: %27 = "neura.cast"(%26) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %27 : i64 to ^bb1 // CHECK-NEXT: ^bb10: // pred: ^bb1 -// CHECK-NEXT: return -// CHECK-NEXT: } \ No newline at end of file +// CHECK-NEXT: "neura.return"() : () -> () +// CHECK-NEXT: } diff --git a/test/affine2neura/bert/bert_node28/bert_node28.mlir b/test/affine2neura/bert/bert_node28/bert_node28.mlir index 01f54a51..e93de764 100644 --- a/test/affine2neura/bert/bert_node28/bert_node28.mlir +++ b/test/affine2neura/bert/bert_node28/bert_node28.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir -// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura | FileCheck %s +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s module attributes {} { func.func 
@_Z11bert_node28PA128_A768_KfPA768_S0_PA128_A768_f(%arg0: memref, %arg1: memref, %arg2: memref) attributes {} { affine.for %arg3 = 0 to 128 { @@ -22,43 +22,43 @@ module attributes {} { // CHECK-NEXT: %1 = "neura.constant"() <{value = 1 : index}> : () -> index // CHECK-NEXT: %2 = "neura.constant"() <{value = 128 : index}> : () -> index // CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : index}> : () -> index -// CHECK-NEXT: %4 = builtin.unrealized_conversion_cast %3 : index to i64 -// CHECK-NEXT: llvm.br ^bb1(%4 : i64) +// CHECK-NEXT: %4 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %4 : i64 to ^bb1 // CHECK-NEXT: ^bb1(%5: i64): // 2 preds: ^bb0, ^bb8 -// CHECK-NEXT: %6 = builtin.unrealized_conversion_cast %5 : i64 to index +// CHECK-NEXT: %6 = "neura.cast"(%5) <{cast_type = "int_to_index"}> : (i64) -> index // CHECK-NEXT: %7 = "neura.icmp"(%6, %2) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: llvm.cond_br %7, ^bb2, ^bb9 +// CHECK-NEXT: neura.cond_br %7 : i1 then to ^bb2 else to ^bb9 // CHECK-NEXT: ^bb2: // pred: ^bb1 -// CHECK-NEXT: %8 = builtin.unrealized_conversion_cast %3 : index to i64 -// CHECK-NEXT: llvm.br ^bb3(%8 : i64) +// CHECK-NEXT: %8 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %8 : i64 to ^bb3 // CHECK-NEXT: ^bb3(%9: i64): // 2 preds: ^bb2, ^bb7 -// CHECK-NEXT: %10 = builtin.unrealized_conversion_cast %9 : i64 to index +// CHECK-NEXT: %10 = "neura.cast"(%9) <{cast_type = "int_to_index"}> : (i64) -> index // CHECK-NEXT: %11 = "neura.icmp"(%10, %0) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: llvm.cond_br %11, ^bb4, ^bb8 +// CHECK-NEXT: neura.cond_br %11 : i1 then to ^bb4 else to ^bb8 // CHECK-NEXT: ^bb4: // pred: ^bb3 -// CHECK-NEXT: %12 = builtin.unrealized_conversion_cast %3 : index to i64 -// CHECK-NEXT: llvm.br ^bb5(%12 : i64) +// CHECK-NEXT: %12 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: 
neura.br %12 : i64 to ^bb5 // CHECK-NEXT: ^bb5(%13: i64): // 2 preds: ^bb4, ^bb6 -// CHECK-NEXT: %14 = builtin.unrealized_conversion_cast %13 : i64 to index +// CHECK-NEXT: %14 = "neura.cast"(%13) <{cast_type = "int_to_index"}> : (i64) -> index // CHECK-NEXT: %15 = "neura.icmp"(%14, %0) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: llvm.cond_br %15, ^bb6, ^bb7 +// CHECK-NEXT: neura.cond_br %15 : i1 then to ^bb6 else to ^bb7 // CHECK-NEXT: ^bb6: // pred: ^bb5 -// CHECK-NEXT: %16 = memref.load %arg0[%3, %6, %14] : memref -// CHECK-NEXT: %17 = memref.load %arg1[%3, %14, %10] : memref -// CHECK-NEXT: %18 = memref.load %arg2[%3, %6, %10] : memref +// CHECK-NEXT: %16 = neura.load_indexed %arg0[%3, %6, %14 : index, index, index] memref : f32 +// CHECK-NEXT: %17 = neura.load_indexed %arg1[%3, %14, %10 : index, index, index] memref : f32 +// CHECK-NEXT: %18 = neura.load_indexed %arg2[%3, %6, %10 : index, index, index] memref : f32 // CHECK-NEXT: %19 = "neura.fmul"(%16, %17) : (f32, f32) -> f32 // CHECK-NEXT: %20 = "neura.fadd"(%18, %19) : (f32, f32) -> f32 -// CHECK-NEXT: memref.store %20, %arg2[%3, %6, %10] : memref +// CHECK-NEXT: neura.store_indexed %20 to %arg2[%3, %6, %10 : index, index, index] memref : f32 // CHECK-NEXT: %21 = "neura.add"(%14, %1) : (index, index) -> index -// CHECK-NEXT: %22 = builtin.unrealized_conversion_cast %21 : index to i64 -// CHECK-NEXT: llvm.br ^bb5(%22 : i64) +// CHECK-NEXT: %22 = "neura.cast"(%21) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %22 : i64 to ^bb5 // CHECK-NEXT: ^bb7: // pred: ^bb5 // CHECK-NEXT: %23 = "neura.add"(%10, %1) : (index, index) -> index -// CHECK-NEXT: %24 = builtin.unrealized_conversion_cast %23 : index to i64 -// CHECK-NEXT: llvm.br ^bb3(%24 : i64) +// CHECK-NEXT: %24 = "neura.cast"(%23) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %24 : i64 to ^bb3 // CHECK-NEXT: ^bb8: // pred: ^bb3 // CHECK-NEXT: %25 = "neura.add"(%6, %1) : (index, index) -> 
index -// CHECK-NEXT: %26 = builtin.unrealized_conversion_cast %25 : index to i64 -// CHECK-NEXT: llvm.br ^bb1(%26 : i64) +// CHECK-NEXT: %26 = "neura.cast"(%25) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %26 : i64 to ^bb1 // CHECK-NEXT: ^bb9: // pred: ^bb1 -// CHECK-NEXT: return \ No newline at end of file +// CHECK-NEXT: "neura.return"() : () -> () diff --git a/test/affine2neura/bert/bert_node3/bert_node3.mlir b/test/affine2neura/bert/bert_node3/bert_node3.mlir index 1c400deb..19d121e4 100644 --- a/test/affine2neura/bert/bert_node3/bert_node3.mlir +++ b/test/affine2neura/bert/bert_node3/bert_node3.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir -// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura | FileCheck %s +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s module attributes {} { func.func @_Z10bert_node3PA128_A768_KfS2_PA128_A768_f(%arg0: memref, %arg1: memref, %arg2: memref) attributes {} { affine.for %arg3 = 0 to 128 { @@ -19,30 +19,31 @@ module attributes {} { // CHECK-NEXT: %1 = "neura.constant"() <{value = 1 : index}> : () -> index // CHECK-NEXT: %2 = "neura.constant"() <{value = 128 : index}> : () -> index // CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : index}> : () -> index -// CHECK-NEXT: %4 = builtin.unrealized_conversion_cast %3 : index to i64 -// CHECK-NEXT: llvm.br ^bb1(%4 : i64) +// CHECK-NEXT: %4 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %4 : i64 to ^bb1 // CHECK-NEXT: ^bb1(%5: i64): // 2 preds: ^bb0, ^bb5 -// CHECK-NEXT: %6 = builtin.unrealized_conversion_cast %5 : i64 to index +// CHECK-NEXT: %6 = "neura.cast"(%5) <{cast_type = "int_to_index"}> : (i64) -> index // CHECK-NEXT: %7 = "neura.icmp"(%6, %2) <{cmpType = "slt"}> : (index, index) -> i1 -// 
CHECK-NEXT: llvm.cond_br %7, ^bb2, ^bb6 +// CHECK-NEXT: neura.cond_br %7 : i1 then to ^bb2 else to ^bb6 // CHECK-NEXT: ^bb2: // pred: ^bb1 -// CHECK-NEXT: %8 = builtin.unrealized_conversion_cast %3 : index to i64 -// CHECK-NEXT: llvm.br ^bb3(%8 : i64) +// CHECK-NEXT: %8 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %8 : i64 to ^bb3 // CHECK-NEXT: ^bb3(%9: i64): // 2 preds: ^bb2, ^bb4 -// CHECK-NEXT: %10 = builtin.unrealized_conversion_cast %9 : i64 to index +// CHECK-NEXT: %10 = "neura.cast"(%9) <{cast_type = "int_to_index"}> : (i64) -> index // CHECK-NEXT: %11 = "neura.icmp"(%10, %0) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: llvm.cond_br %11, ^bb4, ^bb5 +// CHECK-NEXT: neura.cond_br %11 : i1 then to ^bb4 else to ^bb5 // CHECK-NEXT: ^bb4: // pred: ^bb3 -// CHECK-NEXT: %12 = memref.load %arg0[%3, %6, %10] : memref -// CHECK-NEXT: %13 = memref.load %arg1[%3, %6, %10] : memref +// CHECK-NEXT: %12 = neura.load_indexed %arg0[%3, %6, %10 : index, index, index] memref : f32 +// CHECK-NEXT: %13 = neura.load_indexed %arg1[%3, %6, %10 : index, index, index] memref : f32 // CHECK-NEXT: %14 = "neura.fadd"(%12, %13) : (f32, f32) -> f32 -// CHECK-NEXT: memref.store %14, %arg2[%3, %6, %10] : memref +// CHECK-NEXT: neura.store_indexed %14 to %arg2[%3, %6, %10 : index, index, index] memref : f32 // CHECK-NEXT: %15 = "neura.add"(%10, %1) : (index, index) -> index -// CHECK-NEXT: %16 = builtin.unrealized_conversion_cast %15 : index to i64 -// CHECK-NEXT: llvm.br ^bb3(%16 : i64) +// CHECK-NEXT: %16 = "neura.cast"(%15) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %16 : i64 to ^bb3 // CHECK-NEXT: ^bb5: // pred: ^bb3 // CHECK-NEXT: %17 = "neura.add"(%6, %1) : (index, index) -> index -// CHECK-NEXT: %18 = builtin.unrealized_conversion_cast %17 : index to i64 -// CHECK-NEXT: llvm.br ^bb1(%18 : i64) +// CHECK-NEXT: %18 = "neura.cast"(%17) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: 
neura.br %18 : i64 to ^bb1 // CHECK-NEXT: ^bb6: // pred: ^bb1 -// CHECK-NEXT: return +// CHECK-NEXT: "neura.return"() : () -> () +// CHECK-NEXT: } \ No newline at end of file diff --git a/test/affine2neura/bert/bert_node8/bert_node8.mlir b/test/affine2neura/bert/bert_node8/bert_node8.mlir index dbb59d40..b0cb6345 100644 --- a/test/affine2neura/bert/bert_node8/bert_node8.mlir +++ b/test/affine2neura/bert/bert_node8/bert_node8.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir -// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura | FileCheck %s +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s module attributes {} { func.func @_Z10bert_node8PA128_A1_KfPA128_A1_f(%arg0: memref, %arg1: memref) attributes {} { %cst = arith.constant 7.680000e+02 : f32 @@ -17,18 +17,19 @@ module attributes {} { // CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index // CHECK-NEXT: %2 = "neura.constant"() <{value = 7.680000e+02 : f32}> : () -> f32 // CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : index}> : () -> index -// CHECK-NEXT: %4 = builtin.unrealized_conversion_cast %3 : index to i64 -// CHECK-NEXT: llvm.br ^bb1(%4 : i64) +// CHECK-NEXT: %4 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %4 : i64 to ^bb1 // CHECK-NEXT: ^bb1(%5: i64): // 2 preds: ^bb0, ^bb2 -// CHECK-NEXT: %6 = builtin.unrealized_conversion_cast %5 : i64 to index +// CHECK-NEXT: %6 = "neura.cast"(%5) <{cast_type = "int_to_index"}> : (i64) -> index // CHECK-NEXT: %7 = "neura.icmp"(%6, %1) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: llvm.cond_br %7, ^bb2, ^bb3 +// CHECK-NEXT: neura.cond_br %7 : i1 then to ^bb2 else to ^bb3 // CHECK-NEXT: ^bb2: // pred: ^bb1 -// CHECK-NEXT: %8 = memref.load %arg0[%3, %6, %3] : memref +// CHECK-NEXT: %8 
= neura.load_indexed %arg0[%3, %6, %3 : index, index, index] memref : f32 // CHECK-NEXT: %9 = "neura.fdiv"(%8, %2) : (f32, f32) -> f32 -// CHECK-NEXT: memref.store %9, %arg1[%3, %6, %3] : memref +// CHECK-NEXT: neura.store_indexed %9 to %arg1[%3, %6, %3 : index, index, index] memref : f32 // CHECK-NEXT: %10 = "neura.add"(%6, %0) : (index, index) -> index -// CHECK-NEXT: %11 = builtin.unrealized_conversion_cast %10 : index to i64 -// CHECK-NEXT: llvm.br ^bb1(%11 : i64) +// CHECK-NEXT: %11 = "neura.cast"(%10) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %11 : i64 to ^bb1 // CHECK-NEXT: ^bb3: // pred: ^bb1 -// CHECK-NEXT: return +// CHECK-NEXT: "neura.return"() : () -> () +// CHECK-NEXT: } diff --git a/test/affine2neura/bert/bert_node9/bert_node9.mlir b/test/affine2neura/bert/bert_node9/bert_node9.mlir index 3641e16b..333589ab 100644 --- a/test/affine2neura/bert/bert_node9/bert_node9.mlir +++ b/test/affine2neura/bert/bert_node9/bert_node9.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir -// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura | FileCheck %s +// RUN: mlir-neura-opt %t-llvm.mlir --assign-accelerator --lower-arith-to-neura --lower-memref-to-neura --lower-builtin-to-neura --lower-llvm-to-neura | FileCheck %s module attributes {} { func.func @_Z10bert_node9PA128_A768_KfPA128_A768_d(%arg0: memref, %arg1: memref) attributes {} { affine.for %arg2 = 0 to 128 { @@ -19,29 +19,30 @@ module attributes {} { // CHECK-NEXT: %1 = "neura.constant"() <{value = 1 : index}> : () -> index // CHECK-NEXT: %2 = "neura.constant"() <{value = 128 : index}> : () -> index // CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : index}> : () -> index -// CHECK-NEXT: %4 = builtin.unrealized_conversion_cast %3 : index to i64 -// CHECK-NEXT: llvm.br ^bb1(%4 : i64) +// CHECK-NEXT: %4 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br 
%4 : i64 to ^bb1 // CHECK-NEXT: ^bb1(%5: i64): // 2 preds: ^bb0, ^bb5 -// CHECK-NEXT: %6 = builtin.unrealized_conversion_cast %5 : i64 to index +// CHECK-NEXT: %6 = "neura.cast"(%5) <{cast_type = "int_to_index"}> : (i64) -> index // CHECK-NEXT: %7 = "neura.icmp"(%6, %2) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: llvm.cond_br %7, ^bb2, ^bb6 +// CHECK-NEXT: neura.cond_br %7 : i1 then to ^bb2 else to ^bb6 // CHECK-NEXT: ^bb2: // pred: ^bb1 -// CHECK-NEXT: %8 = builtin.unrealized_conversion_cast %3 : index to i64 -// CHECK-NEXT: llvm.br ^bb3(%8 : i64) +// CHECK-NEXT: %8 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %8 : i64 to ^bb3 // CHECK-NEXT: ^bb3(%9: i64): // 2 preds: ^bb2, ^bb4 -// CHECK-NEXT: %10 = builtin.unrealized_conversion_cast %9 : i64 to index +// CHECK-NEXT: %10 = "neura.cast"(%9) <{cast_type = "int_to_index"}> : (i64) -> index // CHECK-NEXT: %11 = "neura.icmp"(%10, %0) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: llvm.cond_br %11, ^bb4, ^bb5 +// CHECK-NEXT: neura.cond_br %11 : i1 then to ^bb4 else to ^bb5 // CHECK-NEXT: ^bb4: // pred: ^bb3 -// CHECK-NEXT: %12 = memref.load %arg0[%3, %6, %10] : memref +// CHECK-NEXT: %12 = neura.load_indexed %arg0[%3, %6, %10 : index, index, index] memref : f32 // CHECK-NEXT: %13 = "neura.cast"(%12) <{cast_type = "extf"}> : (f32) -> f64 -// CHECK-NEXT: memref.store %13, %arg1[%3, %6, %10] : memref +// CHECK-NEXT: neura.store_indexed %13 to %arg1[%3, %6, %10 : index, index, index] memref : f64 // CHECK-NEXT: %14 = "neura.add"(%10, %1) : (index, index) -> index -// CHECK-NEXT: %15 = builtin.unrealized_conversion_cast %14 : index to i64 -// CHECK-NEXT: llvm.br ^bb3(%15 : i64) +// CHECK-NEXT: %15 = "neura.cast"(%14) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %15 : i64 to ^bb3 // CHECK-NEXT: ^bb5: // pred: ^bb3 // CHECK-NEXT: %16 = "neura.add"(%6, %1) : (index, index) -> index -// CHECK-NEXT: %17 = 
builtin.unrealized_conversion_cast %16 : index to i64 -// CHECK-NEXT: llvm.br ^bb1(%17 : i64) +// CHECK-NEXT: %17 = "neura.cast"(%16) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %17 : i64 to ^bb1 // CHECK-NEXT: ^bb6: // pred: ^bb1 -// CHECK-NEXT: return +// CHECK-NEXT: "neura.return"() : () -> () +// CHECK-NEXT: } diff --git a/test/arith2neura/add.mlir b/test/arith2neura/add.mlir index fd06d201..71e1a995 100644 --- a/test/arith2neura/add.mlir +++ b/test/arith2neura/add.mlir @@ -1,5 +1,5 @@ // RUN: neura-compiler --neura-conversion %s | FileCheck %s --check-prefix=COMPILER -// RUN: mlir-neura-opt --lower-arith-to-neura %s | FileCheck %s --check-prefix=OPT +// RUN: mlir-neura-opt --assign-accelerator --lower-arith-to-neura %s | FileCheck %s --check-prefix=OPT func.func @test(%a: f32) -> f32 { %b = arith.constant 2.0 : f32 diff --git a/test/neura/arith_add.mlir b/test/neura/arith_add.mlir index 86ecefa7..40ee8fe9 100644 --- a/test/neura/arith_add.mlir +++ b/test/neura/arith_add.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-neura-opt --lower-arith-to-neura --insert-data-mov %s | FileCheck %s +// RUN: mlir-neura-opt --assign-accelerator --lower-arith-to-neura --insert-data-mov %s | FileCheck %s func.func @test(%a: f32) -> f32 { %b = arith.constant 2.0 : f32 diff --git a/test/neura/ctrl/branch_without_arg.mlir b/test/neura/ctrl/branch_without_arg.mlir index 385fe20f..131753d4 100644 --- a/test/neura/ctrl/branch_without_arg.mlir +++ b/test/neura/ctrl/branch_without_arg.mlir @@ -39,7 +39,7 @@ func.func @test(%in: i64) -> f32 { // CHECK-NEXT: %3 = "neura.constant"() <{predicate = true, value = 3.000000e+00 : f32}> : () -> !neura.data // CHECK-NEXT: %4 = "neura.constant"() <{predicate = true, value = 4.000000e+00 : f32}> : () -> !neura.data // CHECK-NEXT: %5 = "neura.icmp"(%arg0, %0) <{cmpType = "eq"}> : (i64, !neura.data) -> !neura.data -// CHECK-NEXT: neura.cond_br %5 : !neura.data then %3, %4 : !neura.data, !neura.data to ^bb2 else : to ^bb1 +// 
CHECK-NEXT: neura.cond_br %5 : !neura.data then %3, %4 : !neura.data, !neura.data to ^bb2 else to ^bb1 // CHECK-NEXT: ^bb1: // pred: ^bb0 // CHECK-NEXT: %6 = "neura.fadd"(%1, %2) : (!neura.data, !neura.data) -> !neura.data // CHECK-NEXT: neura.br %6 : !neura.data to ^bb3 diff --git a/test/neura/fadd_fadd.mlir b/test/neura/fadd_fadd.mlir index da1aef44..87ca3f8e 100644 --- a/test/neura/fadd_fadd.mlir +++ b/test/neura/fadd_fadd.mlir @@ -1,5 +1,5 @@ // Applies pattern fusion before mov insertion. -// RUN: mlir-neura-opt --lower-arith-to-neura --fuse-patterns --insert-data-mov %s | FileCheck %s +// RUN: mlir-neura-opt --assign-accelerator --lower-arith-to-neura --fuse-patterns --insert-data-mov %s | FileCheck %s func.func @test(%a: f32, %b: f32) -> f32 { %c = arith.constant 2.0 : f32 diff --git a/test/samples/bert/bert_affine.mlir b/test/samples/bert/bert_affine.mlir index e47b9f88..cd20b0b3 100644 --- a/test/samples/bert/bert_affine.mlir +++ b/test/samples/bert/bert_affine.mlir @@ -89,6 +89,7 @@ module { %34 = bufferization.to_memref %cst_0 : memref<768xf32> %35 = bufferization.to_memref %cst : memref %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x128xi1> + // Node0 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { %88 = affine.load %2[%c0, %arg4] : memref<1x128xi64> @@ -100,6 +101,7 @@ module { %expanded = tensor.expand_shape %36 [[0, 1], [2, 3, 4, 5]] : tensor<1x128xi1> into tensor<1x1x1x1x1x128xi1> %37 = bufferization.to_memref %expanded : memref<1x1x1x1x1x128xi1> %alloc_47 = memref.alloc() {alignment = 64 : i64} : memref<1x1x128x1x1x128xi1> + // Node1 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 1 { affine.for %arg5 = 0 to 128 { @@ -119,6 +121,7 @@ module { %expanded_48 = tensor.expand_shape %collapsed [[0], [1, 2], [3]] : tensor<1x128x128xi1> into tensor<1x1x128x128xi1> %39 = bufferization.to_memref %expanded_48 : memref<1x1x128x128xi1> %alloc_49 = memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf32> + // Node2 affine.for %arg3 
= 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -136,6 +139,7 @@ module { %extracted_slice = tensor.extract_slice %arg0[0, 0, 0] [1, 128, 768] [1, 1, 1] : tensor<1x512x768xf32> to tensor<1x128x768xf32> %40 = bufferization.to_memref %extracted_slice : memref<1x128x768xf32> %alloc_50 = memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf32> + // Node3 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -147,6 +151,7 @@ module { } } %alloc_51 = memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf32> + // Node4 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -162,6 +167,7 @@ module { } } %alloc_52 = memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf32> + // Node5 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -173,6 +179,7 @@ module { } } %alloc_53 = memref.alloc() {alignment = 64 : i64} : memref<1x128x1xf32> + // Node6 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 1 { @@ -182,6 +189,7 @@ module { } %alloc_54 = memref.alloc() {alignment = 64 : i64} : memref<1x128x1xf32> memref.copy %alloc_53, %alloc_54 : memref<1x128x1xf32> to memref<1x128x1xf32> + // Node7 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -193,6 +201,7 @@ module { } } %alloc_55 = memref.alloc() {alignment = 64 : i64} : memref<1x128x1xf32> + // Node8 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 1 { @@ -203,6 +212,7 @@ module { } } %alloc_56 = memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf64> + // Node9 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -213,6 +223,7 @@ module { } } %alloc_57 = memref.alloc() {alignment = 64 : i64} : memref<1x128x1xf64> + // Node10 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 1 { @@ -222,6 +233,7 @@ 
module { } %alloc_58 = memref.alloc() {alignment = 64 : i64} : memref<1x128x1xf64> memref.copy %alloc_57, %alloc_58 : memref<1x128x1xf64> to memref<1x128x1xf64> + // Node11 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -233,6 +245,7 @@ module { } } %alloc_59 = memref.alloc() {alignment = 64 : i64} : memref<1x128x1xf64> + // Node12 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 1 { @@ -243,6 +256,7 @@ module { } } %alloc_60 = memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf64> + // Node13 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -254,6 +268,7 @@ module { } } %alloc_61 = memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf64> + // Node14 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -266,6 +281,7 @@ module { } %alloc_62 = memref.alloc() {alignment = 64 : i64} : memref<1x128x1xf64> memref.copy %alloc_57, %alloc_62 : memref<1x128x1xf64> to memref<1x128x1xf64> + // Node15 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -277,6 +293,7 @@ module { } } %alloc_63 = memref.alloc() {alignment = 64 : i64} : memref<1x128x1xf64> + // Node16 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 1 { @@ -287,6 +304,7 @@ module { } } %alloc_64 = memref.alloc() {alignment = 64 : i64} : memref<1x128x1xf32> + // Node17 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 1 { @@ -297,6 +315,7 @@ module { } } %alloc_65 = memref.alloc() {alignment = 64 : i64} : memref<1x128x1xf32> + // Node18 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 1 { @@ -307,6 +326,7 @@ module { } } %alloc_66 = memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf32> + // Node19 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -318,6 +338,7 @@ 
module { } } %alloc_67 = memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf32> + // Node20 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -329,6 +350,7 @@ module { } } %alloc_68 = memref.alloc() {alignment = 64 : i64} : memref<1x128x1xf32> + // Node21 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 1 { @@ -340,6 +362,7 @@ module { } } %alloc_69 = memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf32> + // Node22 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -351,6 +374,7 @@ module { } } %alloc_70 = memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf32> + // Node23 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -362,6 +386,7 @@ module { } } %alloc_71 = memref.alloc() {alignment = 64 : i64} : memref<768x768xf32> + // Node24 affine.for %arg3 = 0 to 768 { affine.for %arg4 = 0 to 768 { %88 = affine.load %5[%arg3, %arg4] : memref<768x768xf32> @@ -369,6 +394,7 @@ module { } } %alloc_72 = memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf32> + // Node25 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -378,6 +404,7 @@ module { } } %alloc_73 = memref.alloc() {alignment = 64 : i64} : memref<1x768x768xf32> + // Node26 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 768 { affine.for %arg5 = 0 to 768 { @@ -387,6 +414,7 @@ module { } } %alloc_74 = memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf32> + // Node27 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -396,6 +424,7 @@ module { } %alloc_75 = memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf32> memref.copy %alloc_74, %alloc_75 : memref<1x128x768xf32> to memref<1x128x768xf32> + // Node28 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -411,6 +440,7 @@ module { } } %alloc_76 = 
memref.alloc() {alignment = 64 : i64} : memref<1x128x768xf32> + // Node29 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 768 { @@ -425,6 +455,7 @@ module { %expanded_77 = tensor.expand_shape %41 [[0], [1], [2, 3]] : tensor<1x128x768xf32> into tensor<1x128x12x64xf32> %42 = bufferization.to_memref %expanded_77 : memref<1x128x12x64xf32> %alloc_78 = memref.alloc() {alignment = 64 : i64} : memref<1x12x128x64xf32> + // Node30 affine.for %arg3 = 0 to 1 { affine.for %arg4 = 0 to 128 { affine.for %arg5 = 0 to 12 {