diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td
index ac6946af..8e15b239 100644
--- a/include/NeuraDialect/NeuraOps.td
+++ b/include/NeuraDialect/NeuraOps.td
@@ -324,6 +324,14 @@ def Neura_FMulFAddOp : Op {
   let traits = [SameOperandsAndResultElementType];
 }
 
+def Neura_MulAddOp : Op {
+  let summary = "Fused add(mul(a, b), c)";
+  let arguments = (ins AnyType:$a, AnyType:$b, AnyType:$c, Optional<AnyType>:$predicate);
+  let results = (outs AnyType:$result);
+  // let assemblyFormat = "$a `,` $b `,` $c `,` $predicate attr-dict `:` type($result)";
+  let traits = [SameOperandsAndResultElementType];
+}
+
 // ----------------------------------------------------
 // Defines move operations.
 def Neura_DataMovOp : Op {
diff --git a/lib/NeuraDialect/Transforms/FusePatternPass.cpp b/lib/NeuraDialect/Transforms/FusePatternPass.cpp
index a3ddb349..ab47d383 100644
--- a/lib/NeuraDialect/Transforms/FusePatternPass.cpp
+++ b/lib/NeuraDialect/Transforms/FusePatternPass.cpp
@@ -15,6 +15,11 @@ struct FuseFAddFAddPattern : public OpRewritePattern<neura::FAddOp> {
   LogicalResult matchAndRewrite(neura::FAddOp second,
                                 PatternRewriter &rewriter) const override {
+    // Checks if rhs exists before trying to get its defining op.
+    if (!second.getRhs()) {
+      return failure();
+    }
+
     Value lhs = second.getLhs();
     Value rhs = second.getRhs();
@@ -61,6 +66,11 @@ struct FuseFMulFAddPattern : public OpRewritePattern<neura::FAddOp> {
   LogicalResult matchAndRewrite(neura::FAddOp add,
                                 PatternRewriter &rewriter) const override {
+    // Checks if rhs exists before trying to get its defining op.
+    if (!add.getRhs()) {
+      return failure();
+    }
+
     auto lhs_op = add.getLhs().getDefiningOp<neura::FMulOp>();
     auto rhs_op = add.getRhs().getDefiningOp<neura::FMulOp>();
@@ -82,7 +92,7 @@ struct FuseFMulFAddPattern : public OpRewritePattern<neura::FAddOp> {
       return failure();
     }
 
-    // Optional: only fuses if fmul has a single use.
+    // Only fuses if fmul has a single use.
if (!fmul->hasOneUse()) { return failure(); } @@ -99,6 +109,119 @@ struct FuseFMulFAddPattern : public OpRewritePattern { } }; +struct FuseGepLoadPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(neura::LoadOp load, + PatternRewriter &rewriter) const override { + Value addr = load.getAddr(); + auto gep_op = addr.getDefiningOp(); + + if (!gep_op) + return failure(); + + // Only fuses if the gep has a single use. + if (!gep_op->hasOneUse()) + return failure(); + + Location loc = load.getLoc(); + Type type = load.getType(); + + // Creates the fused operation with base and indices from gep. + SmallVector indexValues; + for (auto gepIndex : gep_op.getIndicesAndPredicate()) { + indexValues.push_back(gepIndex); + } + + auto fused = rewriter.create( + loc, type, gep_op.getBase(), indexValues); + + rewriter.replaceOp(load, fused.getResult()); + rewriter.eraseOp(gep_op); + return success(); + } +}; + +struct FuseGepStorePattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(neura::StoreOp store, + PatternRewriter &rewriter) const override { + Value addr = store.getAddr(); + auto gep_op = addr.getDefiningOp(); + + if (!gep_op) + return failure(); + + // Only fuses if the gep has a single use. + if (!gep_op->hasOneUse()) + return failure(); + + Location loc = store.getLoc(); + + // Creates the fused operation with base and indices from gep. 
+ SmallVector indexValues; + for (auto gepIndex : gep_op.getIndicesAndPredicate()) { + indexValues.push_back(gepIndex); + } + + rewriter.create( + loc, store.getValue(), gep_op.getBase(), indexValues); + + rewriter.eraseOp(store); + rewriter.eraseOp(gep_op); + return success(); + } +}; + +struct FuseMulAddPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(neura::AddOp add, + PatternRewriter &rewriter) const override { + // Checks if rhs exists before trying to get its defining op. + if (!add.getRhs()) { + return failure(); + } + + auto lhs_op = add.getLhs().getDefiningOp(); + auto rhs_op = add.getRhs().getDefiningOp(); + + neura::MulOp mul = nullptr; + Value other; + + // Case 1: mul is on the LHS. + if (lhs_op && add.getRhs()) { + mul = lhs_op; + other = add.getRhs(); + } + // Case 2: mul is on the RHS. + else if (rhs_op && add.getLhs()) { + mul = rhs_op; + other = add.getLhs(); + } + + if (!mul) { + return failure(); + } + + // Only fuses if mul has a single use. 
+ if (!mul->hasOneUse()) { + return failure(); + } + + Location loc = add.getLoc(); + Type type = add.getType(); + + auto fused = rewriter.create( + loc, type, mul.getLhs(), mul.getRhs(), other, Value()); + + rewriter.replaceOp(add, fused.getResult()); + rewriter.eraseOp(mul); + return success(); + } +}; + struct FusePatternPass : public PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(FusePatternPass) @@ -113,6 +236,9 @@ struct FusePatternPass RewritePatternSet patterns(&getContext()); patterns.add(&getContext(), 2); patterns.add(&getContext(), 3); + patterns.add(&getContext(), 4); + patterns.add(&getContext(), 5); + patterns.add(&getContext(), 6); FrozenRewritePatternSet frozen(std::move(patterns)); // Applies to every region inside the module (regardless of func type, diff --git a/test/neura/for_loop/kernel_test.mlir b/test/neura/for_loop/kernel_test.mlir index 75567013..5190b5cb 100644 --- a/test/neura/for_loop/kernel_test.mlir +++ b/test/neura/for_loop/kernel_test.mlir @@ -32,7 +32,7 @@ // RUN: --insert-data-mov \ // RUN: | FileCheck %s --check-prefix=CHECK-MOV -// CHECK: func.func +// CHECK: func.func @_Z6kernelPfS_S_ // CHECK: accelerator = "neura" // CHECK-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data // CHECK-NEXT: %1 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data @@ -58,7 +58,7 @@ // CHECK-NEXT: } // Verifies the neura ops are generated. And fusion happens. 
-// CHECK-FUSED: func.func +// CHECK-FUSED: func.func @_Z6kernelPfS_S_ // CHECK-FUSED: accelerator = "neura" // CHECK-FUSED-NEXT: %0 = "neura.grant_once"() <{constant_value = "%arg0"}> : () -> !neura.data // CHECK-FUSED-NEXT: %1 = "neura.grant_once"() <{constant_value = "%arg1"}> : () -> !neura.data @@ -83,33 +83,31 @@ // CHECK-FUSED-NEXT: %20 = "neura.phi"(%19, %8) : (!neura.data, !neura.data) -> !neura.data // CHECK-FUSED-NEXT: %21 = neura.reserve : !neura.data // CHECK-FUSED-NEXT: %22 = "neura.phi"(%21, %4) : (!neura.data, !neura.data) -> !neura.data -// CHECK-FUSED-NEXT: %23 = "neura.gep"(%18, %22) : (!neura.data, !neura.data) -> !neura.data -// CHECK-FUSED-NEXT: %24 = "neura.load"(%23) : (!neura.data) -> !neura.data -// CHECK-FUSED-NEXT: %25 = "neura.gep"(%16, %22) : (!neura.data, !neura.data) -> !neura.data -// CHECK-FUSED-NEXT: %26 = "neura.load"(%25) : (!neura.data) -> !neura.data -// CHECK-FUSED-NEXT: %27 = "neura.fmul_fadd"(%24, %26, %20) : (!neura.data, !neura.data, !neura.data) -> !neura.data -// CHECK-FUSED-NEXT: "neura.store"(%27, %14) : (!neura.data, !neura.data) -> () -// CHECK-FUSED-NEXT: %28 = "neura.add"(%22, %12) : (!neura.data, !neura.data) -> !neura.data -// CHECK-FUSED-NEXT: %29 = "neura.icmp"(%28, %10) <{cmpType = "eq"}> : (!neura.data, !neura.data) -> !neura.data -// CHECK-FUSED-NEXT: %30 = "neura.not"(%29) : (!neura.data) -> !neura.data -// CHECK-FUSED-NEXT: %31 = neura.grant_predicate %28, %30 : !neura.data, !neura.data -> !neura.data -// CHECK-FUSED-NEXT: neura.ctrl_mov %31 -> %21 : !neura.data !neura.data -// CHECK-FUSED-NEXT: %32 = neura.grant_predicate %27, %30 : !neura.data, !neura.data -> !neura.data -// CHECK-FUSED-NEXT: neura.ctrl_mov %32 -> %19 : !neura.data !neura.data -// CHECK-FUSED-NEXT: %33 = neura.grant_predicate %18, %30 : !neura.data, !neura.data -> !neura.data -// CHECK-FUSED-NEXT: neura.ctrl_mov %33 -> %17 : !neura.data !neura.data -// CHECK-FUSED-NEXT: %34 = neura.grant_predicate %16, %30 : !neura.data, !neura.data -> 
!neura.data -// CHECK-FUSED-NEXT: neura.ctrl_mov %34 -> %15 : !neura.data !neura.data -// CHECK-FUSED-NEXT: %35 = neura.grant_predicate %14, %30 : !neura.data, !neura.data -> !neura.data -// CHECK-FUSED-NEXT: neura.ctrl_mov %35 -> %13 : !neura.data !neura.data -// CHECK-FUSED-NEXT: %36 = neura.grant_predicate %12, %30 : !neura.data, !neura.data -> !neura.data -// CHECK-FUSED-NEXT: neura.ctrl_mov %36 -> %11 : !neura.data !neura.data -// CHECK-FUSED-NEXT: %37 = neura.grant_predicate %10, %30 : !neura.data, !neura.data -> !neura.data -// CHECK-FUSED-NEXT: neura.ctrl_mov %37 -> %9 : !neura.data !neura.data +// CHECK-FUSED-NEXT: %23 = neura.load_indexed %18[%22 : !neura.data] !neura.data : !neura.data +// CHECK-FUSED-NEXT: %24 = neura.load_indexed %16[%22 : !neura.data] !neura.data : !neura.data +// CHECK-FUSED-NEXT: %25 = "neura.fmul_fadd"(%23, %24, %20) : (!neura.data, !neura.data, !neura.data) -> !neura.data +// CHECK-FUSED-NEXT: "neura.store"(%25, %14) : (!neura.data, !neura.data) -> () +// CHECK-FUSED-NEXT: %26 = "neura.add"(%22, %12) : (!neura.data, !neura.data) -> !neura.data +// CHECK-FUSED-NEXT: %27 = "neura.icmp"(%26, %10) <{cmpType = "eq"}> : (!neura.data, !neura.data) -> !neura.data +// CHECK-FUSED-NEXT: %28 = "neura.not"(%27) : (!neura.data) -> !neura.data +// CHECK-FUSED-NEXT: %29 = neura.grant_predicate %26, %28 : !neura.data, !neura.data -> !neura.data +// CHECK-FUSED-NEXT: neura.ctrl_mov %29 -> %21 : !neura.data !neura.data +// CHECK-FUSED-NEXT: %30 = neura.grant_predicate %25, %28 : !neura.data, !neura.data -> !neura.data +// CHECK-FUSED-NEXT: neura.ctrl_mov %30 -> %19 : !neura.data !neura.data +// CHECK-FUSED-NEXT: %31 = neura.grant_predicate %18, %28 : !neura.data, !neura.data -> !neura.data +// CHECK-FUSED-NEXT: neura.ctrl_mov %31 -> %17 : !neura.data !neura.data +// CHECK-FUSED-NEXT: %32 = neura.grant_predicate %16, %28 : !neura.data, !neura.data -> !neura.data +// CHECK-FUSED-NEXT: neura.ctrl_mov %32 -> %15 : !neura.data !neura.data +// 
CHECK-FUSED-NEXT: %33 = neura.grant_predicate %14, %28 : !neura.data, !neura.data -> !neura.data +// CHECK-FUSED-NEXT: neura.ctrl_mov %33 -> %13 : !neura.data !neura.data +// CHECK-FUSED-NEXT: %34 = neura.grant_predicate %12, %28 : !neura.data, !neura.data -> !neura.data +// CHECK-FUSED-NEXT: neura.ctrl_mov %34 -> %11 : !neura.data !neura.data +// CHECK-FUSED-NEXT: %35 = neura.grant_predicate %10, %28 : !neura.data, !neura.data -> !neura.data +// CHECK-FUSED-NEXT: neura.ctrl_mov %35 -> %9 : !neura.data !neura.data // CHECK-FUSED-NEXT: "neura.return"() : () -> () // CHECK-FUSED-NEXT: } -// CHECK-MOV: func.func +// CHECK-MOV: func.func @_Z6kernelPfS_S_ // CHECK-MOV: accelerator = "neura" // CHECK-MOV-NEXT: %0 = "neura.grant_once"() <{constant_value = "%arg0"}> : () -> !neura.data // CHECK-MOV-NEXT: %1 = "neura.grant_once"() <{constant_value = "%arg1"}> : () -> !neura.data @@ -145,56 +143,52 @@ // CHECK-MOV-NEXT: %31 = "neura.phi"(%29, %30) : (!neura.data, !neura.data) -> !neura.data // CHECK-MOV-NEXT: %32 = "neura.data_mov"(%25) : (!neura.data) -> !neura.data // CHECK-MOV-NEXT: %33 = "neura.data_mov"(%31) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %34 = "neura.gep"(%32, %33) : (!neura.data, !neura.data) -> !neura.data -// CHECK-MOV-NEXT: %35 = "neura.data_mov"(%34) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %36 = "neura.load"(%35) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %37 = "neura.data_mov"(%22) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %38 = "neura.data_mov"(%31) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %39 = "neura.gep"(%37, %38) : (!neura.data, !neura.data) -> !neura.data -// CHECK-MOV-NEXT: %40 = "neura.data_mov"(%39) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %41 = "neura.load"(%40) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %42 = "neura.data_mov"(%36) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %43 = "neura.data_mov"(%41) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %44 = 
"neura.data_mov"(%28) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %45 = "neura.fmul_fadd"(%42, %43, %44) : (!neura.data, !neura.data, !neura.data) -> !neura.data -// CHECK-MOV-NEXT: %46 = "neura.data_mov"(%45) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %47 = "neura.data_mov"(%19) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: "neura.store"(%46, %47) : (!neura.data, !neura.data) -> () -// CHECK-MOV-NEXT: %48 = "neura.data_mov"(%31) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %49 = "neura.data_mov"(%16) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %50 = "neura.add"(%48, %49) : (!neura.data, !neura.data) -> !neura.data -// CHECK-MOV-NEXT: %51 = "neura.data_mov"(%50) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %52 = "neura.data_mov"(%13) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %53 = "neura.icmp"(%51, %52) <{cmpType = "eq"}> : (!neura.data, !neura.data) -> !neura.data -// CHECK-MOV-NEXT: %54 = "neura.data_mov"(%53) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %55 = "neura.not"(%54) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %56 = "neura.data_mov"(%50) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %57 = "neura.data_mov"(%55) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %58 = neura.grant_predicate %56, %57 : !neura.data, !neura.data -> !neura.data -// CHECK-MOV-NEXT: neura.ctrl_mov %58 -> %29 : !neura.data !neura.data -// CHECK-MOV-NEXT: %59 = "neura.data_mov"(%45) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %60 = "neura.data_mov"(%55) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %61 = neura.grant_predicate %59, %60 : !neura.data, !neura.data -> !neura.data -// CHECK-MOV-NEXT: neura.ctrl_mov %61 -> %26 : !neura.data !neura.data -// CHECK-MOV-NEXT: %62 = "neura.data_mov"(%25) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %63 = "neura.data_mov"(%55) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %64 = neura.grant_predicate %62, %63 : !neura.data, !neura.data -> !neura.data -// 
CHECK-MOV-NEXT: neura.ctrl_mov %64 -> %23 : !neura.data !neura.data -// CHECK-MOV-NEXT: %65 = "neura.data_mov"(%22) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %66 = "neura.data_mov"(%55) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %67 = neura.grant_predicate %65, %66 : !neura.data, !neura.data -> !neura.data -// CHECK-MOV-NEXT: neura.ctrl_mov %67 -> %20 : !neura.data !neura.data -// CHECK-MOV-NEXT: %68 = "neura.data_mov"(%19) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %69 = "neura.data_mov"(%55) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %70 = neura.grant_predicate %68, %69 : !neura.data, !neura.data -> !neura.data -// CHECK-MOV-NEXT: neura.ctrl_mov %70 -> %17 : !neura.data !neura.data -// CHECK-MOV-NEXT: %71 = "neura.data_mov"(%16) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %72 = "neura.data_mov"(%55) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %73 = neura.grant_predicate %71, %72 : !neura.data, !neura.data -> !neura.data -// CHECK-MOV-NEXT: neura.ctrl_mov %73 -> %14 : !neura.data !neura.data -// CHECK-MOV-NEXT: %74 = "neura.data_mov"(%13) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %75 = "neura.data_mov"(%55) : (!neura.data) -> !neura.data -// CHECK-MOV-NEXT: %76 = neura.grant_predicate %74, %75 : !neura.data, !neura.data -> !neura.data -// CHECK-MOV-NEXT: neura.ctrl_mov %76 -> %11 : !neura.data !neura.data +// CHECK-MOV-NEXT: %34 = neura.load_indexed %32[%33 : !neura.data] !neura.data : !neura.data +// CHECK-MOV-NEXT: %35 = "neura.data_mov"(%22) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %36 = "neura.data_mov"(%31) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %37 = neura.load_indexed %35[%36 : !neura.data] !neura.data : !neura.data +// CHECK-MOV-NEXT: %38 = "neura.data_mov"(%34) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %39 = "neura.data_mov"(%37) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %40 = "neura.data_mov"(%28) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %41 = 
"neura.fmul_fadd"(%38, %39, %40) : (!neura.data, !neura.data, !neura.data) -> !neura.data +// CHECK-MOV-NEXT: %42 = "neura.data_mov"(%41) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %43 = "neura.data_mov"(%19) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: "neura.store"(%42, %43) : (!neura.data, !neura.data) -> () +// CHECK-MOV-NEXT: %44 = "neura.data_mov"(%31) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %45 = "neura.data_mov"(%16) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %46 = "neura.add"(%44, %45) : (!neura.data, !neura.data) -> !neura.data +// CHECK-MOV-NEXT: %47 = "neura.data_mov"(%46) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %48 = "neura.data_mov"(%13) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %49 = "neura.icmp"(%47, %48) <{cmpType = "eq"}> : (!neura.data, !neura.data) -> !neura.data +// CHECK-MOV-NEXT: %50 = "neura.data_mov"(%49) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %51 = "neura.not"(%50) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %52 = "neura.data_mov"(%46) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %53 = "neura.data_mov"(%51) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %54 = neura.grant_predicate %52, %53 : !neura.data, !neura.data -> !neura.data +// CHECK-MOV-NEXT: neura.ctrl_mov %54 -> %29 : !neura.data !neura.data +// CHECK-MOV-NEXT: %55 = "neura.data_mov"(%41) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %56 = "neura.data_mov"(%51) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %57 = neura.grant_predicate %55, %56 : !neura.data, !neura.data -> !neura.data +// CHECK-MOV-NEXT: neura.ctrl_mov %57 -> %26 : !neura.data !neura.data +// CHECK-MOV-NEXT: %58 = "neura.data_mov"(%25) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %59 = "neura.data_mov"(%51) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %60 = neura.grant_predicate %58, %59 : !neura.data, !neura.data -> !neura.data +// CHECK-MOV-NEXT: neura.ctrl_mov %60 -> %23 : !neura.data !neura.data +// 
CHECK-MOV-NEXT: %61 = "neura.data_mov"(%22) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %62 = "neura.data_mov"(%51) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %63 = neura.grant_predicate %61, %62 : !neura.data, !neura.data -> !neura.data +// CHECK-MOV-NEXT: neura.ctrl_mov %63 -> %20 : !neura.data !neura.data +// CHECK-MOV-NEXT: %64 = "neura.data_mov"(%19) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %65 = "neura.data_mov"(%51) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %66 = neura.grant_predicate %64, %65 : !neura.data, !neura.data -> !neura.data +// CHECK-MOV-NEXT: neura.ctrl_mov %66 -> %17 : !neura.data !neura.data +// CHECK-MOV-NEXT: %67 = "neura.data_mov"(%16) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %68 = "neura.data_mov"(%51) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %69 = neura.grant_predicate %67, %68 : !neura.data, !neura.data -> !neura.data +// CHECK-MOV-NEXT: neura.ctrl_mov %69 -> %14 : !neura.data !neura.data +// CHECK-MOV-NEXT: %70 = "neura.data_mov"(%13) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %71 = "neura.data_mov"(%51) : (!neura.data) -> !neura.data +// CHECK-MOV-NEXT: %72 = neura.grant_predicate %70, %71 : !neura.data, !neura.data -> !neura.data +// CHECK-MOV-NEXT: neura.ctrl_mov %72 -> %11 : !neura.data !neura.data // CHECK-MOV-NEXT: "neura.return"() : () -> () // CHECK-MOV-NEXT: } \ No newline at end of file diff --git a/test/neura/fusion/kernel.cpp b/test/neura/fusion/kernel.cpp new file mode 100644 index 00000000..fe0b0e74 --- /dev/null +++ b/test/neura/fusion/kernel.cpp @@ -0,0 +1,26 @@ +#include +#include + +#define NTAPS 1024 + +int A[NTAPS][NTAPS]; +int s[NTAPS]; +int q[NTAPS]; +int p[NTAPS]; +int r[NTAPS]; + +void kernel(int A[][NTAPS], int s[], int q[], int p[], int r[]) { + int i, j; + + for (i = 0; i < NTAPS; i++) { + for (j = 0; j < NTAPS; j++) { + s[j] = s[j] + r[i] * A[i][j]; + q[i] = q[i] + A[i][j] * p[j]; + } + } +} + +int main() { + kernel(A, s, q, p, r); +} + diff --git 
a/test/neura/fusion/test.mlir b/test/neura/fusion/test.mlir
new file mode 100644
index 00000000..9db09a38
--- /dev/null
+++ b/test/neura/fusion/test.mlir
@@ -0,0 +1,32 @@
+// RUN: clang++ -S -emit-llvm -O3 -fno-unroll-loops -fno-vectorize -o %t-kernel.ll kernel.cpp
+// RUN: mlir-translate --import-llvm %t-kernel.ll -o %t-kernel.mlir
+// RUN: mlir-neura-opt --assign-accelerator \
+// RUN: --lower-llvm-to-neura \
+// RUN: --canonicalize-live-in \
+// RUN: --leverage-predicated-value \
+// RUN: --fold-constant \
+// RUN: --transform-ctrl-to-data-flow \
+// RUN: --fold-constant \
+// RUN: --fuse-pattern \
+// RUN: --view-op-graph \
+// RUN: --insert-data-mov %t-kernel.mlir | FileCheck %s --check-prefix=CHECK-FUSED
+
+// RUN: mlir-neura-opt --assign-accelerator \
+// RUN: --lower-llvm-to-neura \
+// RUN: --canonicalize-live-in \
+// RUN: --leverage-predicated-value \
+// RUN: --fold-constant \
+// RUN: --transform-ctrl-to-data-flow \
+// RUN: --fold-constant \
+// RUN: --fuse-pattern \
+// RUN: --insert-data-mov \
+// RUN: --map-to-accelerator="mapping-strategy=heuristic backtrack-config=customized" %t-kernel.mlir | FileCheck %s --check-prefix=CHECK-MAPPING
+
+// CHECK-FUSED: func.func
+// CHECK-FUSED: accelerator = "neura"
+// CHECK-FUSED: %102 = neura.load_indexed %100[%101 : !neura.data] !neura.data : !neura.data
+// CHECK-FUSED: %33 = "neura.mul_add"(%30, %31, %32) : (i32, i32, i32) -> i32
+// CHECK-FUSED: %42 = "neura.mul_add"(%39, %40, %41) : (i32, i32, i32) -> i32
+
+// CHECK-MAPPING: mapping_info = {compiled_ii = 18 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 9 : i32, res_mii = 5 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}
+// CHECK-MAPPING: mapping_locs