Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/Conversion/ConversionPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ std::unique_ptr<mlir::Pass> createLowerAffineToNeuraPass();

// TaskFlow Conversion Passes.
std::unique_ptr<mlir::Pass> createConvertAffineToTaskflowPass();

#define GEN_PASS_REGISTRATION
#include "Conversion/ConversionPasses.h.inc"

Expand Down
27 changes: 27 additions & 0 deletions test/Conversion/TosaToTaskflow/affine-to-taskflow.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// RUN: mlir-neura-opt --convert-affine-to-taskflow %s 2>/dev/null | FileCheck %s

// Test Affine to Taskflow conversion
// Checks that a standalone affine.for loop nest is outlined into a single
// "taskflow.task" op: the memref operands of the function become operands of
// the task, the task body receives them as fresh block arguments, and the
// written memref is returned through "taskflow.yield".
module {
func.func @simple_add(%arg0: memref<16xf32>, %arg1: memref<16xf32>, %arg2: memref<16xf32>) {
// Elementwise add over 16 elements: arg2[i] = arg0[i] + arg1[i].
affine.for %i = 0 to 16 {
%0 = affine.load %arg0[%i] : memref<16xf32>
%1 = affine.load %arg1[%i] : memref<16xf32>
%2 = arith.addf %0, %1 : f32
affine.store %2, %arg2[%i] : memref<16xf32>
}
return
}
}

// The task captures all three memrefs (%arg0..%arg2) and remaps them to the
// region's block arguments (%arg3..%arg5); the loop body is otherwise copied
// verbatim into the task region.
// CHECK-LABEL: func.func @simple_add
// CHECK-NEXT: %memory_outputs = "taskflow.task"(%arg0, %arg1, %arg2)
// CHECK-SAME: task_name = "Task_0"
// CHECK-NEXT: ^bb0(%arg3: memref<16xf32>, %arg4: memref<16xf32>, %arg5: memref<16xf32>):
// CHECK-NEXT: affine.for %arg6 = 0 to 16 {
// CHECK-NEXT: %0 = affine.load %arg3[%arg6] : memref<16xf32>
// CHECK-NEXT: %1 = affine.load %arg4[%arg6] : memref<16xf32>
// CHECK-NEXT: %2 = arith.addf %0, %1 : f32
// CHECK-NEXT: affine.store %2, %arg5[%arg6] : memref<16xf32>
// CHECK-NEXT: }
// CHECK-NEXT: "taskflow.yield"(%arg5)
// CHECK: return
27 changes: 27 additions & 0 deletions test/Conversion/TosaToTaskflow/tosa-fusion.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// RUN: mlir-neura-opt --pass-pipeline='builtin.module(func.func(tosa-infer-shapes,tosa-make-broadcastable,tosa-to-linalg-named,tosa-to-linalg,tosa-to-arith,tosa-to-tensor,linalg-fuse-elementwise-ops),one-shot-bufferize{bufferize-function-boundaries=1 function-boundary-type-conversion=identity-layout-map},func.func(convert-linalg-to-affine-loops))' %s | FileCheck %s

// Test Linalg fusion capability
// We chain multiple elementwise ops. If fusion works, we should see ONE loop nest.
// The pipeline lowers TOSA -> Linalg, runs linalg-fuse-elementwise-ops,
// bufferizes, then lowers to affine loops; fusion is confirmed below by a
// single affine.for containing add, mul, and max.
func.func @fusion_test(%arg0: tensor<16xf32>) -> tensor<16xf32> {
%0 = tosa.add %arg0, %arg0 : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32>
%1 = tosa.mul %0, %0 : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32>

// A simple relu-like operation: max(0, x)
%zeros = "tosa.const"() {value = dense<0.0> : tensor<16xf32>} : () -> tensor<16xf32>
%2 = tosa.maximum %1, %zeros : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32>

return %2 : tensor<16xf32>
}

// After bufferization the function operates on memrefs and allocates one
// output buffer; all three elementwise ops appear fused in one loop body.
// CHECK-LABEL: func.func @fusion_test
// CHECK-SAME: (%arg0: memref<16xf32>) -> memref<16xf32>
// CHECK: %cst = arith.constant 0.000000e+00 : f32
// CHECK-NEXT: %alloc = memref.alloc() {alignment = 64 : i64} : memref<16xf32>
// CHECK-NEXT: affine.for %arg1 = 0 to 16 {
// CHECK-NEXT: %0 = affine.load %arg0[%arg1] : memref<16xf32>
// CHECK-NEXT: %1 = arith.addf %0, %0 : f32
// CHECK-NEXT: %2 = arith.mulf %1, %1 : f32
// CHECK-NEXT: %3 = arith.maximumf %2, %cst : f32
// CHECK-NEXT: affine.store %3, %alloc[%arg1] : memref<16xf32>
// CHECK-NEXT: }
// CHECK-NEXT: return %alloc : memref<16xf32>
26 changes: 26 additions & 0 deletions test/Conversion/TosaToTaskflow/tosa-opt.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// RUN: mlir-neura-opt --pass-pipeline='builtin.module(func.func(tosa-infer-shapes,tosa-make-broadcastable,tosa-to-linalg-named,tosa-to-linalg,tosa-to-arith,tosa-to-tensor,linalg-fuse-elementwise-ops),one-shot-bufferize{bufferize-function-boundaries=1 function-boundary-type-conversion=identity-layout-map},func.func(convert-linalg-to-affine-loops))' %s | FileCheck %s

// Test TOSA optimization (constant folding) with arith.constant
func.func @const_fold_test() -> tensor<4xf32> {
%cst1 = arith.constant dense<[1.0, 2.0, 3.0, 4.0]> : tensor<4xf32>
%cst2 = arith.constant dense<[10.0, 20.0, 30.0, 40.0]> : tensor<4xf32>

// This add should be constant folded by TOSA before lowering to Linalg
%folded = tosa.add %cst1, %cst2 : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
return %folded : tensor<4xf32>
}

// The checks below pin the CURRENT (unfolded) lowering: the two dense
// constants become memref globals and the add survives as a runtime loop.
// CHECK-LABEL: func.func @const_fold_test
// CHECK-SAME: () -> memref<4xf32>
// TODO: This should be folded to a memory copy of a global constant.
// Currently TOSA constant folding is not triggering as expected, so we check for the runtime op.
// CHECK: %0 = memref.get_global @__constant_4xf32 : memref<4xf32>
// CHECK-NEXT: %1 = memref.get_global @__constant_4xf32_0 : memref<4xf32>
// CHECK-NEXT: %alloc = memref.alloc() {alignment = 64 : i64} : memref<4xf32>
// CHECK-NEXT: affine.for %arg0 = 0 to 4 {
// CHECK-NEXT: %2 = affine.load %0[%arg0] : memref<4xf32>
// CHECK-NEXT: %3 = affine.load %1[%arg0] : memref<4xf32>
// CHECK-NEXT: %4 = arith.addf %2, %3 : f32
// CHECK-NEXT: affine.store %4, %alloc[%arg0] : memref<4xf32>
// CHECK-NEXT: }
// CHECK-NEXT: return %alloc : memref<4xf32>
18 changes: 18 additions & 0 deletions test/Conversion/TosaToTaskflow/tosa-to-affine.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// RUN: mlir-neura-opt --pass-pipeline='builtin.module(func.func(tosa-infer-shapes,tosa-make-broadcastable,tosa-to-linalg-named,tosa-to-linalg,tosa-to-arith,tosa-to-tensor,linalg-fuse-elementwise-ops),one-shot-bufferize{bufferize-function-boundaries=1 function-boundary-type-conversion=identity-layout-map},func.func(convert-linalg-to-affine-loops))' %s | FileCheck %s

// Test TOSA to Affine lowering
// Smoke test for the TOSA -> Linalg -> (bufferize) -> affine-loops pipeline
// on a single elementwise op; the tensor signature becomes a memref one.
func.func @simple_add(%arg0: tensor<16xf32>, %arg1: tensor<16xf32>) -> tensor<16xf32> {
%0 = tosa.add %arg0, %arg1 : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32>
return %0 : tensor<16xf32>
}

// CHECK-LABEL: func.func @simple_add
// CHECK-SAME: (%arg0: memref<16xf32>, %arg1: memref<16xf32>) -> memref<16xf32>
// CHECK: %alloc = memref.alloc() {alignment = 64 : i64} : memref<16xf32>
// CHECK-NEXT: affine.for %arg2 = 0 to 16 {
// CHECK-NEXT: %0 = affine.load %arg0[%arg2] : memref<16xf32>
// CHECK-NEXT: %1 = affine.load %arg1[%arg2] : memref<16xf32>
// CHECK-NEXT: %2 = arith.addf %0, %1 : f32
// CHECK-NEXT: affine.store %2, %alloc[%arg2] : memref<16xf32>
// CHECK-NEXT: }
// CHECK-NEXT: return %alloc : memref<16xf32>
21 changes: 21 additions & 0 deletions test/Conversion/TosaToTaskflow/tosa-to-taskflow.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// RUN: mlir-neura-opt --pass-pipeline='builtin.module(func.func(tosa-infer-shapes,tosa-make-broadcastable,tosa-to-linalg-named,tosa-to-linalg,tosa-to-arith,tosa-to-tensor,linalg-fuse-elementwise-ops),one-shot-bufferize{bufferize-function-boundaries=1 function-boundary-type-conversion=identity-layout-map},func.func(convert-linalg-to-affine-loops),convert-affine-to-taskflow)' %s 2>&1 | FileCheck %s
// Simple TOSA add lowering test
// Extends the affine-lowering pipeline with convert-affine-to-taskflow so
// the fused affine loop ends up inside a single "taskflow.task" region.

func.func @simple_add(%arg0: tensor<16xf32>, %arg1: tensor<16xf32>) -> tensor<16xf32> {
%0 = tosa.add %arg0, %arg1 : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32>
return %0 : tensor<16xf32>
}

// The task takes the two inputs plus the bufferized output allocation, and
// the function returns the task's memory output.
// CHECK-LABEL: func.func @simple_add
// CHECK: %alloc = memref.alloc() {alignment = 64 : i64} : memref<16xf32>
// CHECK: %[[RES:.*]] = "taskflow.task"(%arg0, %arg1, %alloc)
// CHECK-SAME: task_name = "Task_0"
// CHECK-NEXT: ^bb0(%[[BA1:.*]]: memref<16xf32>, %[[BA2:.*]]: memref<16xf32>, %[[BA3:.*]]: memref<16xf32>):
// CHECK-NEXT: affine.for %[[IV:.*]] = 0 to 16 {
// CHECK-NEXT: %0 = affine.load %[[BA1]][%[[IV]]] : memref<16xf32>
// CHECK-NEXT: %1 = affine.load %[[BA2]][%[[IV]]] : memref<16xf32>
// CHECK-NEXT: %2 = arith.addf %0, %1 : f32
// CHECK-NEXT: affine.store %2, %[[BA3]][%[[IV]]] : memref<16xf32>
// CHECK-NEXT: }
// CHECK-NEXT: "taskflow.yield"(%[[BA3]])
// CHECK: return %[[RES]] : memref<16xf32>
23 changes: 23 additions & 0 deletions test/e2e/tosa_e2e.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// RUN: mlir-neura-opt %s --pass-pipeline='builtin.module(func.func(tosa-infer-shapes,tosa-make-broadcastable,tosa-to-linalg-named,tosa-to-linalg,tosa-to-arith,tosa-to-tensor,linalg-fuse-elementwise-ops),one-shot-bufferize{bufferize-function-boundaries=1 function-boundary-type-conversion=identity-layout-map},func.func(convert-linalg-to-affine-loops),convert-affine-to-taskflow)' | FileCheck %s

// Verifies the end-to-end lowering from TOSA to Taskflow.
// Two chained elementwise ops must be fused into ONE affine loop and that
// loop outlined into ONE taskflow.task (see CHECK body below).
func.func @test_e2e(%arg0: tensor<16xf32>, %arg1: tensor<16xf32>) -> tensor<16xf32> {
%0 = tosa.add %arg0, %arg1 : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32>
%1 = tosa.mul %0, %0 : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32>
return %1 : tensor<16xf32>
}

// CHECK-LABEL: func.func @test_e2e
// CHECK: %alloc = memref.alloc() {alignment = 64 : i64} : memref<16xf32>
// CHECK: %[[RES:.*]] = "taskflow.task"(%arg0, %arg1, %alloc)
// CHECK-SAME: task_name = "Task_0"
// CHECK-NEXT: ^bb0(%[[BA1:.*]]: memref<16xf32>, %[[BA2:.*]]: memref<16xf32>, %[[BA3:.*]]: memref<16xf32>):
// CHECK-NEXT: affine.for %[[IV:.*]] = 0 to 16 {
// CHECK-NEXT: %0 = affine.load %[[BA1]][%[[IV]]] : memref<16xf32>
// CHECK-NEXT: %1 = affine.load %[[BA2]][%[[IV]]] : memref<16xf32>
// CHECK-NEXT: %2 = arith.addf %0, %1 : f32
// CHECK-NEXT: %3 = arith.mulf %2, %2 : f32
// CHECK-NEXT: affine.store %3, %[[BA3]][%[[IV]]] : memref<16xf32>
// CHECK-NEXT: }
// CHECK-NEXT: "taskflow.yield"(%[[BA3]])
// CHECK: return %[[RES]] : memref<16xf32>
10 changes: 10 additions & 0 deletions tools/mlir-neura-opt/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ set(LIBS
MLIRConversion
MLIRNeura
MLIRTaskflow
MLIRTosaDialect
MLIRTosaTransforms
MLIRLinalgTransforms
MLIRArithTransforms
MLIRSCFTransforms
MLIRTensorTransforms
MLIRBufferizationDialect
MLIRBufferizationTransforms
MLIRFuncAllExtensions
MLIRTensorAllExtensions
MLIRTransforms
MLIROptLib
MLIRPass
Expand Down
17 changes: 17 additions & 0 deletions tools/mlir-neura-opt/mlir-neura-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,16 @@
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/InitAllDialects.h"
#include "mlir/InitAllPasses.h"
#include "mlir/InitAllExtensions.h"
#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h"
#include "mlir/Support/FileUtilities.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Tools/mlir-opt/MlirOptMain.h"
Expand Down Expand Up @@ -73,9 +81,18 @@ int main(int argc, char **argv) {
registry.insert<mlir::ml_program::MLProgramDialect>();
registry.insert<mlir::tensor::TensorDialect>();
registry.insert<mlir::linalg::LinalgDialect>();
registry.insert<mlir::tosa::TosaDialect>();
registry.insert<mlir::bufferization::BufferizationDialect>();
registry.insert<mlir::taskflow::TaskflowDialect>();
mlir::registerAllExtensions(registry);
mlir::linalg::registerBufferizableOpInterfaceExternalModels(registry);
mlir::tensor::registerBufferizableOpInterfaceExternalModels(registry);
mlir::arith::registerBufferizableOpInterfaceExternalModels(registry);
mlir::scf::registerBufferizableOpInterfaceExternalModels(registry);
mlir::bufferization::func_ext::registerBufferizableOpInterfaceExternalModels(registry);

mlir::neura::registerPasses();
mlir::registerAllPasses();
mlir::registerPasses();
mlir::registerViewOpGraphPass();
mlir::taskflow::registerPasses();
Expand Down