-
Notifications
You must be signed in to change notification settings - Fork 15
feat: Implement TOSA to Taskflow lowering pipeline #245
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 4 commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
609d337
feat: Implement TOSA to Taskflow lowering pipeline
guosran 8c51f9d
Update TOSA lowering pipeline implementation
guosran 7d40ae8
Refactor: functionality for progressive TOSA lowering
guosran ece22eb
Enhance: enable TOSA optimization passes and add fusion test
guosran 6c406f7
Refactor: remove C++ Tosa pipeline and add Python E2E test
guosran 4781475
Add Python E2E test with lit wrapper
guosran ba07e28
Update submodule to fix relu_int.cpp content
guosran dfc1f17
Revert submodule pointer to valid upstream commit
guosran 83cddd3
Fix regression tests: update explicit pipelines and check lines
guosran 527cfdd
Fix tosa_e2e.py to support remote environment by passing tool paths
guosran 904d98c
Simplify e2e test into a single .mlir file
guosran File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| add_mlir_library(MLIRTosaToTaskflowPipeline | ||
| TosaToTaskflowPipeline.cpp | ||
|
|
||
| DEPENDS | ||
| MLIRConversionIncGen | ||
|
|
||
| LINK_LIBS PUBLIC | ||
| MLIRPass | ||
| MLIRTosaDialect | ||
| MLIRLinalgDialect | ||
| MLIRLinalgTransforms | ||
| MLIRAffineDialect | ||
| MLIRArithDialect | ||
| MLIRTensorDialect | ||
| MLIRMemRefDialect | ||
| MLIRFuncDialect | ||
| MLIRBufferizationDialect | ||
| MLIRBufferizationTransforms | ||
| MLIRTaskflow | ||
| MLIRAffineToTaskflowPass | ||
| MLIRTosaToLinalg | ||
| MLIRTosaToTensor | ||
| MLIRTosaToArith | ||
| ) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| #include "mlir/Conversion/AffineToStandard/AffineToStandard.h" | ||
| #include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h" | ||
| #include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h" | ||
| #include "mlir/Conversion/Passes.h" | ||
| #include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h" | ||
guosran marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| #include "mlir/Conversion/TosaToArith/TosaToArith.h" | ||
| #include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h" | ||
| #include "mlir/Conversion/TosaToTensor/TosaToTensor.h" | ||
| #include "mlir/Dialect/Affine/IR/AffineOps.h" | ||
| #include "mlir/Dialect/Arith/IR/Arith.h" | ||
| #include "mlir/Dialect/Bufferization/IR/Bufferization.h" | ||
| #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" | ||
| #include "mlir/Dialect/Bufferization/Transforms/Passes.h" | ||
| #include "mlir/Dialect/Func/IR/FuncOps.h" | ||
| #include "mlir/Dialect/Linalg/IR/Linalg.h" | ||
| #include "mlir/Dialect/Linalg/Passes.h" | ||
| #include "mlir/Dialect/MemRef/IR/MemRef.h" | ||
| #include "mlir/Dialect/MemRef/Transforms/Passes.h" | ||
| #include "mlir/Dialect/Tensor/IR/Tensor.h" | ||
| #include "mlir/Dialect/Tosa/IR/TosaOps.h" | ||
| #include "mlir/Pass/Pass.h" | ||
| #include "mlir/Pass/PassManager.h" | ||
| #include "mlir/Transforms/Passes.h" | ||
|
|
||
| #include "Conversion/ConversionPasses.h" | ||
|
|
||
| using namespace mlir; | ||
|
|
||
| namespace { | ||
| // Populates `pm` with the TOSA -> Linalg -> Affine lowering. Stages, in order: | ||
| // TOSA-level cleanup, conversion of TOSA ops to Linalg/Arith/Tensor, Linalg | ||
| // elementwise fusion, one-shot bufferization (function boundaries included), | ||
| // and conversion of Linalg ops to affine loops followed by generic cleanup. | ||
| void buildTosaToAffinePipeline(OpPassManager &pm) { | ||
| // 0. TOSA Optimizations | ||
| // These passes must run on func::FuncOp | ||
| pm.addNestedPass<func::FuncOp>(tosa::createTosaInferShapesPass()); | ||
| pm.addNestedPass<func::FuncOp>(tosa::createTosaMakeBroadcastablePass()); | ||
| pm.addNestedPass<func::FuncOp>(tosa::createTosaLayerwiseConstantFoldPass()); | ||
|
|
||
| // 1. TOSA to Linalg/Arith/Tensor | ||
| pm.addNestedPass<func::FuncOp>(tosa::createTosaToLinalgNamed()); | ||
| pm.addNestedPass<func::FuncOp>(tosa::createTosaToLinalg()); | ||
| pm.addNestedPass<func::FuncOp>(tosa::createTosaToArith()); | ||
| pm.addNestedPass<func::FuncOp>(tosa::createTosaToTensor()); | ||
|
|
||
| // 2. Linalg optimizations | ||
| pm.addNestedPass<func::FuncOp>(createLinalgElementwiseOpFusionPass()); | ||
| pm.addNestedPass<func::FuncOp>(createConvertTensorToLinalgPass()); | ||
|
|
||
| // 3. One-shot bufferization | ||
| // Function signatures are bufferized as well. The call to | ||
| // setFunctionBoundaryTypeConversion already installs the function-argument | ||
| // type converter for the requested identity layout, so no hand-written | ||
| // functionArgTypeConverterFn is set here. | ||
| bufferization::OneShotBufferizationOptions bufOpts; | ||
| bufOpts.bufferizeFunctionBoundaries = true; | ||
| bufOpts.setFunctionBoundaryTypeConversion( | ||
| bufferization::LayoutMapOption::IdentityLayoutMap); | ||
| pm.addPass(bufferization::createOneShotBufferizePass(bufOpts)); | ||
| // Memref function results become extra function arguments (out-params). | ||
| pm.addPass(bufferization::createBufferResultsToOutParamsPass()); | ||
| pm.addPass(createCanonicalizerPass()); | ||
|
|
||
| // 4. Linalg to Affine | ||
| pm.addNestedPass<func::FuncOp>(createConvertLinalgToAffineLoopsPass()); | ||
| pm.addNestedPass<func::FuncOp>(memref::createFoldMemRefAliasOpsPass()); | ||
| pm.addPass(createCanonicalizerPass()); | ||
| pm.addPass(createCSEPass()); | ||
| } | ||
|
|
||
| // Populates `pm` with the end-to-end TOSA -> Taskflow lowering: the shared | ||
| // TOSA -> Affine stages first, then the Affine -> Taskflow conversion pass. | ||
| void buildTosaToTaskflowPipeline(OpPassManager &pm) { | ||
| // Stage 1: reuse the TOSA -> Affine pipeline as the front half. | ||
| buildTosaToAffinePipeline(pm); | ||
|
|
||
| // Stage 2: convert the resulting affine code to the Taskflow dialect. | ||
| pm.addPass(createConvertAffineToTaskflowPass()); | ||
| } | ||
| } // namespace | ||
|
|
||
| // Registers the "tosa-to-affine-pipeline" pass pipeline; its passes are | ||
| // supplied by buildTosaToAffinePipeline. | ||
| void mlir::registerTosaToAffinePipeline() { | ||
| // Constructing the registration object performs the registration. | ||
| PassPipelineRegistration<> tosaToAffineRegistration( | ||
| "tosa-to-affine-pipeline", | ||
| "Lower TOSA to Affine dialect (TOSA -> Linalg -> Affine).", | ||
| buildTosaToAffinePipeline); | ||
| } | ||
|
|
||
| // Registers the "tosa-to-taskflow-pipeline" pass pipeline; its passes are | ||
| // supplied by buildTosaToTaskflowPipeline. | ||
| void mlir::registerTosaToTaskflowPipeline() { | ||
| // Constructing the registration object performs the registration. | ||
| PassPipelineRegistration<> tosaToTaskflowRegistration( | ||
| "tosa-to-taskflow-pipeline", | ||
| "Lower TOSA to Taskflow dialect through Linalg and Affine.", | ||
| buildTosaToTaskflowPipeline); | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| // RUN: mlir-neura-opt --convert-affine-to-taskflow %s 2>/dev/null | FileCheck %s | ||
|
|
||
| // Test Affine to Taskflow conversion | ||
| module { | ||
| func.func @simple_add(%arg0: memref<16xf32>, %arg1: memref<16xf32>, %arg2: memref<16xf32>) { | ||
| affine.for %i = 0 to 16 { | ||
| %0 = affine.load %arg0[%i] : memref<16xf32> | ||
| %1 = affine.load %arg1[%i] : memref<16xf32> | ||
| %2 = arith.addf %0, %1 : f32 | ||
| affine.store %2, %arg2[%i] : memref<16xf32> | ||
| } | ||
| return | ||
| } | ||
| } | ||
|
|
||
| // CHECK-LABEL: func.func @simple_add | ||
| // CHECK-NEXT: %memory_outputs = "taskflow.task"(%arg0, %arg1, %arg2) | ||
| // CHECK-SAME: task_name = "Task_0" | ||
| // CHECK-NEXT: ^bb0(%arg3: memref<16xf32>, %arg4: memref<16xf32>, %arg5: memref<16xf32>): | ||
| // CHECK-NEXT: affine.for %arg6 = 0 to 16 { | ||
| // CHECK-NEXT: %0 = affine.load %arg3[%arg6] : memref<16xf32> | ||
| // CHECK-NEXT: %1 = affine.load %arg4[%arg6] : memref<16xf32> | ||
| // CHECK-NEXT: %2 = arith.addf %0, %1 : f32 | ||
| // CHECK-NEXT: affine.store %2, %arg5[%arg6] : memref<16xf32> | ||
| // CHECK-NEXT: } | ||
| // CHECK-NEXT: "taskflow.yield"(%arg5) | ||
| // CHECK: return |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| // RUN: mlir-neura-opt --tosa-to-affine-pipeline %s | FileCheck %s | ||
|
|
||
| // Test Linalg fusion capability | ||
| // We chain multiple elementwise ops. If fusion works, we should see ONE loop nest. | ||
| func.func @fusion_test(%arg0: tensor<16xf32>) -> tensor<16xf32> { | ||
| %0 = tosa.add %arg0, %arg0 : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32> | ||
| %1 = tosa.mul %0, %0 : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32> | ||
|
|
||
| // A simple relu-like operation: max(0, x) | ||
| %zeros = "tosa.const"() {value = dense<0.0> : tensor<16xf32>} : () -> tensor<16xf32> | ||
| %2 = tosa.maximum %1, %zeros : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32> | ||
|
|
||
| return %2 : tensor<16xf32> | ||
| } | ||
|
|
||
| // CHECK-LABEL: func.func @fusion_test | ||
| // CHECK-SAME: (%arg0: memref<16xf32>, %arg1: memref<16xf32>) | ||
| // CHECK: %cst = arith.constant 0.000000e+00 : f32 | ||
| // CHECK-NEXT: %alloc = memref.alloc() {alignment = 64 : i64} : memref<16xf32> | ||
| // CHECK-NEXT: affine.for %arg2 = 0 to 16 { | ||
| // CHECK-NEXT: %0 = affine.load %arg0[%arg2] : memref<16xf32> | ||
| // CHECK-NEXT: %1 = arith.addf %0, %0 : f32 | ||
| // CHECK-NEXT: %2 = arith.mulf %1, %1 : f32 | ||
| // CHECK-NEXT: %3 = arith.maximumf %2, %cst : f32 | ||
| // CHECK-NEXT: affine.store %3, %alloc[%arg2] : memref<16xf32> | ||
| // CHECK-NEXT: } | ||
| // CHECK-NEXT: memref.copy %alloc, %arg1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| // RUN: mlir-neura-opt --tosa-to-affine-pipeline %s | FileCheck %s | ||
|
|
||
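| // Test lowering of a tosa.add whose operands are arith.constant tensors (a candidate for TOSA constant folding; the expectations below record that folding does not currently trigger) | ||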
| func.func @const_fold_test() -> tensor<4xf32> { | ||
| %cst1 = arith.constant dense<[1.0, 2.0, 3.0, 4.0]> : tensor<4xf32> | ||
| %cst2 = arith.constant dense<[10.0, 20.0, 30.0, 40.0]> : tensor<4xf32> | ||
|
|
||
| // Ideally this add would be constant folded by TOSA before lowering to Linalg; at present it is lowered to a runtime loop instead. | ||
| %folded = tosa.add %cst1, %cst2 : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> | ||
| return %folded : tensor<4xf32> | ||
| } | ||
|
|
||
| // CHECK-LABEL: func.func @const_fold_test | ||
| // TODO: This should be folded to a memory copy of a global constant. | ||
| // Currently TOSA constant folding is not triggering as expected, so we check for the runtime op. | ||
| // CHECK: %0 = memref.get_global @__constant_4xf32 : memref<4xf32> | ||
| // CHECK-NEXT: %1 = memref.get_global @__constant_4xf32_0 : memref<4xf32> | ||
| // CHECK-NEXT: %alloc = memref.alloc() {alignment = 64 : i64} : memref<4xf32> | ||
| // CHECK-NEXT: affine.for %arg1 = 0 to 4 { | ||
| // CHECK-NEXT: %2 = affine.load %0[%arg1] : memref<4xf32> | ||
| // CHECK-NEXT: %3 = affine.load %1[%arg1] : memref<4xf32> | ||
| // CHECK-NEXT: %4 = arith.addf %2, %3 : f32 | ||
| // CHECK-NEXT: affine.store %4, %alloc[%arg1] : memref<4xf32> | ||
| // CHECK-NEXT: } | ||
| // CHECK-NEXT: memref.copy %alloc, %arg0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| // RUN: mlir-neura-opt --tosa-to-affine-pipeline %s | FileCheck %s | ||
|
|
||
| // Test TOSA to Affine lowering | ||
| func.func @simple_add(%arg0: tensor<16xf32>, %arg1: tensor<16xf32>) -> tensor<16xf32> { | ||
| %0 = tosa.add %arg0, %arg1 : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32> | ||
| return %0 : tensor<16xf32> | ||
| } | ||
|
|
||
| // CHECK-LABEL: func.func @simple_add | ||
| // CHECK-SAME: (%arg0: memref<16xf32>, %arg1: memref<16xf32>, %arg2: memref<16xf32>) | ||
| // CHECK: %alloc = memref.alloc() {alignment = 64 : i64} : memref<16xf32> | ||
| // CHECK-NEXT: affine.for %arg3 = 0 to 16 { | ||
| // CHECK-NEXT: %0 = affine.load %arg0[%arg3] : memref<16xf32> | ||
| // CHECK-NEXT: %1 = affine.load %arg1[%arg3] : memref<16xf32> | ||
| // CHECK-NEXT: %2 = arith.addf %0, %1 : f32 | ||
| // CHECK-NEXT: affine.store %2, %alloc[%arg3] : memref<16xf32> | ||
| // CHECK-NEXT: } | ||
| // CHECK-NEXT: memref.copy %alloc, %arg2 | ||
| // CHECK-NEXT: return |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| // RUN: mlir-neura-opt --tosa-to-taskflow-pipeline %s 2>&1 | FileCheck %s | ||
| // Simple TOSA add lowering test | ||
|
|
||
| func.func @simple_add(%arg0: tensor<16xf32>, %arg1: tensor<16xf32>) -> tensor<16xf32> { | ||
| %0 = tosa.add %arg0, %arg1 : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32> | ||
| return %0 : tensor<16xf32> | ||
| } | ||
|
|
||
| // CHECK-LABEL: func.func @simple_add | ||
| // CHECK: %alloc = memref.alloc() {alignment = 64 : i64} : memref<16xf32> | ||
| // CHECK-NEXT: %[[RES:.*]] = "taskflow.task"(%arg0, %arg1, %alloc) | ||
| // CHECK-SAME: task_name = "Task_0" | ||
| // CHECK-NEXT: ^bb0(%arg3: memref<16xf32>, %arg4: memref<16xf32>, %arg5: memref<16xf32>): | ||
| // CHECK-NEXT: affine.for %arg6 = 0 to 16 { | ||
| // CHECK-NEXT: %0 = affine.load %arg3[%arg6] : memref<16xf32> | ||
| // CHECK-NEXT: %1 = affine.load %arg4[%arg6] : memref<16xf32> | ||
| // CHECK-NEXT: %2 = arith.addf %0, %1 : f32 | ||
| // CHECK-NEXT: affine.store %2, %arg5[%arg6] : memref<16xf32> | ||
| // CHECK-NEXT: } | ||
| // CHECK-NEXT: "taskflow.yield"(%arg5) | ||
| // CHECK: memref.copy %[[RES]], %arg2 | ||
| // CHECK-NEXT: return |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.