Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/Conversion/ConversionPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ std::unique_ptr<mlir::Pass> createLowerAffineToNeuraPass();

// TaskFlow Conversion Passes.
std::unique_ptr<mlir::Pass> createConvertAffineToTaskflowPass();
void registerTosaToAffinePipeline();
void registerTosaToTaskflowPipeline();

#define GEN_PASS_REGISTRATION
#include "Conversion/ConversionPasses.h.inc"

Expand Down
2 changes: 2 additions & 0 deletions lib/Conversion/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ add_subdirectory(LlvmToNeura)
add_subdirectory(MemRefToNeura)
add_subdirectory(BuiltinToNeura)
add_subdirectory(AffineToTaskflow)
add_subdirectory(TosaToTaskflow)

add_library(MLIRConversion INTERFACE)

Expand All @@ -23,5 +24,6 @@ target_link_libraries(MLIRConversion INTERFACE
MLIRNeuraMemRefToNeuraPass
MLIRNeuraBuiltinToNeuraPass
MLIRAffineToTaskflowPass
MLIRTosaToTaskflowPipeline
${dialect_libs}
)
24 changes: 24 additions & 0 deletions lib/Conversion/TosaToTaskflow/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Pipeline library providing the tosa-to-affine / tosa-to-taskflow pass
# pipelines (implemented and registered in TosaToTaskflowPipeline.cpp).
add_mlir_library(MLIRTosaToTaskflowPipeline
TosaToTaskflowPipeline.cpp

# Generated pass registration headers must exist before this target builds.
DEPENDS
MLIRConversionIncGen

# Dialects and conversion passes the pipeline schedules; PUBLIC so that
# consumers linking the pipeline also get its transitive pass libraries.
LINK_LIBS PUBLIC
MLIRPass
MLIRTosaDialect
MLIRLinalgDialect
MLIRLinalgTransforms
MLIRAffineDialect
MLIRArithDialect
MLIRTensorDialect
MLIRMemRefDialect
MLIRFuncDialect
MLIRBufferizationDialect
MLIRBufferizationTransforms
MLIRTaskflow
MLIRAffineToTaskflowPass
MLIRTosaToLinalg
MLIRTosaToTensor
MLIRTosaToArith
)
88 changes: 88 additions & 0 deletions lib/Conversion/TosaToTaskflow/TosaToTaskflowPipeline.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
#include "mlir/Conversion/Passes.h"
#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
#include "mlir/Conversion/TosaToArith/TosaToArith.h"
#include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h"
#include "mlir/Conversion/TosaToTensor/TosaToTensor.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/Passes.h"

#include "Conversion/ConversionPasses.h"

using namespace mlir;

namespace {
// Builds the shared lowering pipeline from TOSA down to affine loops over
// memrefs: TOSA cleanup -> Linalg on tensors -> one-shot bufferization ->
// affine loop nests. Used standalone (tosa-to-affine-pipeline) and as the
// front half of the Taskflow pipeline below. Pass order is significant.
void buildTosaToAffinePipeline(OpPassManager &pm) {
// 0. TOSA Optimizations
// These passes must run on func::FuncOp
pm.addNestedPass<func::FuncOp>(tosa::createTosaInferShapesPass());
pm.addNestedPass<func::FuncOp>(tosa::createTosaMakeBroadcastablePass());
pm.addNestedPass<func::FuncOp>(tosa::createTosaLayerwiseConstantFoldPass());

// 1. TOSA to Linalg/Arith/Tensor
// Named-op lowering runs first so structured ops keep their named linalg
// form; the generic TosaToLinalg pass then covers the remaining ops.
pm.addNestedPass<func::FuncOp>(tosa::createTosaToLinalgNamed());
pm.addNestedPass<func::FuncOp>(tosa::createTosaToLinalg());
pm.addNestedPass<func::FuncOp>(tosa::createTosaToArith());
pm.addNestedPass<func::FuncOp>(tosa::createTosaToTensor());

// 2. Linalg optimizations
// Fuse chained elementwise linalg.generic ops so they later lower to a
// single affine loop nest instead of one nest per op.
pm.addNestedPass<func::FuncOp>(createLinalgElementwiseOpFusionPass());
pm.addNestedPass<func::FuncOp>(createConvertTensorToLinalgPass());

// 3. One-shot bufferization
// Bufferize across function boundaries with identity memref layouts so
// downstream passes see plain memref<...> types without layout maps.
bufferization::OneShotBufferizationOptions bufOpts;
bufOpts.bufferizeFunctionBoundaries = true;
bufOpts.setFunctionBoundaryTypeConversion(
bufferization::LayoutMapOption::IdentityLayoutMap);
// NOTE(review): this custom converter appears to override the converter
// installed by setFunctionBoundaryTypeConversion above, which would make
// that setter redundant — confirm and keep only one of the two.
bufOpts.functionArgTypeConverterFn = [](TensorType tensorType, Attribute memorySpace,
func::FuncOp funcOp, const bufferization::BufferizationOptions &options) {
return bufferization::getMemRefTypeWithStaticIdentityLayout(tensorType, memorySpace);
};
pm.addPass(bufferization::createOneShotBufferizePass(bufOpts));
// Rewrite memref results into trailing out-params so callers supply the
// destination buffers (tests check for the resulting memref.copy).
pm.addPass(bufferization::createBufferResultsToOutParamsPass());
pm.addPass(createCanonicalizerPass());

// 4. Linalg to Affine
pm.addNestedPass<func::FuncOp>(createConvertLinalgToAffineLoopsPass());
pm.addNestedPass<func::FuncOp>(memref::createFoldMemRefAliasOpsPass());
pm.addPass(createCanonicalizerPass());
pm.addPass(createCSEPass());
}

// Full pipeline: TOSA -> affine (above), then outline the resulting affine
// loop nests into taskflow.task regions.
void buildTosaToTaskflowPipeline(OpPassManager &pm) {
// 1. TOSA to Affine (foundational pipeline)
buildTosaToAffinePipeline(pm);

// 2. Affine to Taskflow
pm.addPass(createConvertAffineToTaskflowPass());
}
} // namespace

// Registers the "tosa-to-affine-pipeline" entry with MLIR's global pass
// pipeline registry so tools (e.g. mlir-neura-opt) can invoke it by flag.
// Declared in Conversion/ConversionPasses.h.
void mlir::registerTosaToAffinePipeline() {
PassPipelineRegistration<>(
"tosa-to-affine-pipeline",
"Lower TOSA to Affine dialect (TOSA -> Linalg -> Affine).",
buildTosaToAffinePipeline);
}

// Registers the "tosa-to-taskflow-pipeline" entry with MLIR's global pass
// pipeline registry. Declared in Conversion/ConversionPasses.h.
void mlir::registerTosaToTaskflowPipeline() {
PassPipelineRegistration<>(
"tosa-to-taskflow-pipeline",
"Lower TOSA to Taskflow dialect through Linalg and Affine.",
buildTosaToTaskflowPipeline);
}
27 changes: 27 additions & 0 deletions test/Conversion/TosaToTaskflow/affine-to-taskflow.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// RUN: mlir-neura-opt --convert-affine-to-taskflow %s 2>/dev/null | FileCheck %s

// Test Affine to Taskflow conversion
// Input: a single elementwise-add affine loop nest over pre-bufferized
// memrefs; the conversion should outline it into one taskflow.task whose
// block takes the three memrefs as arguments.
module {
func.func @simple_add(%arg0: memref<16xf32>, %arg1: memref<16xf32>, %arg2: memref<16xf32>) {
affine.for %i = 0 to 16 {
%0 = affine.load %arg0[%i] : memref<16xf32>
%1 = affine.load %arg1[%i] : memref<16xf32>
%2 = arith.addf %0, %1 : f32
affine.store %2, %arg2[%i] : memref<16xf32>
}
return
}
}

// CHECK-LABEL: func.func @simple_add
// CHECK-NEXT: %memory_outputs = "taskflow.task"(%arg0, %arg1, %arg2)
// CHECK-SAME: task_name = "Task_0"
// CHECK-NEXT: ^bb0(%arg3: memref<16xf32>, %arg4: memref<16xf32>, %arg5: memref<16xf32>):
// CHECK-NEXT: affine.for %arg6 = 0 to 16 {
// CHECK-NEXT: %0 = affine.load %arg3[%arg6] : memref<16xf32>
// CHECK-NEXT: %1 = affine.load %arg4[%arg6] : memref<16xf32>
// CHECK-NEXT: %2 = arith.addf %0, %1 : f32
// CHECK-NEXT: affine.store %2, %arg5[%arg6] : memref<16xf32>
// CHECK-NEXT: }
// CHECK-NEXT: "taskflow.yield"(%arg5)
// CHECK: return
27 changes: 27 additions & 0 deletions test/Conversion/TosaToTaskflow/tosa-fusion.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// RUN: mlir-neura-opt --tosa-to-affine-pipeline %s | FileCheck %s

// Test Linalg fusion capability
// We chain multiple elementwise ops. If fusion works, we should see ONE loop nest.
// Three chained elementwise ops (add -> mul -> relu-style maximum); the
// CHECK lines below require them to be fused into ONE affine loop nest.
func.func @fusion_test(%arg0: tensor<16xf32>) -> tensor<16xf32> {
%0 = tosa.add %arg0, %arg0 : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32>
%1 = tosa.mul %0, %0 : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32>

// A simple relu-like operation: max(0, x)
%zeros = "tosa.const"() {value = dense<0.0> : tensor<16xf32>} : () -> tensor<16xf32>
%2 = tosa.maximum %1, %zeros : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32>

return %2 : tensor<16xf32>
}

// CHECK-LABEL: func.func @fusion_test
// CHECK-SAME: (%arg0: memref<16xf32>, %arg1: memref<16xf32>)
// CHECK: %cst = arith.constant 0.000000e+00 : f32
// CHECK-NEXT: %alloc = memref.alloc() {alignment = 64 : i64} : memref<16xf32>
// CHECK-NEXT: affine.for %arg2 = 0 to 16 {
// CHECK-NEXT: %0 = affine.load %arg0[%arg2] : memref<16xf32>
// CHECK-NEXT: %1 = arith.addf %0, %0 : f32
// CHECK-NEXT: %2 = arith.mulf %1, %1 : f32
// CHECK-NEXT: %3 = arith.maximumf %2, %cst : f32
// CHECK-NEXT: affine.store %3, %alloc[%arg2] : memref<16xf32>
// CHECK-NEXT: }
// CHECK-NEXT: memref.copy %alloc, %arg1
25 changes: 25 additions & 0 deletions test/Conversion/TosaToTaskflow/tosa-opt.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// RUN: mlir-neura-opt --tosa-to-affine-pipeline %s | FileCheck %s

// Test TOSA optimization (constant folding) with arith.constant
// Adds two arith.constant tensors; ideally folded at compile time, but the
// CHECK lines below currently accept the runtime add (see TODO below).
func.func @const_fold_test() -> tensor<4xf32> {
%cst1 = arith.constant dense<[1.0, 2.0, 3.0, 4.0]> : tensor<4xf32>
%cst2 = arith.constant dense<[10.0, 20.0, 30.0, 40.0]> : tensor<4xf32>

// This add should be constant folded by TOSA before lowering to Linalg
%folded = tosa.add %cst1, %cst2 : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
return %folded : tensor<4xf32>
}

// CHECK-LABEL: func.func @const_fold_test
// TODO: This should be folded to a memory copy of a global constant.
// Currently TOSA constant folding is not triggering as expected, so we check for the runtime op.
// CHECK: %0 = memref.get_global @__constant_4xf32 : memref<4xf32>
// CHECK-NEXT: %1 = memref.get_global @__constant_4xf32_0 : memref<4xf32>
// CHECK-NEXT: %alloc = memref.alloc() {alignment = 64 : i64} : memref<4xf32>
// CHECK-NEXT: affine.for %arg1 = 0 to 4 {
// CHECK-NEXT: %2 = affine.load %0[%arg1] : memref<4xf32>
// CHECK-NEXT: %3 = affine.load %1[%arg1] : memref<4xf32>
// CHECK-NEXT: %4 = arith.addf %2, %3 : f32
// CHECK-NEXT: affine.store %4, %alloc[%arg1] : memref<4xf32>
// CHECK-NEXT: }
// CHECK-NEXT: memref.copy %alloc, %arg0
19 changes: 19 additions & 0 deletions test/Conversion/TosaToTaskflow/tosa-to-affine.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// RUN: mlir-neura-opt --tosa-to-affine-pipeline %s | FileCheck %s

// Test TOSA to Affine lowering
// Single tosa.add on tensors; after bufferization the tensor result becomes
// a trailing memref out-param (%arg2 in the CHECK lines below).
func.func @simple_add(%arg0: tensor<16xf32>, %arg1: tensor<16xf32>) -> tensor<16xf32> {
%0 = tosa.add %arg0, %arg1 : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32>
return %0 : tensor<16xf32>
}

// CHECK-LABEL: func.func @simple_add
// CHECK-SAME: (%arg0: memref<16xf32>, %arg1: memref<16xf32>, %arg2: memref<16xf32>)
// CHECK: %alloc = memref.alloc() {alignment = 64 : i64} : memref<16xf32>
// CHECK-NEXT: affine.for %arg3 = 0 to 16 {
// CHECK-NEXT: %0 = affine.load %arg0[%arg3] : memref<16xf32>
// CHECK-NEXT: %1 = affine.load %arg1[%arg3] : memref<16xf32>
// CHECK-NEXT: %2 = arith.addf %0, %1 : f32
// CHECK-NEXT: affine.store %2, %alloc[%arg3] : memref<16xf32>
// CHECK-NEXT: }
// CHECK-NEXT: memref.copy %alloc, %arg2
// CHECK-NEXT: return
22 changes: 22 additions & 0 deletions test/Conversion/TosaToTaskflow/tosa-to-taskflow.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// RUN: mlir-neura-opt --tosa-to-taskflow-pipeline %s 2>&1 | FileCheck %s
// Simple TOSA add lowering test

// End-to-end check: tosa.add lowered through affine and outlined into a
// taskflow.task, with the result copied into the out-param buffer.
func.func @simple_add(%arg0: tensor<16xf32>, %arg1: tensor<16xf32>) -> tensor<16xf32> {
%0 = tosa.add %arg0, %arg1 : (tensor<16xf32>, tensor<16xf32>) -> tensor<16xf32>
return %0 : tensor<16xf32>
}

// CHECK-LABEL: func.func @simple_add
// CHECK: %alloc = memref.alloc() {alignment = 64 : i64} : memref<16xf32>
// CHECK-NEXT: %[[RES:.*]] = "taskflow.task"(%arg0, %arg1, %alloc)
// CHECK-SAME: task_name = "Task_0"
// CHECK-NEXT: ^bb0(%arg3: memref<16xf32>, %arg4: memref<16xf32>, %arg5: memref<16xf32>):
// CHECK-NEXT: affine.for %arg6 = 0 to 16 {
// CHECK-NEXT: %0 = affine.load %arg3[%arg6] : memref<16xf32>
// CHECK-NEXT: %1 = affine.load %arg4[%arg6] : memref<16xf32>
// CHECK-NEXT: %2 = arith.addf %0, %1 : f32
// CHECK-NEXT: affine.store %2, %arg5[%arg6] : memref<16xf32>
// CHECK-NEXT: }
// CHECK-NEXT: "taskflow.yield"(%arg5)
// CHECK: memref.copy %[[RES]], %arg2
// CHECK-NEXT: return
2 changes: 1 addition & 1 deletion test/benchmark/CGRA-Bench
10 changes: 10 additions & 0 deletions tools/mlir-neura-opt/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ set(LIBS
MLIRConversion
MLIRNeura
MLIRTaskflow
MLIRTosaDialect
MLIRTosaTransforms
MLIRLinalgTransforms
MLIRArithTransforms
MLIRSCFTransforms
MLIRTensorTransforms
MLIRBufferizationDialect
MLIRBufferizationTransforms
MLIRFuncAllExtensions
MLIRTensorAllExtensions
MLIRTransforms
MLIROptLib
MLIRPass
Expand Down
21 changes: 21 additions & 0 deletions tools/mlir-neura-opt/mlir-neura-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,16 @@
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/InitAllDialects.h"
#include "mlir/InitAllPasses.h"
#include "mlir/InitAllExtensions.h"
#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h"
#include "mlir/Support/FileUtilities.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Tools/mlir-opt/MlirOptMain.h"
Expand Down Expand Up @@ -73,16 +81,29 @@ int main(int argc, char **argv) {
registry.insert<mlir::ml_program::MLProgramDialect>();
registry.insert<mlir::tensor::TensorDialect>();
registry.insert<mlir::linalg::LinalgDialect>();
registry.insert<mlir::tosa::TosaDialect>();
registry.insert<mlir::bufferization::BufferizationDialect>();
registry.insert<mlir::taskflow::TaskflowDialect>();
mlir::registerAllExtensions(registry);
mlir::linalg::registerBufferizableOpInterfaceExternalModels(registry);
mlir::tensor::registerBufferizableOpInterfaceExternalModels(registry);
mlir::arith::registerBufferizableOpInterfaceExternalModels(registry);
mlir::scf::registerBufferizableOpInterfaceExternalModels(registry);
mlir::bufferization::func_ext::registerBufferizableOpInterfaceExternalModels(registry);

mlir::neura::registerPasses();
mlir::registerAllPasses();
mlir::registerPasses();
mlir::registerViewOpGraphPass();
mlir::taskflow::registerPasses();

// Register all standard conversion passes
mlir::registerConversionPasses();

// Register TOSA to Taskflow pipeline
mlir::registerTosaToAffinePipeline();
mlir::registerTosaToTaskflowPipeline();

// Print architecture spec file info
if (!architecture_spec_file.empty()) {
llvm::errs() << "[mlir-neura-opt] Architecture specification file: "
Expand Down