Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions include/TaskflowDialect/TaskflowAttributes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#pragma once

#include "llvm/ADT/StringRef.h"

namespace mlir {
namespace taskflow {
namespace attr {
// Name of the dictionary attribute that TaskDivisibilityAnalysisPass sets on
// each taskflow.task operation. The remaining constants in this namespace name
// the entries stored inside that dictionary.
constexpr llvm::StringLiteral kDivisibilityInfo = "divisibility_info";
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Call this task_info?

// Dictionary entry holding the task classification as a string; the accepted
// values are listed in namespace `val`.
constexpr llvm::StringLiteral kDivisibility = "divisibility";
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can these be named as parallelism = parallel/atomic?

// Dictionary entry holding the loop depth indices of the parallel loops.
constexpr llvm::StringLiteral kParallelDims = "parallel_dims";
// Dictionary entry holding the trip counts of those parallel loops, in the
// same order as the `parallel_dims` entry.
constexpr llvm::StringLiteral kParallelSpace = "parallel_space";

// String values stored under the `divisibility` dictionary entry.
namespace val {
// The task has at least one parallel loop with a trip count greater than one.
constexpr llvm::StringLiteral kDivisible = "divisible";
// No such loop was found; this is the analysis default.
constexpr llvm::StringLiteral kAtomic = "atomic";
} // namespace val
} // namespace attr
} // namespace taskflow
} // namespace mlir
1 change: 1 addition & 0 deletions include/TaskflowDialect/TaskflowPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ std::unique_ptr<mlir::Pass> createConstructHyperblockFromTaskPass();
std::unique_ptr<mlir::Pass> createClassifyCountersPass();
std::unique_ptr<mlir::Pass> createMapTaskOnCgraPass();
std::unique_ptr<mlir::Pass> createFuseTaskPass();
std::unique_ptr<mlir::Pass> createTaskDivisibilityAnalysisPass();

//=========================================================//
// Optimization Passes
Expand Down
27 changes: 27 additions & 0 deletions include/TaskflowDialect/TaskflowPasses.td
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,33 @@ def FuseTask : Pass<"fuse-task", "func::FuncOp"> {
"mlir::taskflow::TaskflowDialect"];
}

def TaskDivisibilityAnalysis : Pass<"task-divisibility-analysis", "func::FuncOp"> {
  let summary = "Analyzes task divisibility based on loop parallelism";
  let description = [{
    Analyzes each taskflow.task operation to determine whether its loop nest
    contains parallel loops that can be tiled for data-level parallelism.

    Task divisibility categories:
    - divisible: The task has at least one parallel loop (no loop-carried
      dependencies) whose trip count > 1. Such tasks can be tiled into
      sibling sub-tasks for runtime configuration duplication (DLP).
    - atomic: The task has no exploitable parallel loops. It must execute
      as a single indivisible unit.

    The pass attaches a single dictionary attribute, `divisibility_info`, to
    each taskflow.task. The dictionary holds three entries:
    - divisibility   : StringAttr ("divisible" or "atomic")
    - parallel_dims  : DenseI32ArrayAttr (loop depth indices of parallel loops)
    - parallel_space : DenseI32ArrayAttr (trip counts of the parallel dims)

    Parallel loop detection uses MLIR's affine dependence analysis
    (isLoopParallel) without reduction detection, so a loop that carries
    values through iter_args (for example a reduction) is not counted as
    parallel and does not make the task divisible.
  }];
  let constructor = "taskflow::createTaskDivisibilityAnalysisPass()";
  let dependentDialects = ["mlir::affine::AffineDialect",
                           "mlir::func::FuncDialect"];
}

def MemoryAccessStreamingFusion
: Pass<"memory-access-streaming-fusion", "func::FuncOp"> {
let summary =
Expand Down
1 change: 1 addition & 0 deletions lib/TaskflowDialect/Transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ add_mlir_library(MLIRTaskflowTransforms
ClassifyCountersPass.cpp
MapTaskOnCgraPass.cpp
FuseTaskPass.cpp
TaskDivisibilityAnalysisPass.cpp

DEPENDS
MLIRTaskflowTransformsIncGen
Expand Down
220 changes: 220 additions & 0 deletions lib/TaskflowDialect/Transforms/TaskDivisibilityAnalysisPass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
//===- TaskDivisibilityAnalysisPass.cpp - Analyze task divisibility ----===//
//
// This pass analyzes each taskflow.task operation to determine whether its
// loop nest contains parallel loops that can be tiled for data-level
// parallelism (DLP).
//
// Task divisibility categories:
// - divisible: Has at least one parallel loop (no loop-carried deps) with
// trip_count > 1. Can be tiled into sibling sub-tasks for runtime
// configuration duplication.
// - atomic: No exploitable parallel loops. Must execute as a single
// indivisible unit.
//
// The pass attaches an attribute to each taskflow.task:
// divisibility_info = {
// divisibility : StringAttr ("divisible" or "atomic")
// parallel_dims : DenseI32ArrayAttr (loop depth indices of parallel loops)
// parallel_space : DenseI32ArrayAttr (trip counts of those parallel loops)
// }
//
//===----------------------------------------------------------------------===//

#include "TaskflowDialect/TaskflowAttributes.h"
#include "TaskflowDialect/TaskflowDialect.h"
#include "TaskflowDialect/TaskflowOps.h"
#include "TaskflowDialect/TaskflowPasses.h"

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"

#include <cstdint>
#include <limits>
#include <optional>

using namespace mlir;
using namespace mlir::taskflow;

namespace {

//===----------------------------------------------------------------------===//
// Loop Nest Traversal Helpers
//===----------------------------------------------------------------------===//

// Builds the "spine" of the loop nest rooted at `outermost`.
//
// Starting from `outermost`, each level is appended to the result and the
// direct children of its body are scanned for affine.for ops. Descent
// continues only while a level contains exactly one such child; a level with
// zero or with several nested loops ends the spine. Other (non-loop) ops in a
// body do not stop the descent, so imperfect nests are followed as well.
static SmallVector<affine::AffineForOp>
collectLoopNest(affine::AffineForOp outermost) {
  SmallVector<affine::AffineForOp> spine;

  for (affine::AffineForOp level = outermost; level;) {
    spine.push_back(level);

    // Finds the unique affine.for directly inside this level, if any.
    affine::AffineForOp only_child = nullptr;
    bool has_several_children = false;
    for (Operation &child : *level.getBody()) {
      auto inner = dyn_cast<affine::AffineForOp>(&child);
      if (!inner)
        continue;
      if (only_child) {
        // A second nested loop means this is not a simple chain.
        has_several_children = true;
        break;
      }
      only_child = inner;
    }

    if (has_several_children)
      only_child = nullptr;
    level = only_child;
  }

  return spine;
}

//===----------------------------------------------------------------------===//
// Per-Task Parallelism Analysis
//===----------------------------------------------------------------------===//

// Result of analyzing one taskflow.task: its classification plus the parallel
// loop levels that justify it. `parallel_dims` and `parallel_space` are filled
// in lock-step, so entry i of one describes the same loop as entry i of the
// other.
struct TaskParallelismInfo {
  // "divisible" or "atomic".
  StringRef divisibility;
  // Depth indices (0 = outermost) of the parallel loops within the collected
  // loop nest.
  SmallVector<int> parallel_dims;
  // Constant trip counts of the loops listed in `parallel_dims`.
  SmallVector<int> parallel_space;
};

// Analyzes a single taskflow.task and classifies it as divisible or atomic.
//
// Only the first affine.for located directly in the task body is inspected,
// together with the chain of nested loops returned by collectLoopNest(). A
// loop level is recorded as a parallel dimension when affine::isLoopParallel()
// accepts it and its constant trip count is greater than one; loops whose trip
// count is not a compile-time constant are never recorded.
//
// Returns the classification together with the depth index and trip count of
// every recorded level. Trip counts that do not fit in `int` are saturated to
// the largest `int` so that they are never truncated into a bogus value.
// Progress is reported on llvm::errs().
static TaskParallelismInfo analyzeTask(TaskflowTaskOp task_op) {
  TaskParallelismInfo info;
  info.divisibility = attr::val::kAtomic; // Default: no parallelism found.

  // Picks the first affine.for that sits directly in the task body. Scanning
  // only the top-level ops avoids visiting every nested loop just to reject it.
  affine::AffineForOp outermost_loop = nullptr;
  for (affine::AffineForOp for_op :
       task_op.getBody().getOps<affine::AffineForOp>()) {
    outermost_loop = for_op;
    break;
  }

  if (!outermost_loop) {
    llvm::errs() << "[TaskDivisibilityAnalysis] Task " << task_op.getTaskName()
                 << ": no affine.for found, classified as atomic\n";
    return info;
  }

  // Collects the loop nest spine.
  const SmallVector<affine::AffineForOp> loop_nest =
      collectLoopNest(outermost_loop);

  llvm::errs() << "[TaskDivisibilityAnalysis] Task " << task_op.getTaskName()
               << ": loop nest depth = " << loop_nest.size() << "\n";

  // Largest trip count that can be stored in the `int`-based result vectors.
  constexpr uint64_t kMaxRecordableTripCount =
      static_cast<uint64_t>(std::numeric_limits<int>::max());

  // Analyzes each loop level for parallelism.
  for (size_t depth = 0; depth < loop_nest.size(); ++depth) {
    affine::AffineForOp loop = loop_nest[depth];

    // Reduction detection is not requested here, so loops that carry values
    // through iter_args are reported as non-parallel.
    const bool is_parallel = affine::isLoopParallel(loop);

    // getConstantTripCount() yields an unsigned 64-bit count; keep that width
    // until the value has been clamped into the representable range.
    const std::optional<uint64_t> trip_count =
        affine::getConstantTripCount(loop);
    int tc = -1; // -1 marks a non-constant trip count.
    if (trip_count) {
      tc = static_cast<int>(*trip_count > kMaxRecordableTripCount
                                ? kMaxRecordableTripCount
                                : *trip_count);
    }

    llvm::errs() << "[TaskDivisibilityAnalysis]   depth " << depth
                 << ": parallel=" << is_parallel << ", trip_count=" << tc
                 << "\n";

    if (is_parallel && tc > 1) {
      info.parallel_dims.push_back(static_cast<int>(depth));
      info.parallel_space.push_back(tc);
    }
  }

  // Classifies based on whether any parallel dims were found.
  if (!info.parallel_dims.empty()) {
    info.divisibility = attr::val::kDivisible;
  }

  // Prints `values` as a comma-separated list without surrounding brackets.
  auto print_list = [](ArrayRef<int> values) {
    for (size_t i = 0; i < values.size(); ++i) {
      if (i > 0)
        llvm::errs() << ",";
      llvm::errs() << values[i];
    }
  };

  llvm::errs() << "[TaskDivisibilityAnalysis] Task " << task_op.getTaskName()
               << " -> " << info.divisibility;
  if (!info.parallel_dims.empty()) {
    llvm::errs() << ", parallel_dims=[";
    print_list(info.parallel_dims);
    llvm::errs() << "], parallel_space=[";
    print_list(info.parallel_space);
    llvm::errs() << "]";
  }
  llvm::errs() << "\n";

  return info;
}

//===----------------------------------------------------------------------===//
// Task Divisibility Analysis Pass
//===----------------------------------------------------------------------===//

// Function-level pass that runs analyzeTask() on every taskflow.task in the
// function and records the outcome on the op as the `divisibility_info`
// dictionary attribute.
struct TaskDivisibilityAnalysisPass
    : public PassWrapper<TaskDivisibilityAnalysisPass,
                         OperationPass<func::FuncOp>> {
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TaskDivisibilityAnalysisPass)

  StringRef getArgument() const final { return "task-divisibility-analysis"; }

  StringRef getDescription() const final {
    return "Analyzes task divisibility based on loop parallelism";
  }

  void runOnOperation() override {
    func::FuncOp func = getOperation();

    llvm::errs() << "[TaskDivisibilityAnalysis] Running on function: "
                 << func.getName() << "\n";

    func.walk([](TaskflowTaskOp task_op) {
      const TaskParallelismInfo result = analyzeTask(task_op);

      // The builder is only used as an attribute factory; no IR is inserted.
      Builder attr_builder(task_op.getContext());

      // Entries of the dictionary: classification, parallel loop depths, and
      // the matching trip counts.
      const NamedAttribute entries[] = {
          attr_builder.getNamedAttr(
              attr::kDivisibility,
              attr_builder.getStringAttr(result.divisibility)),
          attr_builder.getNamedAttr(
              attr::kParallelDims,
              attr_builder.getDenseI32ArrayAttr(result.parallel_dims)),
          attr_builder.getNamedAttr(
              attr::kParallelSpace,
              attr_builder.getDenseI32ArrayAttr(result.parallel_space)),
      };

      task_op->setAttr(attr::kDivisibilityInfo,
                       DictionaryAttr::get(task_op.getContext(), entries));
    });
  }
};

} // namespace

//===----------------------------------------------------------------------===//
// Pass Registration
//===----------------------------------------------------------------------===//

// Factory for the task divisibility analysis pass; ownership of the new pass
// instance is handed to the caller.
std::unique_ptr<Pass> mlir::taskflow::createTaskDivisibilityAnalysisPass() {
  std::unique_ptr<Pass> pass = std::make_unique<TaskDivisibilityAnalysisPass>();
  return pass;
}
23 changes: 23 additions & 0 deletions test/multi-cgra/kernel_mapping/fir/fir.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
// RUN: -o %t.taskflow.mlir
// RUN: FileCheck %s --input-file=%t.taskflow.mlir --check-prefixes=TASKFLOW

// RUN: mlir-neura-opt %s --convert-affine-to-taskflow \
// RUN: --task-divisibility-analysis \
// RUN: -o %t.div.mlir
// RUN: FileCheck %s --input-file=%t.div.mlir --check-prefixes=DIV

// RUN: mlir-neura-opt %s --convert-affine-to-taskflow \
// RUN: --construct-hyperblock-from-task \
// RUN: -o %t.hyperblock.mlir
Expand Down Expand Up @@ -107,6 +112,24 @@ module attributes {} {
// TASKFLOW-NEXT: }
// TASKFLOW-NEXT: }

// DIV: module {
// DIV-NEXT: func.func @_Z6kernelPiS_S_(%arg0: memref<?xi32>, %arg1: memref<?xi32>, %arg2: memref<?xi32>) -> i32 attributes {llvm.linkage = #llvm.linkage<external>} {
// DIV-NEXT: %c0_i32 = arith.constant 0 : i32
// DIV-NEXT: %dependency_read_out:2, %value_outputs = taskflow.task @Task_0 dependency_read_in(%arg0, %arg2 : memref<?xi32>, memref<?xi32>) value_inputs(%c0_i32 : i32) [original_read_memrefs(%arg0, %arg2 : memref<?xi32>, memref<?xi32>)] {divisibility_info = {divisibility = "atomic", parallel_dims = array<i32>, parallel_space = array<i32>}} : (memref<?xi32>, memref<?xi32>, i32) -> (memref<?xi32>, memref<?xi32>, i32) {
// DIV-NEXT: ^bb0(%arg3: memref<?xi32>, %arg4: memref<?xi32>, %arg5: i32):
// DIV-NEXT: %0 = affine.for %arg6 = 0 to 32 iter_args(%arg7 = %arg5) -> (i32) {
// DIV-NEXT: %1 = affine.load %arg3[%arg6] : memref<?xi32>
// DIV-NEXT: %2 = affine.load %arg4[%arg6] : memref<?xi32>
// DIV-NEXT: %3 = arith.muli %1, %2 : i32
// DIV-NEXT: %4 = arith.addi %arg7, %3 : i32
// DIV-NEXT: affine.yield %4 : i32
// DIV-NEXT: }
// DIV-NEXT: taskflow.yield reads(%arg3, %arg4 : memref<?xi32>, memref<?xi32>) values(%0 : i32)
// DIV-NEXT: }
// DIV-NEXT: return %value_outputs : i32
// DIV-NEXT: }
// DIV-NEXT: }

// HYPERBLOCK: module {
// HYPERBLOCK-NEXT: func.func @_Z6kernelPiS_S_(%arg0: memref<?xi32>, %arg1: memref<?xi32>, %arg2: memref<?xi32>) -> i32 attributes {llvm.linkage = #llvm.linkage<external>} {
// HYPERBLOCK-NEXT: %c0_i32 = arith.constant 0 : i32
Expand Down
Loading
Loading