Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f370da0
add counter classification pass
ShangkunLi Jan 22, 2026
fe987b8
change the definition of taskflow.hyperblock.yield
ShangkunLi Jan 22, 2026
5bb3777
change the definition of neura.kernel
ShangkunLi Jan 22, 2026
db78bc7
enable taskflow to neura conversion
ShangkunLi Jan 22, 2026
9ef2216
assign accelerator for neura.kernel
ShangkunLi Jan 23, 2026
3ff449b
enable neura.kernel lowering in conversion passes
ShangkunLi Jan 23, 2026
f41691a
enable promote func/kernel arguments to constant
ShangkunLi Jan 23, 2026
3907f67
enable canonicalize-return for neura.kernel
ShangkunLi Jan 23, 2026
8ae8187
enable leverage-predicated-values for neura.kernel
ShangkunLi Jan 23, 2026
deffa0a
enable kernel with counters dataflow lowering
ShangkunLi Jan 24, 2026
c5e42eb
enable kernel without counters dataflow lowering
ShangkunLi Jan 24, 2026
2842fae
enable kernel mapping
ShangkunLi Jan 24, 2026
6f7084d
enable kernel mapping
ShangkunLi Jan 24, 2026
7a06474
distinguish iter_arg_init in fold-constant pass
ShangkunLi Jan 24, 2026
8948aee
add tests for e2e taskflow2neura test
ShangkunLi Jan 24, 2026
b86894d
change the definition of taskflow.hyperblock.yield
ShangkunLi Jan 22, 2026
10b1076
[clean] remove redundant code
ShangkunLi Jan 26, 2026
2ce8031
[clean] remove redundant files
ShangkunLi Jan 26, 2026
d8e7c0f
sync with main
ShangkunLi Jan 27, 2026
b454a8d
sync with main
ShangkunLi Jan 31, 2026
75cbdfe
recover wraploopinkernel pass
ShangkunLi Jan 31, 2026
2898f15
[fix] fix bugs in iter_args handling
ShangkunLi Jan 31, 2026
565c4fd
sync with main
ShangkunLi Jan 31, 2026
616da9d
revert the github workflow
ShangkunLi Jan 31, 2026
67ea96b
modify the git clone cmd
ShangkunLi Jan 31, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/Conversion/ConversionPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ std::unique_ptr<mlir::Pass> createLowerAffineToNeuraPass();

// TaskFlow Conversion Passes.
std::unique_ptr<mlir::Pass> createConvertAffineToTaskflowPass();

std::unique_ptr<mlir::Pass> createConvertTaskflowToNeuraPass();
#define GEN_PASS_REGISTRATION
#include "Conversion/ConversionPasses.h.inc"

Expand Down
13 changes: 13 additions & 0 deletions include/Conversion/ConversionPasses.td
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,17 @@ def ConvertAffineToTaskflow : Pass<"convert-affine-to-taskflow", "ModuleOp">{
];
}

// Module-level conversion pass: rewrites taskflow.hyperblock ops (with their
// leaf counters) into neura.kernel ops so the region can be mapped onto a
// CGRA tile array. Registered under the flag "convert-taskflow-to-neura".
def ConvertTaskflowToNeura : Pass<"convert-taskflow-to-neura", "ModuleOp">{
let summary = "Convert taskflow.hyperblock to neura.kernel";
let description = [{
Converts taskflow.hyperblock operations with leaf counters into neura.kernel
operations suitable for CGRA tile array mapping.
}];
let constructor = "mlir::createConvertTaskflowToNeuraPass()";
// Both dialects must be loaded: the pass matches taskflow ops and creates
// neura ops.
let dependentDialects = [
"mlir::taskflow::TaskflowDialect",
"mlir::neura::NeuraDialect"
];
}

#endif // CONVERSION_PASSES_TD
10 changes: 8 additions & 2 deletions include/NeuraDialect/Architecture/Architecture.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,10 @@ enum OperationKind {
// Data movement operations.
IReserve = 38,
IDataMov = 39,
ICtrlMov = 40
ICtrlMov = 40,
// Counter operations.
ICounter = 41,
IExtractPredicate = 42
};

// Maps hardware resource names to their supported operations.
Expand Down Expand Up @@ -135,7 +138,10 @@ static const std::map<std::string, std::vector<OperationKind>>

// Predicate operations.
{"grant", {IGrantPredicate, IGrantOnce, IGrantAlways}},
};

// Counter operations.
{"counter", {ICounter}},
{"extract_predicate", {IExtractPredicate}}};

//===----------------------------------------------------------------------===//
// BasicResource: abstract base class for Tile, Link, etc.
Expand Down
11 changes: 6 additions & 5 deletions include/NeuraDialect/Architecture/ArchitectureSpec.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@ struct TileDefaults {
// Default function unit types - include all supported function units
// types for newbie convenience.
std::vector<std::string> function_units = {
"add", "mul", "div", "fadd", "fmul",
"fdiv", "logic", "cmp", "sel", "type_conv",
"shift", "vfmul", "fadd_fadd", "fmul_fadd", "grant",
"loop_control", "phi", "constant", "mem", "return",
"mem_indexed", "alloca"};
"add", "mul", "div", "fadd",
"fmul", "fdiv", "logic", "cmp",
"sel", "type_conv", "shift", "vfmul",
"fadd_fadd", "fmul_fadd", "grant", "loop_control",
"phi", "constant", "mem", "return",
"mem_indexed", "alloca", "counter", "extract_predicate"};
};

// Structure for holding memory configuration.
Expand Down
12 changes: 6 additions & 6 deletions include/NeuraDialect/Mapping/mapping_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@ struct RecurrenceCycle {
};

// Collects recurrence cycles rooted at reserve and closed by ctrl_mov.
SmallVector<RecurrenceCycle, 4> collectRecurrenceCycles(Operation *func_op);
SmallVector<RecurrenceCycle, 4> collectRecurrenceCycles(Region &region);

// Calculates ResMII: ceil(#ops / #tiles).
int calculateResMii(Operation *func_op, const Architecture &architecture);
int calculateResMii(Region &region, const Architecture &architecture);

// Returns topologically sorted operations in func_op.
std::vector<Operation *> getTopologicallySortedOps(Operation *func_op);
// Returns topologically sorted operations in region.
std::vector<Operation *> getTopologicallySortedOps(Region &region);

// Given the sorted operations, returns a vector of pairs where each pair
// contains a vector of operations at the same ALAP (as late as possible)
Expand Down Expand Up @@ -82,8 +82,8 @@ bool tryRouteBackwardMove(Operation *mov_op, MappingLoc src_loc,
// ctrl_mov users found.
llvm::SmallVector<Operation *> getCtrlMovUsers(Operation *op);

// Identifies operations on the critical path (i.e., operations with zero slack).
// Returns pair of: (critical_ops_set, asap_level_map)
// Identifies operations on the critical path (i.e., operations with zero
// slack). Returns pair of: (critical_ops_set, asap_level_map)
std::pair<std::set<Operation *>, llvm::DenseMap<Operation *, int>>
identifyCriticalPathOps(const std::vector<Operation *> &sorted_ops);

Expand Down
74 changes: 67 additions & 7 deletions include/NeuraDialect/NeuraOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ include "mlir/IR/OpBase.td"
// Defines neura kernel related operations.
// ----------------------------------------------------

def Neura_KernelOp : Op<NeuraDialect, "kernel", [RecursiveMemoryEffects, SingleBlockImplicitTerminator<"YieldOp">]> {
def Neura_KernelOp : Op<NeuraDialect, "kernel", [
IsolatedFromAbove,
RecursiveMemoryEffects,
AttrSizedOperandSegments
]> {
let summary = "Marks a region for CGRA execution.";
let description = [{
Defines a computation region that should be offloaded to CGRA.
Expand Down Expand Up @@ -41,25 +45,27 @@ def Neura_KernelOp : Op<NeuraDialect, "kernel", [RecursiveMemoryEffects, SingleB

let arguments = (ins
Variadic<AnyType>:$inputs, // Input operands from surrounding context.
Variadic<AnyType>:$iter_args_init, // Initial values for loop carried variables.
OptionalAttr<I32Attr>:$cgra_id, // Target CGRA ID (for multi-CGRA systems).
OptionalAttr<StrAttr>:$kernel_name, // Name of the kernel (for identification).
OptionalAttr<StrAttr>:$accelerator // Target accelerator name.
);

let results = (outs Variadic<AnyType>:$outputs);

let regions = (region SizedRegion<1>:$body);
let regions = (region AnyRegion:$body);

let assemblyFormat = [{
(`ins` `(` $inputs^ `:` type($inputs) `)` )?
(`inputs` `(` $inputs^ `:` type($inputs) `)` )?
(`iter_args_init` `(` $iter_args_init^ `:` type($iter_args_init) `)` )?
attr-dict-with-keyword
$body
(`:` type($outputs)^)?
}];
}

// Yield operation for fused_op and kernel regions.
def Neura_YieldOp : Op<NeuraDialect, "yield", [Terminator, Pure, ReturnLike]> {
def Neura_YieldOp : Op<NeuraDialect, "yield", [Terminator, Pure, ReturnLike, AttrSizedOperandSegments]> {
let summary = "Yield values from a neura.kernel or neura.fused_op region.";
let description = [{
Returns values from a neura.kernel or neura.fused_op region to the parent operation.
Expand All @@ -72,13 +78,17 @@ def Neura_YieldOp : Op<NeuraDialect, "yield", [Terminator, Pure, ReturnLike]> {
} : f32
}];

let arguments = (ins Variadic<AnyType>:$values);
let arguments = (ins
Variadic<AnyType>:$iter_args_next,
Variadic<AnyType>:$results);

let builders = [
OpBuilder<(ins), [{ build($_builder, $_state, ValueRange{}); }]>
OpBuilder<(ins), [{ build($_builder, $_state, ValueRange{}, ValueRange{}); }]>
];

let assemblyFormat = [{($values^ `:` type($values))? attr-dict}];
let assemblyFormat = [{
(`iter_args_next` `(` $iter_args_next^ `:` type($iter_args_next) `)` )?
(`results` `(` $results^ `:` type($results) `)` )? attr-dict}];

let hasVerifier = 1;
}
Expand Down Expand Up @@ -770,6 +780,56 @@ def Neura_LoopControlOp : Op<NeuraDialect, "loop_control">{
// " `(``parent_valid` `=` $parentValid `,` `start` `=` $start `,` `end` `=` $end `,` `step` `=` $step`)` attr-dict `:` type($parentValid) `,` type($start) `,` type($end) `,` type($step) `->` type($nextindex) `,` type($valid)";
}

// Defines an operation for hardware loop counters.
def Neura_CounterOp : Op<NeuraDialect, "counter", [Pure]>{
let summary = "Hardware loop counter for CGRA execution.";
let description = [{
Represents a hardware loop counter unit that generates loop indices.
This maps directly to a counter FU on the CGRA.

The counter produces:
- current index: the current loop index value.

Example:
%idx = neura.counter {
lower_bound = 0 : index,
upper_bound = 32 : index,
step = 1 : index,
counter_type = "leaf",
counter_id = 0 : i32
} : index
}];
// All attributes are required; counter_id uniquely identifies the counter
// FU instance, counter_type is one of "root"/"relay"/"leaf".
let arguments = (ins
IndexAttr:$lower_bound,
IndexAttr:$upper_bound,
IndexAttr:$step,
StrAttr:$counter_type,
I32Attr:$counter_id
);

let results = (outs AnyType:$current_index);
let assemblyFormat = "attr-dict `:` type($current_index)";
}

// Defines an operation to extract the predicate bit from a predicated value.
def Neura_ExtractPredicateOp : Op<NeuraDialect, "extract_predicate">{
let summary = "Extracts the predicate bit from a predicated value.";
let description = [{
Extracts the predicate bit from a predicated value,
producing a boolean predicated value: !neura.predicated<i1, i1>.

Example:
%counter = neura.counter {
lower_bound = 0 : index,
upper_bound = 16 : index,
step = 1 : index,
counter_type = "leaf",
counter_id = 0 : i32
} : !neura.predicated<index, i1>
%is_valid = neura.extract_predicate %counter : !neura.predicated<index, i1> -> !neura.predicated<i1, i1>

// Use for gating final results:
%gated = neura.grant_predicate %result, %is_valid
}];

let arguments = (ins AnyType:$input);
let results = (outs AnyType:$predicate);
let assemblyFormat = "$input attr-dict `:` type($input) `->` type($predicate)";
}

// ----------------------------------------------------
// Defines operations for steering-control based DFG execution.
// ----------------------------------------------------
Expand Down
3 changes: 2 additions & 1 deletion include/NeuraDialect/NeuraPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ std::unique_ptr<mlir::Pass> createMapToAcceleratorPass();
std::unique_ptr<mlir::Pass> createGenerateCodePass();
std::unique_ptr<mlir::Pass> createCanonicalizeReturnPass();
std::unique_ptr<mlir::Pass> createCanonicalizeLiveInPass();
std::unique_ptr<mlir::Pass> createPromoteFuncArgToConstPass();
std::unique_ptr<mlir::Pass> createPromoteInputArgToConstPass();
std::unique_ptr<mlir::Pass> createTransformToSteerControlPass();
std::unique_ptr<mlir::Pass> createRemovePredicatedTypePass();
std::unique_ptr<mlir::Pass> createWrapLoopInKernelPass();
Expand All @@ -38,6 +38,7 @@ std::unique_ptr<mlir::Pass> createWrapLoopInKernelPass();
// Hardware specific optimization passes
std::unique_ptr<mlir::Pass> createFuseLoopControlPass();
std::unique_ptr<mlir::Pass> createFusePatternPass();
std::unique_ptr<mlir::Pass> createWrapLoopInKernelPass();

// Hardware agnostic optimization passes
std::unique_ptr<mlir::Pass> createFoldConstantPass();
Expand Down
8 changes: 4 additions & 4 deletions include/NeuraDialect/NeuraPasses.td
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,12 @@ def CanonicalizeLiveIn : Pass<"canonicalize-live-in", "ModuleOp"> {
let constructor = "neura::createCanonicalizeLiveInPass()";
}

def PromoteFuncArgToConst : Pass<"promote-func-arg-to-const", "ModuleOp"> {
let summary = "Promotes function arguments to neura constant operations";
def PromoteInputArgToConst : Pass<"promote-input-arg-to-const", "ModuleOp"> {
let summary = "Promotes input arguments of functions or neura.kernels to neura constant operations";
let description = [{
This pass promotes function arguments to neura constant operations.
This pass promotes input arguments of functions or neura.kernels to neura constant operations.
}];
let constructor = "neura::createPromoteFuncArgToConstPass()";
let constructor = "neura::createPromoteInputArgToConstPass()";
}

def CanonicalizeCast : Pass<"canonicalize-cast", "ModuleOp"> {
Expand Down
31 changes: 19 additions & 12 deletions include/TaskflowDialect/TaskflowOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -144,39 +144,42 @@ def TaskflowChannelOp : TaskflowOpBase<"channel", [Pure, SameOperandsAndResultTy
// Intra-Task Operations.
//----------------------------------------------------------------------
// Counter operation representing loop iteration control within a Taskflow task.
def TaskflowCounterOp : TaskflowOpBase<"counter", [Pure]>{
def TaskflowCounterOp : TaskflowOpBase<"counter", []>{
let summary = "Loop counter operation with hardware counter semantics";

let description = [{
Represents a loop counter that generates iteration indices.
The hardware counter produces a predicated index value.

Counter behavior:
- Top-level counter: increments unconditionally each cycle.
- Nested counter: increments only when the parent counter is valid.
Counter classification:
- "root": Top-level counter with no parent (drives entire loop nest)
- "relay": Intermediate counter with both parent and child counters
- "leaf": Innermost counter with no child counters (maps to CGRA tile array)

Example:
// Top-level counter
// Root counter
%i = taskflow.counter {
lower_bound = 0 : index,
upper_bound = 16 : index,
step = 1 : index,
counter_name = "i"
counter_type = "root"
} : index
// Nested counter
// Leaf counter
%j = taskflow.counter parent(%i) {
lower_bound = 0 : index,
upper_bound = 8 : index,
step = 1 : index,
counter_name = "j"
counter_type = "leaf"
} : index
}];

let arguments = (ins
Optional<AnyType>:$parent_index,
IndexAttr:$lower_bound,
IndexAttr:$upper_bound,
IndexAttr:$step
IndexAttr:$step,
OptionalAttr<StrAttr>:$counter_type,
OptionalAttr<I32Attr>:$counter_id
);

let results = (outs AnyType:$counter_index);
Expand Down Expand Up @@ -233,6 +236,7 @@ def TaskflowHyperblockYieldOp : TaskflowOpBase<"hyperblock.yield", [
Terminator,
Pure,
ReturnLike,
AttrSizedOperandSegments,
ParentOneOf<["TaskflowHyperblockOp"]>
]>{
let summary = "Yield operation for Taskflow hyperblock";
Expand All @@ -241,15 +245,18 @@ def TaskflowHyperblockYieldOp : TaskflowOpBase<"hyperblock.yield", [
Terminates the hyperblock body.
}];

let arguments = (ins Variadic<AnyType>:$outputs);
let arguments = (ins
Variadic<AnyType>:$iter_args_next,
Variadic<AnyType>:$results);

let assemblyFormat = [{
(`outputs` `(` $outputs^ `:` type($outputs) `)`)?
(`iter_args_next` `(` $iter_args_next^ `:` type($iter_args_next) `)`)?
(`results` `(` $results^ `:` type($results) `)`)?
attr-dict
}];

let builders = [
OpBuilder<(ins), [{build($_builder, $_state, ValueRange{});}]>
OpBuilder<(ins), [{build($_builder, $_state, ValueRange{}, ValueRange{});}]>
];
}

Expand Down
1 change: 1 addition & 0 deletions include/TaskflowDialect/TaskflowPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ namespace taskflow {
#include "TaskflowDialect/TaskflowPasses.h.inc"
std::unique_ptr<mlir::Pass> createConstructHyperblockFromTaskPass();
std::unique_ptr<mlir::Pass> createCanonicalizeTaskPass();
std::unique_ptr<mlir::Pass> createClassifyCountersPass();

#define GEN_PASS_REGISTRATION
#include "TaskflowDialect/TaskflowPasses.h.inc"
Expand Down
14 changes: 14 additions & 0 deletions include/TaskflowDialect/TaskflowPasses.td
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,18 @@ def CanonicalizeTask: Pass<"canonicalize-task", "func::FuncOp">{
}];
let constructor = "taskflow::createCanonicalizeTaskPass()";
}

// Module-level analysis/annotation pass: walks taskflow.task regions and tags
// each taskflow.counter with its role in the counter hierarchy
// (root / relay / leaf). Registered under the flag "classify-counters".
def ClassifyCounters : Pass<"classify-counters", "ModuleOp">{
let summary = "Classifies counters as root/relay/leaf";
let description = [{
Analyzes the counter hierarchy within taskflow.task operations and
classifies each counter:
- root: Top-level counter with no parent
- relay: Intermediate counter with both parent and child counters
- leaf: Innermost counter with no child counters

Leaf counters are mapped to CGRA tile arrays.
}];
let constructor = "taskflow::createClassifyCountersPass()";
}
#endif // TASKFLOW_PASSES_TD
Loading