Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/NeuraDialect/Architecture/Architecture.h
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,9 @@ class Architecture {

// Function for getting the architecture object.
const Architecture &getArchitecture();

// Function for getting the latency specification file path.
const std::string &getLatencySpecFile();
} // namespace neura
} // namespace mlir

Expand Down
46 changes: 41 additions & 5 deletions include/NeuraDialect/Mapping/MappingState.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@
namespace mlir {
namespace neura {

// Occupy status for multi-cycle pipeline support.
// These states define how a tile/FU is occupied at a given time step.
//
// Declared as typed constants instead of preprocessor macros so that the
// names respect namespace scoping and cannot textually rewrite unrelated
// identifiers in files that include this header. The values stay plain
// ints, so they can be passed wherever an `int` occupy status is expected.

// A single-cycle op is in the FU (exclusive).
constexpr int SINGLE_OCCUPY = 0;
// A multi-cycle op starts in the FU.
constexpr int START_PIPE_OCCUPY = 1;
// A multi-cycle op ends in the FU.
constexpr int END_PIPE_OCCUPY = 2;
// A multi-cycle op is occupying the FU (pipelined).
constexpr int IN_PIPE_OCCUPY = 3;

// Represents a spatial-temporal location: (resource, time_step)
struct MappingLoc {
BasicResource *resource;
Expand Down Expand Up @@ -54,9 +61,20 @@ namespace neura {
class MappingState {
public:
MappingState(const Architecture &arch, int II, bool is_spatial_only);
// Binds a (tile/link, time_step) location to an operation.
// Binds a (tile/link, time_step) location to an operation with default
// SINGLE_OCCUPY status.
bool bindOp(const MappingLoc &loc, Operation *op);

// Binds a (tile/link, time_step) location to an operation with specified
// occupy status for multi-cycle pipeline support.
bool bindOp(const MappingLoc &loc, Operation *op, int occupy_status);

// Binds multiple locations for a multi-cycle operation.
// This sets START_PIPE_OCCUPY at start_time, IN_PIPE_OCCUPY for intermediate
// times, and END_PIPE_OCCUPY at the last occupied step
// (start_time + latency - 1).
bool bindMultiCycleOp(BasicResource *resource, int start_time, int latency,
Operation *op);

// Unbinds an operation from its (tile/link, time_step) location,
// which is useful for backtracking.
void unbindOp(Operation *op);
Expand All @@ -67,6 +85,19 @@ class MappingState {
// it will check (tile 2, step 1), (tile 2, step 5), (tile 2, step 9), etc.
bool isAvailableAcrossTime(const MappingLoc &loc) const;

// Checks if a location is available for a specific occupy status.
// This implements the pipeline-aware availability checking:
// - SINGLE_OCCUPY: only available if location is completely free
// - START_PIPE_OCCUPY: available if free or IN_PIPE_OCCUPY or END_PIPE_OCCUPY
// - END_PIPE_OCCUPY: available if free or IN_PIPE_OCCUPY or START_PIPE_OCCUPY
// - IN_PIPE_OCCUPY: always available (can pipeline with any status)
bool isAvailableForOccupyStatus(const MappingLoc &loc,
int new_occupy_status) const;

// Gets the occupy status at a specific location across time domain.
// Returns -1 if the location is not occupied.
int getOccupyStatusAcrossTime(const MappingLoc &loc) const;

// Checks if a hardware resource is available across a time range.
// This function leverages the isAvailableAcrossTime function in each
// time step.
Expand Down Expand Up @@ -111,7 +142,8 @@ class MappingState {
void dumpOpToLocs(llvm::raw_ostream &os = llvm::errs()) const;

// Getters for state information.
const std::set<MappingLoc> &getOccupiedLocs() const {
const std::map<MappingLoc, std::vector<std::pair<int, Operation *>>> &
getOccupiedLocs() const {
return this->occupied_locs;
}
const std::map<MappingLoc, Operation *> &getLocToOp() const {
Expand All @@ -122,7 +154,9 @@ class MappingState {
}

// Setters for state information.
void setOccupiedLocs(const std::set<MappingLoc> &locs) {
void setOccupiedLocs(
const std::map<MappingLoc, std::vector<std::pair<int, Operation *>>>
&locs) {
this->occupied_locs = locs;
}
void setLocToOp(const std::map<MappingLoc, Operation *> &loc_to_op) {
Expand All @@ -139,7 +173,9 @@ class MappingState {
bool is_spatial_only;
static constexpr int kMaxSteps = 10;

std::set<MappingLoc> occupied_locs;
// Maps location to a list of (occupy_status, operation) pairs.
// Multiple ops can occupy the same location with compatible pipeline states.
std::map<MappingLoc, std::vector<std::pair<int, Operation *>>> occupied_locs;
std::map<MappingLoc, Operation *> loc_to_op;
std::map<Operation *, std::vector<MappingLoc>> op_to_locs;
};
Expand All @@ -160,7 +196,7 @@ class MappingStateSnapshot {
}

private:
std::set<MappingLoc> occupied_locs;
std::map<MappingLoc, std::vector<std::pair<int, Operation *>>> occupied_locs;
std::map<MappingLoc, Operation *> loc_to_op;
std::map<Operation *, std::vector<MappingLoc>> op_to_locs;
};
Expand Down
7 changes: 7 additions & 0 deletions include/NeuraDialect/Mapping/mapping_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,5 +116,12 @@ bool canReachLocInTime(const std::vector<Operation *> &producers,
Register *getAvailableRegister(const MappingState &mapping_state, Tile *tile,
int start_time, int exclusive_end_time);

// Gets the execution latency of an operation from its "latency" attribute.
// Returns 1 (single-cycle) if the attribute is not present.
int getOpLatency(Operation *op);

// Checks if an operation is a multi-cycle operation (latency > 1).
bool isMultiCycleOp(Operation *op);

} // namespace neura
} // namespace mlir
1 change: 1 addition & 0 deletions include/NeuraDialect/NeuraPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ std::unique_ptr<mlir::Pass> createInitPatternPass();

// Hardware optimization passes
std::unique_ptr<mlir::Pass> createHardwareMergePass();
std::unique_ptr<mlir::Pass> createInitExecLatencyPass();

#define GEN_PASS_REGISTRATION
#include "NeuraDialect/NeuraPasses.h.inc"
Expand Down
8 changes: 8 additions & 0 deletions include/NeuraDialect/NeuraPasses.td
Original file line number Diff line number Diff line change
Expand Up @@ -220,4 +220,12 @@ def HardwareMerge : Pass<"hardware-merge", "ModuleOp"> {
}];
let constructor = "neura::createHardwareMergePass()";
}

// Declares the "init-exec-latency" pass. It operates on ModuleOp and is
// instantiated through neura::createInitExecLatencyPass().
def InitExecLatency : Pass<"init-exec-latency", "ModuleOp"> {
let summary = "Initialize execution latency information";
let description = [{
This pass initializes execution latency information.
}];
let constructor = "neura::createInitExecLatencyPass()";
}
#endif // NEURA_PASSES_TD
1 change: 1 addition & 0 deletions include/TaskflowDialect/TaskflowPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ void registerTosaToAffineConversionPassPipeline();
std::unique_ptr<mlir::Pass> createConstructHyperblockFromTaskPass();
std::unique_ptr<mlir::Pass> createClassifyCountersPass();
std::unique_ptr<mlir::Pass> createMapTaskOnCgraPass();
std::unique_ptr<mlir::Pass> createFuseTaskPass();

//=========================================================//
// Optimization Passes
Expand Down
21 changes: 21 additions & 0 deletions include/TaskflowDialect/TaskflowPasses.td
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,27 @@ def MapTaskOnCgra : Pass<"map-task-on-cgra", "func::FuncOp"> {
let constructor = "taskflow::createMapTaskOnCgraPass()";
}

// Declares the "fuse-task" pass. It operates on func::FuncOp and is
// instantiated through taskflow::createFuseTaskPass().
def FuseTask : Pass<"fuse-task", "func::FuncOp"> {
let summary = "Fuses Taskflow tasks using producer-consumer and sibling strategies";
let description = [{
Fuses taskflow.task operations using producer-consumer and sibling
fusion strategies. Uses Neura-level MII metrics for profitability analysis.

Producer-Consumer Fusion: Fuses a producer task into its consumer when
the producer's memory output feeds directly into the consumer.

Sibling Fusion: Fuses tasks that share inputs without data dependency.
}];
let constructor = "taskflow::createFuseTaskPass()";
// Dialects this pass declares a dependency on (LLVM, func, arith, memref,
// Neura and Taskflow).
let dependentDialects = [
"mlir::LLVM::LLVMDialect",
"mlir::func::FuncDialect",
"mlir::arith::ArithDialect",
"mlir::memref::MemRefDialect",
"mlir::neura::NeuraDialect",
"mlir::taskflow::TaskflowDialect"];
}

def MemoryAccessStreamingFusion : Pass<"memory-access-streaming-fusion", "func::FuncOp"> {
let summary = "Fuses tasks connected by memory dependencies for streaming execution";
let description = [{
Expand Down
3 changes: 1 addition & 2 deletions lib/Conversion/TaskflowToNeura/TaskflowToNeuraPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,9 @@ struct HyperblockToKernelPattern
}

// Asserts that each task contains only one hyperblock.
// (Fused tasks may contain multiple hyperblocks, which is valid.)
int hyperblock_count = 0;
task_op.walk([&](TaskflowHyperblockOp op) { hyperblock_count++; });
assert(hyperblock_count == 1 &&
"Each taskflow.task should contain only one hyperblock");

Block &hb_block = hyperblock_op.getBody().front();
Block &task_block = task_op.getBody().front();
Expand Down
Loading