Skip to content
Closed
4 changes: 4 additions & 0 deletions include/NeuraDialect/Architecture/ArchitectureSpec.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ struct LinkOverride {
// This is set by the command line tool when a YAML file is provided.
std::string getArchitectureSpecFile();

// Function for getting the latency specification file path.
// This is set by the command line tool when a YAML file is provided.
std::string getLatencySpecFile();

// Function for getting tile defaults configuration.
TileDefaults getTileDefaults();

Expand Down
46 changes: 41 additions & 5 deletions include/NeuraDialect/Mapping/MappingState.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@
namespace mlir {
namespace neura {

// Occupy status for multi-cycle pipeline support.
// These states define how a tile/FU is occupied at a given time step.
// A plain (unscoped) enum is used instead of #define macros so the constants
// are grouped under one type and scoped to this namespace, while the
// enumerators still convert implicitly to `int` for existing callers that
// store the status as `int occupy_status`.
enum OccupyStatus {
  SINGLE_OCCUPY = 0,     // A single-cycle op is in the FU (exclusive).
  START_PIPE_OCCUPY = 1, // A multi-cycle op starts in the FU.
  END_PIPE_OCCUPY = 2,   // A multi-cycle op ends in the FU.
  IN_PIPE_OCCUPY = 3     // A multi-cycle op is occupying the FU (pipelined);
                         // per review discussion, intended for inclusive
                         // execution (tile ports left free) — implementation
                         // still pending, TODO confirm final semantics.
};
Comment on lines +16 to +18
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aren't the 3 *_PIPE_OCCUPY overlapping with each other?

Copy link
Copy Markdown
Collaborator Author

@HobbitQia HobbitQia Jan 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, 3 means the multi-cycle op will not occupy the input and output ports of the tile, so we can map other operations onto this tile — this is the inclusive execution we proposed earlier in our DATE paper.

However, I have not finished implementing and testing inclusive execution yet; for now I have just copied some content from CGRA-Mapper and will tune it in the future.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So IN_PIPE_OCCUPY does not include start and end, right?


// Represents a spatial-temporal location: (resource, time_step)
struct MappingLoc {
BasicResource *resource;
Expand Down Expand Up @@ -54,9 +61,20 @@ namespace neura {
class MappingState {
public:
MappingState(const Architecture &arch, int II, bool is_spatial_only);
// Binds a (tile/link, time_step) location to an operation.
// Binds a (tile/link, time_step) location to an operation with default
// SINGLE_OCCUPY status.
bool bindOp(const MappingLoc &loc, Operation *op);

// Binds a (tile/link, time_step) location to an operation with specified
// occupy status for multi-cycle pipeline support.
bool bindOp(const MappingLoc &loc, Operation *op, int occupy_status);

// Binds multiple locations for a multi-cycle operation.
// This sets START_PIPE_OCCUPY at start_time, IN_PIPE_OCCUPY for the
// intermediate steps, and END_PIPE_OCCUPY at start_time + latency - 1.
bool bindMultiCycleOp(BasicResource *resource, int start_time, int latency,
Operation *op);

// Unbinds an operation from its (tile/link, time_step) location,
// which is useful for backtracking.
void unbindOp(Operation *op);
Expand All @@ -67,6 +85,19 @@ class MappingState {
// it will check (tile 2, step 1), (tile 2, step 5), (tile 2, step 9), etc.
bool isAvailableAcrossTime(const MappingLoc &loc) const;

// Checks if a location is available for a specific occupy status.
// This implements the pipeline-aware availability checking:
// - SINGLE_OCCUPY: only available if location is completely free
// - START_PIPE_OCCUPY: available if free or IN_PIPE_OCCUPY or END_PIPE_OCCUPY
// - END_PIPE_OCCUPY: available if free or IN_PIPE_OCCUPY or START_PIPE_OCCUPY
// - IN_PIPE_OCCUPY: always available (can pipeline with any status)
bool isAvailableForOccupyStatus(const MappingLoc &loc,
int new_occupy_status) const;

// Gets the occupy status at a specific location across time domain.
// Returns -1 if the location is not occupied.
int getOccupyStatusAcrossTime(const MappingLoc &loc) const;

// Checks if a hardware resource is available across a time range.
// This function leverages the isAvailableAcrossTime function in each
// time step.
Expand Down Expand Up @@ -111,7 +142,8 @@ class MappingState {
void dumpOpToLocs(llvm::raw_ostream &os = llvm::errs()) const;

// Getters for state information.
const std::set<MappingLoc> &getOccupiedLocs() const {
const std::map<MappingLoc, std::vector<std::pair<int, Operation *>>> &
getOccupiedLocs() const {
return this->occupied_locs;
}
const std::map<MappingLoc, Operation *> &getLocToOp() const {
Expand All @@ -122,7 +154,9 @@ class MappingState {
}

// Setters for state information.
void setOccupiedLocs(const std::set<MappingLoc> &locs) {
void setOccupiedLocs(
const std::map<MappingLoc, std::vector<std::pair<int, Operation *>>>
&locs) {
this->occupied_locs = locs;
}
void setLocToOp(const std::map<MappingLoc, Operation *> &loc_to_op) {
Expand All @@ -139,7 +173,9 @@ class MappingState {
bool is_spatial_only;
static constexpr int kMaxSteps = 10;

std::set<MappingLoc> occupied_locs;
// Maps location to a list of (occupy_status, operation) pairs.
// Multiple ops can occupy the same location with compatible pipeline states.
std::map<MappingLoc, std::vector<std::pair<int, Operation *>>> occupied_locs;
std::map<MappingLoc, Operation *> loc_to_op;
std::map<Operation *, std::vector<MappingLoc>> op_to_locs;
};
Expand All @@ -160,7 +196,7 @@ class MappingStateSnapshot {
}

private:
std::set<MappingLoc> occupied_locs;
std::map<MappingLoc, std::vector<std::pair<int, Operation *>>> occupied_locs;
std::map<MappingLoc, Operation *> loc_to_op;
std::map<Operation *, std::vector<MappingLoc>> op_to_locs;
};
Expand Down
7 changes: 7 additions & 0 deletions include/NeuraDialect/Mapping/mapping_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,5 +116,12 @@ bool canReachLocInTime(const std::vector<Operation *> &producers,
Register *getAvailableRegister(const MappingState &mapping_state, Tile *tile,
int start_time, int exclusive_end_time);

// Gets the execution latency of an operation from its "latency" attribute.
// Returns 1 (single-cycle) if the attribute is not present.
int getOpLatency(Operation *op);

// Checks if an operation is a multi-cycle operation (latency > 1).
bool isMultiCycleOp(Operation *op);

} // namespace neura
} // namespace mlir
2 changes: 2 additions & 0 deletions include/NeuraDialect/NeuraPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ std::unique_ptr<mlir::Pass> createWrapLoopInKernelPass();
// Hardware specific optimization passes
std::unique_ptr<mlir::Pass> createFuseLoopControlPass();
std::unique_ptr<mlir::Pass> createFusePatternPass();
std::unique_ptr<mlir::Pass> createFuseKernelPass();

// Hardware agnostic optimization passes
std::unique_ptr<mlir::Pass> createFoldConstantPass();
Expand All @@ -49,6 +50,7 @@ std::unique_ptr<mlir::Pass> createInitPatternPass();

// Hardware optimization passes
std::unique_ptr<mlir::Pass> createHardwareMergePass();
std::unique_ptr<mlir::Pass> createInitExecLatencyPass();

#define GEN_PASS_REGISTRATION
#include "NeuraDialect/NeuraPasses.h.inc"
Expand Down
23 changes: 23 additions & 0 deletions include/NeuraDialect/NeuraPasses.td
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,21 @@ def FusePattern : Pass<"fuse-pattern", "ModuleOp"> {
let constructor = "neura::createFusePatternPass()";
}

// TableGen definition for the --fuse-kernel pass. The summary/description
// strings below are surfaced verbatim in the generated pass docs and
// --help output, so they are kept unchanged here.
def FuseKernel : Pass<"fuse-kernel", "ModuleOp"> {
let summary = "Fuses kernel operations in the Neura dialect";
let description = [{
This pass fuses neura.kernel operations using producer-consumer and sibling
fusion strategies, inspired by MLIR's linalg and affine loop fusion.

Producer-Consumer Fusion: Fuses a producer kernel into its consumer when
the producer's output is only used by the consumer.

Sibling Fusion: Fuses kernels that share the same input operands and have
no data dependencies between them.
}];
let constructor = "neura::createFuseKernelPass()";
}

def InsertDataMov : Pass<"insert-data-mov", "ModuleOp"> {
let summary = "Inserts data move operations in the Neura dialect";
let description =
Expand Down Expand Up @@ -194,4 +209,12 @@ def HardwareMerge : Pass<"hardware-merge", "ModuleOp"> {
}];
let constructor = "neura::createHardwareMergePass()";
}

// NOTE(review): the description below is thin — presumably this pass attaches
// per-op "latency" attributes that getOpLatency()/isMultiCycleOp() in
// mapping_util later read, likely driven by the latency spec file from
// getLatencySpecFile(). TODO: confirm and expand the description string.
def InitExecLatency : Pass<"init-exec-latency", "ModuleOp"> {
let summary = "Initialize execution latency information";
let description = [{
This pass initializes execution latency information.
}];
let constructor = "neura::createInitExecLatencyPass()";
}
#endif // NEURA_PASSES_TD
Loading