Skip to content

Commit 432d85b

Browse files
authored
Merge pull request #297 from ShangkunLi/predicate-handling
Add Mapping Constraint for Iteration Mismatch
2 parents 922b1bb + 2703281 commit 432d85b

File tree

9 files changed

+550
-6695
lines changed

9 files changed

+550
-6695
lines changed

lib/NeuraDialect/Architecture/Architecture.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -582,15 +582,16 @@ Architecture::Architecture(int multi_cgra_rows, int multi_cgra_columns,
582582
std::unique_ptr<Architecture> Architecture::cloneWithNewDimensions(
583583
int new_per_cgra_rows, int new_per_cgra_columns,
584584
const std::vector<TileOverride> &additional_overrides) const {
585-
585+
586586
std::vector<TileOverride> merged_overrides = tile_overrides_;
587-
merged_overrides.insert(merged_overrides.end(), additional_overrides.begin(), additional_overrides.end());
587+
merged_overrides.insert(merged_overrides.end(), additional_overrides.begin(),
588+
additional_overrides.end());
588589

589590
return std::make_unique<Architecture>(
590591
multi_cgra_rows_, multi_cgra_columns_, multi_cgra_base_topology_,
591592
new_per_cgra_rows, new_per_cgra_columns, max_ctrl_mem_items_,
592-
per_cgra_base_topology_, tile_defaults_, merged_overrides,
593-
link_defaults_, link_overrides_);
593+
per_cgra_base_topology_, tile_defaults_, merged_overrides, link_defaults_,
594+
link_overrides_);
594595
}
595596

596597
Tile *Architecture::getTile(int id) {

lib/NeuraDialect/Mapping/mapping_util.cpp

Lines changed: 108 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
#include <deque>
22
#include <queue>
33

4+
#include "NeuraDialect/Architecture/Architecture.h"
5+
#include "NeuraDialect/Mapping/MappingState.h"
46
#include "NeuraDialect/Mapping/mapping_util.h"
57
#include "NeuraDialect/NeuraOps.h"
68
#include "mlir/Dialect/Func/IR/FuncOps.h"
79
#include "mlir/IR/BuiltinAttributes.h"
810
#include "mlir/IR/Operation.h"
911
#include "llvm/ADT/DenseMap.h"
1012
#include "llvm/ADT/DenseSet.h"
11-
#include "llvm/Support/Error.h"
1213
#include "llvm/Support/raw_ostream.h"
1314
#include <cassert>
1415

@@ -197,7 +198,7 @@ mlir::neura::collectRecurrenceCycles(Region &region) {
197198

198199
region.walk([&](neura::CtrlMovOp ctrl_mov_op) {
199200
Value target = ctrl_mov_op.getTarget();
200-
auto reserve_op = target.getDefiningOp<neura::ReserveOp>();
201+
neura::ReserveOp reserve_op = target.getDefiningOp<neura::ReserveOp>();
201202
if (!reserve_op) {
202203
return;
203204
}
@@ -218,7 +219,7 @@ mlir::neura::collectRecurrenceCycles(Region &region) {
218219
traverseAlongPath(parent_op, reserve_value, current_path, visited_in_path,
219220
collected_paths);
220221

221-
for (auto &cycle : collected_paths) {
222+
for (RecurrenceCycle &cycle : collected_paths) {
222223
cycle.operations.push_back(ctrl_mov_op);
223224
recurrence_cycles.push_back(std::move(cycle));
224225
}
@@ -261,9 +262,10 @@ mlir::neura::getTopologicallySortedOps(Region &region) {
261262
std::deque<Operation *> ready_queue;
262263

263264
// Collects recurrence cycle ops.
264-
auto recurrence_cycles = collectRecurrenceCycles(region);
265+
SmallVector<RecurrenceCycle> recurrence_cycles =
266+
collectRecurrenceCycles(region);
265267
llvm::DenseSet<Operation *> recurrence_ops;
266-
for (const auto &cycle : recurrence_cycles) {
268+
for (const RecurrenceCycle &cycle : recurrence_cycles) {
267269
for (Operation *op : cycle.operations) {
268270
recurrence_ops.insert(op);
269271
}
@@ -459,7 +461,8 @@ mlir::Operation *mlir::neura::getMaterializedBackwardUser(Operation *op) {
459461

460462
assert(isa<neura::ReserveOp>(target.getDefiningOp()) &&
461463
"Expected the user of ctrl_mov target to be a reserve operation");
462-
auto reserve_op = dyn_cast<neura::ReserveOp>(target.getDefiningOp());
464+
neura::ReserveOp reserve_op =
465+
dyn_cast<neura::ReserveOp>(target.getDefiningOp());
463466

464467
// Skip ctrl_mov users of reserve; return the first materialized user.
465468
for (Operation *user : reserve_op.getResult().getUsers()) {
@@ -479,6 +482,72 @@ mlir::Operation *mlir::neura::getMaterializedBackwardUser(Operation *op) {
479482
"No materialized backward user (i.e., phi) found for ctrl_mov");
480483
}
481484

485+
// This struct represents a pending data_mov/ctrl_mov that is being routed,
486+
// along with its routing path.
487+
struct PendingRoute {
488+
Operation *mov_op;
489+
std::vector<MappingLoc> path;
490+
};
491+
492+
// Verifies that no operand route holds a physical register for longer than
// one II window. If a register on a routed operand path stays occupied for
// more cycles than the initiation interval, the value read at consume time
// would belong to a different loop iteration, so such a schedule is rejected.
//
// Returns true when there are no routes to check, or when every register hold
// on every pending route fits within a single II window.
bool hasSafeOperandIterationAtConsume(
    Operation *op, const std::vector<PendingRoute> &operand_routes, int ii) {
  assert(ii > 0 && "II should be positive");

  // Nothing routed yet means nothing can violate the constraint.
  if (operand_routes.empty()) {
    return true;
  }

  for (const PendingRoute &pending : operand_routes) {
    // Live interval of each register along this route, keyed by register:
    // <first time step seen, last time step seen>.
    DenseMap<Register *, std::pair<int, int>> live_range_by_reg;
    for (const MappingLoc &step_loc : pending.path) {
      Register *reg = dyn_cast<Register>(step_loc.resource);
      if (!reg) {
        // Only register resources contribute to hold-time tracking.
        continue;
      }

      auto range_it = live_range_by_reg.find(reg);
      if (range_it == live_range_by_reg.end()) {
        // First sighting: the interval starts and ends at this time step.
        live_range_by_reg[reg] =
            std::make_pair(step_loc.time_step, step_loc.time_step);
      } else {
        // Widen the interval so it covers this occurrence as well.
        range_it->second.first =
            std::min(range_it->second.first, step_loc.time_step);
        range_it->second.second =
            std::max(range_it->second.second, step_loc.time_step);
      }
    }

    // Register occupancy is tracked in per-cycle slots (half-open in routing
    // builders), so last - first + 1 corresponds to the hold duration.
    for (const auto &entry : live_range_by_reg) {
      Register *reg = entry.first;
      int first_t = entry.second.first;
      int last_t = entry.second.second;
      int hold_len = last_t - first_t + 1;
      if (hold_len > ii) {
        llvm::errs() << "[DEBUG] Reject schedule due to register hold >= next "
                        "iteration window. op="
                     << *op << ", II=" << ii << ", reg=#" << reg->getId()
                     << ", hold_start=" << first_t << ", hold_end=" << last_t
                     << ", hold_len=" << hold_len
                     << ", mov_op=" << *pending.mov_op << "\n";
        return false;
      }
    }
  }

  return true;
}
550+
482551
llvm::SmallVector<mlir::Operation *>
483552
mlir::neura::getMaterializedUserOps(Operation *op) {
484553
llvm::SmallVector<Operation *> result;
@@ -508,7 +577,7 @@ mlir::neura::getMaterializedUserOps(Operation *op) {
508577

509578
// Specially handles the ctrl_mov, i.e., the second operand of ctrl_mov is
510579
// treated as a target/destination/user in terms of dataflow.
511-
if (auto ctrl_mov = dyn_cast<neura::CtrlMovOp>(curr)) {
580+
if (neura::CtrlMovOp ctrl_mov = dyn_cast<neura::CtrlMovOp>(curr)) {
512581
Value target = ctrl_mov.getTarget();
513582
for (Operation *user : target.getUsers()) {
514583
if (visited.insert(user).second) {
@@ -737,8 +806,8 @@ Operation *mlir::neura::getMaterializedProducer(Value operand) {
737806
assert(
738807
isa<neura::DataMovOp>(producer) &&
739808
"Expected a DataMovOp as operand producer for non-ReserveOp operations");
740-
auto mov_op = dyn_cast<neura::DataMovOp>(producer);
741-
auto materialized_producer = mov_op.getOperand().getDefiningOp();
809+
neura::DataMovOp mov_op = dyn_cast<neura::DataMovOp>(producer);
810+
Operation *materialized_producer = mov_op.getOperand().getDefiningOp();
742811
return materialized_producer;
743812
}
744813

@@ -751,7 +820,8 @@ int mlir::neura::getPhysicalHops(const std::vector<Operation *> &producers,
751820

752821
for (Operation *producer : producers) {
753822
// Get the last location of the producer.
754-
auto producer_locs = mapping_state.getAllLocsOfOp(producer);
823+
std::vector<MappingLoc> producer_locs =
824+
mapping_state.getAllLocsOfOp(producer);
755825
assert(!producer_locs.empty() && "No locations found for producer");
756826

757827
MappingLoc producer_loc = producer_locs.back();
@@ -770,7 +840,8 @@ bool mlir::neura::canReachLocInTime(const std::vector<Operation *> &producers,
770840

771841
for (Operation *producer : producers) {
772842
// Get the last location of the producer.
773-
auto producer_locs = mapping_state.getAllLocsOfOp(producer);
843+
std::vector<MappingLoc> producer_locs =
844+
mapping_state.getAllLocsOfOp(producer);
774845
assert(!producer_locs.empty() && "No locations found for producer");
775846

776847
MappingLoc producer_loc = producer_locs.back();
@@ -914,7 +985,7 @@ mlir::neura::calculateAward(Operation *op, std::set<Operation *> &critical_ops,
914985
// Assembles all the backward users if exist.
915986
std::vector<Operation *> backward_users;
916987
for (Operation *user : getCtrlMovUsers(op)) {
917-
auto ctrl_mov = dyn_cast<neura::CtrlMovOp>(user);
988+
neura::CtrlMovOp ctrl_mov = dyn_cast<neura::CtrlMovOp>(user);
918989
assert(ctrl_mov && "Expected user to be a CtrlMovOp");
919990
mlir::Operation *materialized_backward_op =
920991
getMaterializedBackwardUser(ctrl_mov);
@@ -972,7 +1043,7 @@ mlir::neura::calculateAward(Operation *op, std::set<Operation *> &critical_ops,
9721043

9731044
// Computes proximity bonus to backward users. Closer is better for
9741045
// recurrence routing.
975-
for (auto &backward_user_loc : backward_users_locs) {
1046+
for (MappingLoc &backward_user_loc : backward_users_locs) {
9761047
Tile *backward_tile = dyn_cast<Tile>(backward_user_loc.resource);
9771048
if (backward_tile) {
9781049
int backward_hops = std::abs(backward_tile->getX() - tile->getX()) +
@@ -1002,7 +1073,7 @@ mlir::neura::calculateAward(Operation *op, std::set<Operation *> &critical_ops,
10021073
producers.empty() ||
10031074
canReachLocInTime(producers, tile_loc_candidate, t, mapping_state);
10041075
bool meet_backward_user_constraint = true;
1005-
for (auto &backward_user_loc : backward_users_locs) {
1076+
for (MappingLoc &backward_user_loc : backward_users_locs) {
10061077
// Checks if the location can reach all backward users.
10071078
if (!canReachLocInTime(tile_loc_candidate, backward_user_loc,
10081079
backward_user_loc.time_step +
@@ -1025,12 +1096,12 @@ mlir::neura::calculateAward(Operation *op, std::set<Operation *> &critical_ops,
10251096
int occupied_in = 0;
10261097
int occupied_out = 0;
10271098

1028-
for (auto *link : tile->getInLinks()) {
1099+
for (Link *link : tile->getInLinks()) {
10291100
if (!mapping_state.isAvailableAcrossTime({link, t})) {
10301101
occupied_in++;
10311102
}
10321103
}
1033-
for (auto *link : tile->getOutLinks()) {
1104+
for (Link *link : tile->getOutLinks()) {
10341105
if (!mapping_state.isAvailableAcrossTime({link, t})) {
10351106
occupied_out++;
10361107
}
@@ -1127,11 +1198,11 @@ bool mlir::neura::placeAndRoute(Operation *op, const MappingLoc &target_loc,
11271198
target_loc.resource, target_loc.time_step, latency, op);
11281199
if (bind_success) {
11291200
llvm::errs() << "[DEBUG] Bound multi-cycle op (latency=" << latency
1130-
<< ") " << *op << " onto loc: "
1131-
<< target_loc.resource->getType() << "#"
1201+
<< ") " << *op
1202+
<< " onto loc: " << target_loc.resource->getType() << "#"
11321203
<< target_loc.resource->getId()
1133-
<< " @t=" << target_loc.time_step << " to t="
1134-
<< (target_loc.time_step + latency - 1) << "\n";
1204+
<< " @t=" << target_loc.time_step
1205+
<< " to t=" << (target_loc.time_step + latency - 1) << "\n";
11351206
}
11361207
} else {
11371208
// For single-cycle ops, use default SINGLE_OCCUPY binding
@@ -1145,6 +1216,7 @@ bool mlir::neura::placeAndRoute(Operation *op, const MappingLoc &target_loc,
11451216
}
11461217

11471218
if (bind_success) {
1219+
std::vector<PendingRoute> pending_operand_routes;
11481220
std::vector<Operation *> routed_operands;
11491221
std::vector<Operation *> routed_ctrl_movs;
11501222
// Tries to route the data move operations.
@@ -1172,8 +1244,8 @@ bool mlir::neura::placeAndRoute(Operation *op, const MappingLoc &target_loc,
11721244
std::vector<MappingLoc> route_path;
11731245
if (tryRouteForwardMove(data_move, src_loc, target_loc, mapping_state,
11741246
route_path)) {
1175-
// Reserves the route for the data move operation.
11761247
mapping_state.reserveRoute(data_move, route_path);
1248+
pending_operand_routes.push_back({data_move, std::move(route_path)});
11771249
routed_operands.push_back(data_move);
11781250
llvm::errs() << "[DEBUG] Successfully routed data move: " << *data_move
11791251
<< " from " << src_loc.resource->getType() << "#"
@@ -1191,15 +1263,25 @@ bool mlir::neura::placeAndRoute(Operation *op, const MappingLoc &target_loc,
11911263
<< " @t=" << target_loc.time_step << "; so unschedule op\n";
11921264
mapping_state.unbindOp(op);
11931265
for (Operation *routed_op : routed_operands) {
1194-
llvm::errs() << "[DEBUG] Releasing route for routed operand: "
1195-
<< *routed_op << "\n";
11961266
mapping_state.releaseRoute(routed_op);
11971267
}
11981268
return false;
11991269
}
1270+
1271+
if (!hasSafeOperandIterationAtConsume(op, pending_operand_routes,
1272+
mapping_state.getII())) {
1273+
llvm::errs() << "[DEBUG] Operand iteration shift at consume time; "
1274+
"unschedule op\n";
1275+
mapping_state.unbindOp(op);
1276+
for (Operation *routed_op : routed_operands) {
1277+
mapping_state.releaseRoute(routed_op);
1278+
}
1279+
return false;
1280+
}
1281+
12001282
// Checks whether the operation's user is a ctrl_mov.
12011283
for (Operation *user : getCtrlMovUsers(op)) {
1202-
auto ctrl_mov = dyn_cast<neura::CtrlMovOp>(user);
1284+
neura::CtrlMovOp ctrl_mov = dyn_cast<neura::CtrlMovOp>(user);
12031285
llvm::errs() << "[DEBUG] Found ctrl_mov user: " << *ctrl_mov << "\n";
12041286
assert(ctrl_mov && "Expected user to be a CtrlMovOp");
12051287
mlir::Operation *materialized_backward_op =
@@ -1251,13 +1333,11 @@ bool mlir::neura::placeAndRoute(Operation *op, const MappingLoc &target_loc,
12511333

12521334
int mlir::neura::getOpLatency(Operation *op) {
  // Reads the optional "latency" integer attribute attached to the op.
  IntegerAttr latency_attr = op->getAttrOfType<IntegerAttr>("latency");
  if (!latency_attr) {
    // Operations without an explicit latency are treated as single-cycle.
    return 1;
  }
  return latency_attr.getInt();
}
12601342

1261-
// An operation is multi-cycle when its declared latency exceeds one cycle.
bool mlir::neura::isMultiCycleOp(Operation *op) {
  return getOpLatency(op) > 1;
}

lib/NeuraDialect/Transforms/CanonicalizeLiveInPass.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -436,10 +436,11 @@ identifyDirectDominatingLiveIns(Region &region, DominanceInfo &dom_info,
436436
continue;
437437
}
438438

439-
// If the using block is a loop header (has a back-edge), we must NOT treat
440-
// any live-in as a direct dominating live-in. This is because:
439+
// If the using block is a loop header (has a back-edge), we must NOT
440+
// treat any live-in as a direct dominating live-in. This is because:
441441
// 1. Live-ins from outer scopes have rate mismatch and need PHI_START
442-
// 2. Live-ins from inner blocks are loop-carried dependencies that need PHI
442+
// 2. Live-ins from inner blocks are loop-carried dependencies that need
443+
// PHI
443444
bool using_block_is_loop_header = false;
444445
for (Block *pred : block.getPredecessors()) {
445446
if (dom_info.dominates(&block, pred)) {

test/Conversion/c2llvm2mlir/nested_loop/test.mlir

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@
2222
// RUN: --fold-constant \
2323
// RUN: --insert-data-mov \
2424
// RUN: --map-to-accelerator="mapping-strategy=heuristic backtrack-config=simple" \
25-
// RUN: --architecture-spec=../../../arch_spec/architecture.yaml %t-kernel.mlir | FileCheck %s --check-prefix=CHECK-LLVM2NEURA-MAP
25+
// RUN: --architecture-spec=../../../arch_spec/architecture.yaml %t-kernel.mlir \
26+
// RUN: -o %t-kernel-mapped.mlir
27+
// RUN: FileCheck %s --input-file=%t-kernel-mapped.mlir --check-prefix=CHECK-LLVM2NEURA-MAP
2628

2729
// CHECK-LLVM2NEURA: accelerator = "neura"
2830
// CHECK-LLVM2NEURA: %25 = neura.alloca %24 : !neura.data<i32, i1> -> !neura.data<!llvm.ptr, i1>
2931
// CHECK-LLVM2NEURA: %38 = neura.phi_start %37, %36 : !neura.data<i32, i1>, !neura.data<i32, i1> -> !neura.data<i32, i1>
3032
// CHECK-LLVM2NEURA: %188 = neura.sext %187 : !neura.data<i32, i1> -> !neura.data<i64, i1>
3133
// CHECK-LLVM2NEURA: %207 = "neura.mul"(%205, %206) : (!neura.data<i32, i1>, !neura.data<i32, i1>) -> !neura.data<i32, i1>
3234

33-
// CHECK-LLVM2NEURA-MAP: func.func @_Z6kernelPiS_S_(%arg0: !llvm.ptr {llvm.noundef}, %arg1: !llvm.ptr {llvm.noundef}, %arg2: !llvm.ptr {llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv<ccc>, accelerator = "neura", dataflow_mode = "predicate", frame_pointer = #llvm.framePointerKind<all>, linkage = #llvm.linkage<external>, mapping_info = {compiled_ii = 11 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 9 : i32, res_mii = 6 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}, no_inline, no_unwind, optimize_none, passthrough = ["mustprogress", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 0 : i64, visibility_ = 0 : i64} {
35+
// CHECK-LLVM2NEURA-MAP: func.func @_Z6kernelPiS_S_(%arg0: !llvm.ptr {llvm.noundef}, %arg1: !llvm.ptr {llvm.noundef}, %arg2: !llvm.ptr {llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv<ccc>, accelerator = "neura", dataflow_mode = "predicate", frame_pointer = #llvm.framePointerKind<all>, linkage = #llvm.linkage<external>, mapping_info = {compiled_ii = 14 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 9 : i32, res_mii = 6 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}, no_inline, no_unwind, optimize_none, passthrough = ["mustprogress", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 0 : i64, visibility_ = 0 : i64} {

0 commit comments

Comments
 (0)