diff --git a/include/NeuraDialect/Architecture/Architecture.h b/include/NeuraDialect/Architecture/Architecture.h index f40ae183..2d560e75 100644 --- a/include/NeuraDialect/Architecture/Architecture.h +++ b/include/NeuraDialect/Architecture/Architecture.h @@ -11,6 +11,12 @@ namespace mlir { namespace neura { +// Enum for identifying resource type. +enum class ResourceKind { + Tile, + Link, +}; + //===----------------------------------------------------------------------===// // BasicResource: abstract base class for Tile, Link, etc. //===----------------------------------------------------------------------===// @@ -20,6 +26,7 @@ class BasicResource { virtual ~BasicResource() = default; virtual int getId() const = 0; virtual std::string getType() const = 0; + virtual ResourceKind getKind() const = 0; }; //===----------------------------------------------------------------------===// @@ -37,6 +44,12 @@ class Tile : public BasicResource { int getId() const override; std::string getType() const override { return "tile"; } + ResourceKind getKind() const override { return ResourceKind::Tile; } + + static bool classof(const BasicResource *res) { + return res && res->getKind() == ResourceKind::Tile; + } + int getX() const; int getY() const; @@ -64,8 +77,14 @@ class Link : public BasicResource { Link(int id); int getId() const override; + std::string getType() const override { return "link"; } + ResourceKind getKind() const override { return ResourceKind::Link; } + + static bool classof(const BasicResource *res) { + return res && res->getKind() == ResourceKind::Link; + } Tile* getSrcTile() const; Tile* getDstTile() const; @@ -83,7 +102,8 @@ struct PairHash { } }; -/// Describes the entire CGRA architecture. +// Describes the CGRA architecture template. +// TODO: Model architecture in detail (e.g., registers, ports). 
class Architecture { public: Architecture(int width, int height); @@ -98,8 +118,9 @@ class Architecture { std::vector getAllLinks() const; private: + // TODO: Model architecture in detail, e.g., ports, registers, crossbars, etc. + // https://github.com/coredac/dataflow/issues/52. std::vector> tile_storage; -// std::vector tiles; std::vector> link_storage; std::unordered_map id_to_tile; std::unordered_map, Tile*, PairHash> coord_to_tile; diff --git a/include/NeuraDialect/Mapping/MappingState.h b/include/NeuraDialect/Mapping/MappingState.h index ed10b265..5736f654 100644 --- a/include/NeuraDialect/Mapping/MappingState.h +++ b/include/NeuraDialect/Mapping/MappingState.h @@ -2,7 +2,8 @@ #define NEURA_MAPPING_STATE_H #include "mlir/IR/Operation.h" -#include "NeuraDialect/Architecture/Architecture.h" // for BasicResource +#include "NeuraDialect/Architecture/Architecture.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -19,6 +20,12 @@ struct MappingLoc { bool operator==(const MappingLoc &other) const { return resource == other.resource && time_step == other.time_step; } + + bool operator<(const MappingLoc &other) const { + if (time_step != other.time_step) + return time_step < other.time_step; + return resource->getId() < other.resource->getId(); + } }; } // namespace neura @@ -43,44 +50,65 @@ class MappingState { public: MappingState(const Architecture &arch, int II); // Binds a (tile/link, time_step) location to an operation. - void bindOp(MappingLoc loc, Operation *op); + bool bindOp(const MappingLoc &loc, Operation *op); + + // Unbinds an operation from its (tile/link, time_step) location, + // which is useful for backtracking. + void unbindOp(Operation *op); // Checks if a (tile/link, time_step) is available (unoccupied). - bool isAvailable(const MappingLoc &loc) const; + // Note that the check is performed in II granularity. 
+ // For example, if II is 4, and we want to check (tile 2, step 5), then + // it will check (tile 2, step 1), (tile 2, step 5), (tile 2, step 9), etc. + bool isAvailableAcrossTime(const MappingLoc &loc) const; // Gets the operation at a specific (tile/link, time_step) location. std::optional getOpAt(MappingLoc loc) const; + // Counts the number of operations at a specific resource across time steps. + int countOpsAtResource(BasicResource *resource) const; + // Gets all MRRG nodes. - const std::unordered_set &getAllLocs() const; + const std::set &getAllLocs() const; + + // Gets all MRRG nodes allocated to a given op. + const std::vector &getAllLocsOfOp(Operation *op) const; + + // Reserves links for an move operation. + void reserveRoute(Operation *op, ArrayRef path); + + // Releases links for an move operation. + void releaseRoute(Operation *op); // Gets neighboring tiles on next step of a given MappingLoc. - const std::vector &getNextStepTiles(MappingLoc loc) const; + std::vector getNextStepTiles(MappingLoc loc) const; - // Gets neighboring links on next step of a given MappingLoc. - const std::vector &getNextStepLinks(MappingLoc loc) const; +// // Gets neighboring links on next step of a given MappingLoc. +// const std::vector &getNextStepLinks(MappingLoc loc) const; - // Gets neighboring tiles on current step of a given MappingLoc. - const std::vector &getCurrentStepTiles(MappingLoc loc) const; +// // Gets neighboring tiles on current step of a given MappingLoc. +// const std::vector &getCurrentStepTiles(MappingLoc loc) const; // Gets neighboring links on current step of a given MappingLoc. - const std::vector &getCurrentStepLinks(MappingLoc loc) const; + std::vector getCurrentStepLinks(MappingLoc loc) const; + // Gets the target initiation interval (II) for the mapping. int getII() const { return II; } + // Embeds the mapping states onto the mapped operations. 
+ void encodeMappingState(); + + void dumpOpToLocs(llvm::raw_ostream &os = llvm::errs()) const; + private: // Initiation interval. int II; - std::unordered_set all_locs; - // current and next step tiles and links for a given MappingLoc. Note that - // the key MappingLoc is either a pair of (tile, time_step) or (link, time_step). - std::unordered_map> next_step_tiles; - std::unordered_map> next_step_links; - std::unordered_map> current_step_tiles; - std::unordered_map> current_step_links; - - std::unordered_map loc_to_op; - std::unordered_set occupied_locs; + static constexpr int kMaxSteps = 10; + + std::set all_locs; + std::set occupied_locs; + std::map loc_to_op; + std::map> op_to_locs; }; } // namespace neura diff --git a/include/NeuraDialect/Mapping/mapping_util.h b/include/NeuraDialect/Mapping/mapping_util.h index ca3a4b45..1864dd5e 100644 --- a/include/NeuraDialect/Mapping/mapping_util.h +++ b/include/NeuraDialect/Mapping/mapping_util.h @@ -2,6 +2,7 @@ #include "mlir/IR/Operation.h" #include "NeuraDialect/Architecture/Architecture.h" +#include "NeuraDialect/Mapping/MappingState.h" namespace mlir { namespace neura { @@ -18,5 +19,70 @@ SmallVector collectRecurrenceCycles(Operation *func_op); // Calculates ResMII: ceil(#ops / #tiles). int calculateResMii(Operation *func_op, const Architecture &architecture); +// Returns topologically sorted operations in func_op. +std::vector getTopologicallySortedOps(Operation *func_op); + +Operation* getMaterializedProducer(Value operand); + +// Collects the real users of an operation, excluding ctrl_mov and data_mov. +llvm::SmallVector getMaterializedUserOps(Operation *op); + +// Gets the last materialized backward user of an operation, which is expected +// to be a phi operation. +Operation *getMaterializedBackwardUser(Operation *op); + +// Attempts to map a function operation to the accelerator using heuristics. 
+bool tryHeuristicMapping(std::vector &sorted_ops, + const Architecture &architecture, + MappingState &mapping_state); + +// Attempts to route a data move operation from src_loc to dst_loc. +bool tryRouteDataMove(Operation *mov, + MappingLoc src_loc, + MappingLoc dst_loc, + bool is_backward_move, + const MappingState &mapping_state, + std::vector &path_out); + +bool tryRouteForwardMove(Operation *mov_op, + MappingLoc src_loc, + MappingLoc dst_loc, + const MappingState &state, + std::vector &path_out); + +bool tryRouteBackwardMove(Operation *mov_op, + MappingLoc src_loc, + MappingLoc dst_loc, + const MappingState &state, + std::vector &path_out); + +// Calculates the cost of mapping locations for a given op, the returned locations +// are sorted based on the cost. +std::vector calculateCost(Operation *op, const MappingState &mapping_state); + +// Gets the ctrl_mov users of an operation, empty vector is returned if no ctrl_mov users found. +llvm::SmallVector getCtrlMovUsers(Operation *op); + +// Maps a materialized operation to the accelerator, and routes the dataflow from +// the producers to the given op. 
+bool placeAndRoute(Operation *op, const MappingLoc &target_loc, MappingState &mapping_state); + +std::vector calculateAward(Operation *op, + const Architecture &architecture, + const MappingState &mapping_state); + +void updateAward(std::map &locs_with_award, + MappingLoc loc, int award); + +bool canReachLocInTime(const MappingLoc &src_loc, + const MappingLoc &dst_loc, + int deadline_step, + const MappingState &mapping_state); + +bool canReachLocInTime(const std::vector &producers, + const MappingLoc &target_loc, + int deadline_step, + const MappingState &mapping_state); + } // namespace neura } // namespace mlir diff --git a/include/NeuraDialect/NeuraPasses.h b/include/NeuraDialect/NeuraPasses.h index 8fa986f2..8b444603 100644 --- a/include/NeuraDialect/NeuraPasses.h +++ b/include/NeuraDialect/NeuraPasses.h @@ -25,6 +25,7 @@ std::unique_ptr createAssignAcceleratorPass(); std::unique_ptr createTransformCtrlToDataFlowPass(); std::unique_ptr createLeveragePredicatedValuePass(); std::unique_ptr createMapToAcceleratorPass(); +std::unique_ptr createGenerateCodePass(); #define GEN_PASS_REGISTRATION #include "NeuraDialect/NeuraPasses.h.inc" diff --git a/include/NeuraDialect/NeuraPasses.td b/include/NeuraDialect/NeuraPasses.td index 426fe6d0..823a4b55 100644 --- a/include/NeuraDialect/NeuraPasses.td +++ b/include/NeuraDialect/NeuraPasses.td @@ -57,4 +57,14 @@ def MapToAccelerator : Pass<"map-to-accelerator", "ModuleOp"> { }]; let constructor = "neura::createMapToAcceleratorPass()"; } + +def GenerateCode : Pass<"generate-code", "ModuleOp"> { + let summary = "Generate JSON-formatted instructions from mapped Neura IR"; + let description = [{ + This pass generates JSON file containing the instructions. + The instructions can be encoded into configuration signals. 
+ }]; + let constructor = "neura::createGenerateCodePass()"; +} + #endif // NEURA_PASSES_TD \ No newline at end of file diff --git a/lib/NeuraDialect/Mapping/MappingState.cpp b/lib/NeuraDialect/Mapping/MappingState.cpp index f4189429..75485ae4 100644 --- a/lib/NeuraDialect/Mapping/MappingState.cpp +++ b/lib/NeuraDialect/Mapping/MappingState.cpp @@ -1,37 +1,48 @@ #include "NeuraDialect/Mapping/MappingState.h" +#include "mlir/IR/BuiltinTypes.h" using namespace mlir; using namespace mlir::neura; MappingState::MappingState(const Architecture &arch, int II) : II(II) { + // TODO: Use number of operations to determine the max steps for constructing MRRG. for (Tile* tile : arch.getAllTiles()) { - for (int t = 0; t < II; ++t) { + for (int t = 0; t < II * kMaxSteps; ++t) { MappingLoc loc = {tile, t}; all_locs.insert(loc); - - // Create edges to neighboring tiles at t+1. - for (Tile* dst : tile->getDstTiles()) { - MappingLoc next_step_dst_tile_loc = {dst, (t + 1) % II}; // modulo II for reuse - next_step_tiles[loc].push_back(next_step_dst_tile_loc); - } - - // TODO: Not sure whether we need the link on t or t+1. - // Creates edges to neighboring links at t. 
- for (Link* dst : tile->getOutLinks()) { - MappingLoc current_step_dst_link_loc = {dst, t % II}; - next_step_tiles[loc].push_back(current_step_dst_link_loc); - } } } } -void MappingState::bindOp(MappingLoc loc, Operation *op) { +bool MappingState::bindOp(const MappingLoc &loc, Operation *op) { loc_to_op[loc] = op; occupied_locs.insert(loc); + auto it = op_to_locs.find(op); + assert (it == op_to_locs.end() && "Operation already has reserved locations"); + op_to_locs[op].push_back(loc); + return true; } -bool MappingState::isAvailable(const MappingLoc &loc) const { - return occupied_locs.find(loc) == occupied_locs.end(); +void MappingState::unbindOp(Operation *op) { + auto it = op_to_locs.find(op); + if (it == op_to_locs.end()) return; + + for (const MappingLoc &loc : it->second) { + loc_to_op.erase(loc); + occupied_locs.erase(loc); + } + + op_to_locs.erase(it); +} + +bool MappingState::isAvailableAcrossTime(const MappingLoc &loc) const { + for (int t = loc.time_step % II; t < II * kMaxSteps; t += II) { + MappingLoc checkLoc = loc; + checkLoc.time_step = t; + if (occupied_locs.find(checkLoc) != occupied_locs.end()) + return false; + } + return true; } std::optional MappingState::getOpAt(MappingLoc loc) const { @@ -40,30 +51,161 @@ std::optional MappingState::getOpAt(MappingLoc loc) const { return it->second; } -const std::unordered_set &MappingState::getAllLocs() const { +int MappingState::countOpsAtResource(BasicResource *resource) const { + int count = 0; + for (const auto &[loc, op] : loc_to_op) { + if (loc.resource == resource) { + count++; + } + } + return count; +} + +const std::set &MappingState::getAllLocs() const { return all_locs; } -const std::vector &MappingState::getNextStepTiles(MappingLoc loc) const { +const std::vector &MappingState::getAllLocsOfOp(Operation *op) const { + auto it = op_to_locs.find(op); + if (it != op_to_locs.end()) + return it->second; + static const std::vector empty; - auto it = next_step_tiles.find(loc); - return it != 
next_step_tiles.end() ? it->second : empty; + return empty; } -const std::vector &MappingState::getNextStepLinks(MappingLoc loc) const { - static const std::vector empty; - auto it = next_step_links.find(loc); - return it != next_step_links.end() ? it->second : empty; +std::vector MappingState::getNextStepTiles(MappingLoc loc) const { + std::vector next_step_tiles; + const int next_step = loc.time_step + 1; + assert(next_step < II * kMaxSteps && "Next step exceeds max steps"); + // Collects neighboring tiles at t+1 for both tile and link. + if (loc.resource->getKind() == ResourceKind::Tile) { + Tile *tile = dyn_cast(loc.resource); + for (Tile* dst : tile->getDstTiles()) { + MappingLoc next_step_dst_tile_loc = {dst, next_step}; + next_step_tiles.push_back(next_step_dst_tile_loc); + } + // Includes self for reuse. + next_step_tiles.push_back({tile, next_step}); + } else if (loc.resource->getKind() == ResourceKind::Link) { + Link *link = dyn_cast(loc.resource); + Tile* dst = link->getDstTile(); + MappingLoc next_step_dst_tile_loc = {dst, next_step}; + next_step_tiles.push_back(next_step_dst_tile_loc); + } + return next_step_tiles; } -const std::vector &MappingState::getCurrentStepTiles(MappingLoc loc) const { - static const std::vector empty; - auto it = current_step_tiles.find(loc); - return it != current_step_tiles.end() ? it->second : empty; +// const std::vector &MappingState::getNextStepLinks(MappingLoc loc) const { +// static const std::vector empty; +// auto it = next_step_links.find(loc); +// return it != next_step_links.end() ? it->second : empty; +// } + +// const std::vector &MappingState::getCurrentStepTiles(MappingLoc loc) const { +// static const std::vector empty; +// auto it = current_step_tiles.find(loc); +// return it != current_step_tiles.end() ? 
it->second : empty; +// } + +std::vector MappingState::getCurrentStepLinks(MappingLoc loc) const { + assert((loc.resource->getKind() == ResourceKind::Tile) && + "Current step links can only be queried for tiles"); + std::vector current_step_links; + const int current_step = loc.time_step; + assert(current_step < II * kMaxSteps && "Current step exceeds max steps"); + // Collects neighboring tiles at t for given tile. + Tile *tile = dyn_cast(loc.resource); + for (Link* out_link : tile->getOutLinks()) { + MappingLoc current_step_out_link_loc = {out_link, current_step}; + current_step_links.push_back(current_step_out_link_loc); + } + return current_step_links; } -const std::vector &MappingState::getCurrentStepLinks(MappingLoc loc) const { - static const std::vector empty; - auto it = current_step_links.find(loc); - return it != current_step_links.end() ? it->second : empty; +void MappingState::reserveRoute(Operation *op, ArrayRef path) { + + // Records all mapping locations. + assert(op_to_locs.find(op) == op_to_locs.end() && + "Operation already has reserved locations"); + op_to_locs[op] = std::vector(path.begin(), path.end()); + + for (const MappingLoc &loc : path) { + assert(occupied_locs.find(loc) == occupied_locs.end() && + "Mapping location already occupied"); + loc_to_op[loc] = op; + assert(occupied_locs.find(loc) == occupied_locs.end() && + "Mapping location already occupied in occupied_locs"); + occupied_locs.insert(loc); + } +} + +void MappingState::releaseRoute(Operation *op) { + auto it = op_to_locs.find(op); + if (it == op_to_locs.end()) + return; + + const std::vector &route = it->second; + + for (const MappingLoc &loc : route) { + loc_to_op.erase(loc); + occupied_locs.erase(loc); + } + + op_to_locs.erase(it); +} + +void MappingState::dumpOpToLocs(llvm::raw_ostream &os) const { + os << "=== MappingState: op_to_locs ===\n"; + + for (const auto &[op, locs] : op_to_locs) { + os << " - " << op->getName(); + if (auto name_attr = op->getAttrOfType("sym_name")) 
+ os << " @" << name_attr; + os << "\n"; + + for (const MappingLoc &loc : locs) { + auto *res = loc.resource; + os << " -> " << res->getType() << "#" << res->getId() + << " @t=" << loc.time_step << "\n"; + } + } + + os << "=== End ===\n"; +} + +void MappingState::encodeMappingState() { + for (const auto &[op, locs] : op_to_locs) { + llvm::SmallVector mapping_entries; + auto ctx = op->getContext(); + for (const MappingLoc &loc : locs) { + std::string kind_str; + if (loc.resource->getKind() == ResourceKind::Tile) { + kind_str = "tile"; + } else if (loc.resource->getKind() == ResourceKind::Link) { + kind_str = "link"; + } else { + kind_str = "unknown"; + } + auto dict = mlir::DictionaryAttr::get( + ctx, + { + mlir::NamedAttribute( + mlir::StringAttr::get(ctx, "resource"), + mlir::StringAttr::get(ctx, kind_str) + ), + mlir::NamedAttribute( + mlir::StringAttr::get(ctx, "id"), + mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), loc.resource->getId()) + ), + mlir::NamedAttribute( + mlir::StringAttr::get(ctx, "time_step"), + mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), loc.time_step) + ) + } + ); + mapping_entries.push_back(dict); + } + op->setAttr("mapping_locs", mlir::ArrayAttr::get(ctx, mapping_entries)); + } } \ No newline at end of file diff --git a/lib/NeuraDialect/Mapping/mapping_util.cpp b/lib/NeuraDialect/Mapping/mapping_util.cpp index 3c724a7e..e0736669 100644 --- a/lib/NeuraDialect/Mapping/mapping_util.cpp +++ b/lib/NeuraDialect/Mapping/mapping_util.cpp @@ -1,9 +1,13 @@ #include +#include #include "NeuraDialect/Mapping/mapping_util.h" #include "NeuraDialect/NeuraOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include #include "mlir/IR/Operation.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" using namespace mlir; using namespace mlir::neura; @@ -27,10 +31,18 @@ void traverseAlongPath(Operation *op, Value reserve_value, Operation *res_op = reserve_value.getDefiningOp(); if (res_op) current_path.push_front(res_op); - 
constexpr int kNumExcludedOps = 2; + int effective_length = 0; + for (Operation *op : current_path) { + // Skips the non-materialized ops when counting the cycle length. + if (!isa(op)) { + ++effective_length; + } + } collected_paths.push_back(RecurrenceCycle{ operations: SmallVector(current_path.begin(), current_path.end()), - length: static_cast(current_path.size()) - kNumExcludedOps + length: static_cast(effective_length) }); if (res_op) current_path.pop_front(); @@ -71,7 +83,6 @@ SmallVector mlir::neura::collectRecurrenceCycles(Operation * for (auto &cycle : collected_paths) { cycle.operations.push_back(ctrl_mov_op); - ++cycle.length; recurrence_cycles.push_back(std::move(cycle)); } }); @@ -89,8 +100,8 @@ int mlir::neura::calculateResMii(Operation *func_op, if (isa(op) || isa(op)) { + neura::DataMovOp, + neura::ReserveOp>(op)) { return; } ++num_ops; @@ -103,3 +114,535 @@ int mlir::neura::calculateResMii(Operation *func_op, return llvm::divideCeil(num_ops, num_tiles); } + +std::vector mlir::neura::getTopologicallySortedOps(Operation *func_op) { + std::vector sorted_ops; + llvm::DenseMap pending_deps; + std::deque ready_queue; + + // Collects recurrence cycle ops. + auto recurrence_cycles = collectRecurrenceCycles(func_op); + llvm::DenseSet recurrence_ops; + for (const auto &cycle : recurrence_cycles) + for (Operation *op : cycle.operations) + recurrence_ops.insert(op); + + // Counts unresolved dependencies for each op. + func_op->walk([&](Operation *op) { + if (op == func_op) return; + int dep_count = 0; + for (Value operand : op->getOperands()) + if (operand.getDefiningOp()) + ++dep_count; + pending_deps[op] = dep_count; + if (dep_count == 0) { + // TODO: Prioritize recurrence ops. But cause compiled II regression. + // https://github.com/coredac/dataflow/issues/59. 
+ if (recurrence_ops.contains(op)) { + // ready_queue.push_front(op); + ready_queue.push_back(op); + } else { + ready_queue.push_back(op); + } + } + }); + + // BFS-style topological sort with recurrence priority. + while (!ready_queue.empty()) { + Operation *op = ready_queue.front(); + ready_queue.pop_front(); + sorted_ops.push_back(op); + + for (Value result : op->getResults()) { + for (Operation *user : result.getUsers()) { + if (--pending_deps[user] == 0) { + // TODO: Prioritize recurrence ops. But cause compiled II regression. + // https://github.com/coredac/dataflow/issues/59. + if (recurrence_ops.contains(user)) { + // ready_queue.push_front(user); + ready_queue.push_back(user); + } else { + ready_queue.push_back(user); + } + } + } + } + } + + return sorted_ops; +} + +mlir::Operation *mlir::neura::getMaterializedBackwardUser(Operation *op) { + assert(isa(op) && "Expected a ctrl_mov operation"); + auto ctrl_mov = dyn_cast(op); + Value target = ctrl_mov.getTarget(); + + assert(isa(target.getDefiningOp()) && + "Expected the user of ctrl_mov target to be a reserve operation"); + auto reserve_op = dyn_cast(target.getDefiningOp()); + + // Skip ctrl_mov users of reserve; return the first phi user. 
+ for (Operation *user : reserve_op.getResult().getUsers()) { + if (isa(user)) continue; // skip ctrl_mov user + if (isa(user)) return user; + } + assert(false && "No materialized backward user (i.e., phi) found for ctrl_mov"); +} + +llvm::SmallVector mlir::neura::getMaterializedUserOps(Operation *op) { + llvm::SmallVector result; + llvm::DenseSet visited; + visited.insert(op); + llvm::errs() << "Starting to collect materialized users for: " << *op << "\n"; + llvm::SmallVector worklist(op->getUsers().begin(), op->getUsers().end()); + + while (!worklist.empty()) { + Operation *curr = worklist.pop_back_val(); + llvm::errs() << "Visiting operation: " << *curr << "\n"; + if (!visited.insert(curr).second) { + llvm::errs() << "Already visited, so skip: " << *curr << "\n"; + continue; + } + + if (isa(curr)) { + for (Operation *next : curr->getUsers()) { + if (visited.insert(next).second) { + // Only adds the next operation if it hasn't been visited yet. + worklist.push_back(next); + } + } + continue; + } + + // Specially handles the ctrl_mov, i.e., the second operand of ctrl_mov is + // treated as a target/destination/user in terms of dataflow. 
+ if (auto ctrl_mov = dyn_cast(curr)) { + Value target = ctrl_mov.getTarget(); + for (Operation *user : target.getUsers()) { + if (visited.insert(user).second) { + worklist.push_back(user); + } + } + continue; + } + + // Materialized op + result.push_back(curr); + } + + for (Operation *res : result) { + llvm::errs() << "Materialized user: " << *res << "\n"; + } + return result; +} + +bool mlir::neura::tryRouteForwardMove(Operation *mov_op, + MappingLoc src_loc, + MappingLoc dst_loc, + const MappingState &state, + std::vector &path_out) { + return tryRouteDataMove(mov_op, src_loc, dst_loc, false, state, path_out); +} + +bool mlir::neura::tryRouteBackwardMove(Operation *mov_op, + MappingLoc src_loc, + MappingLoc dst_loc, + const MappingState &state, + std::vector &path_out) { + llvm::errs() << "[tryRouteBackwardMove] src_loc: " << src_loc.resource->getType() + << "#" << src_loc.resource->getId() + << " @t=" << src_loc.time_step + << ", dst_loc: " << dst_loc.resource->getType() + << "#" << dst_loc.resource->getId() + << " @t=" << dst_loc.time_step << "\n"; + return tryRouteDataMove(mov_op, src_loc, dst_loc, true, state, path_out); +} + +bool mlir::neura::tryRouteDataMove(Operation *mov_op, + MappingLoc src_loc, + MappingLoc dst_loc, + bool is_backward_move, + const MappingState &state, + std::vector &path_out) { + // Specially handles the case where src and dst are the same tile. + if (src_loc.resource == dst_loc.resource) { + return true; + } + struct QueueEntry { + Tile *tile; + int time; + std::vector path; + }; + + Tile *src_tile = dyn_cast(src_loc.resource); + Tile *dst_tile = dyn_cast(dst_loc.resource); + + std::queue queue; + std::set visited; + + queue.push({src_tile, src_loc.time_step, {}}); + visited.insert(src_tile); + + // Tolerates the deadline step by II for backward moves (as the data should + // arrive at the next iteration). + const int deadline_step = dst_loc.time_step + (is_backward_move ? 
state.getII() : 0); + + // BFS-style search for a path from src_tile to dst_tile. + while (!queue.empty()) { + auto [current_tile, current_time, current_path] = queue.front(); + queue.pop(); + + if (current_tile == dst_tile) { + // Confirms path reaches the target tile no later than deadline step. + if (current_time <= deadline_step) { + // Either arrives exactly right before the dst starts computation. + // So the current_time on the target tile is the same as deadline step. + if (current_time == deadline_step) { + path_out = current_path; + return true; + } + + // The last link can be held from arrival_time to dst_time - 1. + // TODO: We actually don't need to occupy the last link if the registers + // within the tile can be explicitly represented. + // https://github.com/coredac/dataflow/issues/52. + bool all_free = true; + assert(!current_path.empty() && "Path should not be empty when checking last link"); + MappingLoc last_link = current_path.back(); + std::vector last_link_occupying; + for (int t = current_time; t < deadline_step; ++t) { + MappingLoc repeated{last_link.resource, t}; + last_link_occupying.push_back(repeated); + if (!state.isAvailableAcrossTime(repeated)) { + all_free = false; + break; + } + } + if (all_free) { + path_out = current_path; + path_out.insert(path_out.end(), last_link_occupying.begin(), last_link_occupying.end()); + return true; + } + + } else { + // Arrives too late, not schedulable. 
+ continue; + } + } + + for (MappingLoc current_step_next_link : state.getCurrentStepLinks({current_tile, current_time})) { + if (!state.isAvailableAcrossTime(current_step_next_link)) continue; + + Link *next_link = dyn_cast(current_step_next_link.resource); + Tile *next_tile = next_link->getDstTile(); + int next_time = current_time + 1; + + if (!visited.insert(next_tile).second) continue; + + std::vector extended_path = current_path; + extended_path.push_back(current_step_next_link); + queue.push({next_tile, next_time, std::move(extended_path)}); + } + } + + return false; +} + +Operation* mlir::neura::getMaterializedProducer(Value operand) { + Operation *producer = operand.getDefiningOp(); + assert(isa(producer) && "Expected operand to be defined by a DataMovOp"); + // Finds the actual producer. + auto mov_op = dyn_cast(producer); + auto materialized_producer = mov_op.getOperand().getDefiningOp(); + return materialized_producer; +} + +bool mlir::neura::tryHeuristicMapping(std::vector &sorted_ops, + const Architecture &architecture, + MappingState &mapping_state) { + DenseSet visited; + + for (Operation *op : sorted_ops) { + // TODO: Build up util func to distinguish materialized and non-materialized ops. + if (isa(op)) + continue; + + std::vector sorted_locs = calculateAward(op, architecture, mapping_state); + // auto target_loc = getLocWithMinCost(loc_with_cost); + if (sorted_locs.empty()) { + llvm::errs() << "[DEBUG] No locations found for op: " << *op << "\n"; + return false; // No locations available for this operation. 
+ } + assert(!sorted_locs.empty() && + "No locations found for the operation to map"); + MappingLoc target_loc = sorted_locs.front(); + if (placeAndRoute(op, target_loc, mapping_state)) { + llvm::errs() << "[DEBUG] Successfully scheduled op: " << *op + << " at loc: " << target_loc.resource->getType() + << "#" << target_loc.resource->getId() + << " @t=" << target_loc.time_step << "\n"; + continue; + } else { + llvm::errs() << "[DEBUG] Failed to schedule op: " << *op << "; target loc: " << target_loc.resource->getType() << "#" << target_loc.resource->getId() << " @t=" << target_loc.time_step << "\n"; + } + // TODO: Optimization -- backtrack a few times if failed to schedule the op. + // https://github.com/coredac/dataflow/issues/59 + return false; + } + + return true; +} + +bool mlir::neura::canReachLocInTime(const std::vector &producers, + const MappingLoc &target_loc, + int deadline_step, + const MappingState &mapping_state) { + + for (Operation *producer : producers) { + // Get the last location of the producer. + auto producer_locs = mapping_state.getAllLocsOfOp(producer); + assert(!producer_locs.empty() && "No locations found for producer"); + + MappingLoc producer_loc = producer_locs.back(); + if (!canReachLocInTime(producer_loc, target_loc, deadline_step, mapping_state)) { + return false; + } + } + return true; +} + +bool mlir::neura::canReachLocInTime(const MappingLoc &src_loc, + const MappingLoc &dst_loc, + int deadline_step, + const MappingState &mapping_state) { + // Checks if the destination is reachable from the source within the given time window. + if (src_loc.resource == dst_loc.resource && + dst_loc.time_step <= deadline_step) { + return true; + } + + // Checks if the destination is reachable from the source tile within given steps. 
+ assert(isa(src_loc.resource)); + assert(isa(dst_loc.resource)); + + struct QueueEntry { + MappingLoc loc; + int current_time; + }; + + std::queue queue; + llvm::DenseSet visited; + + queue.push({src_loc, src_loc.time_step}); + visited.insert(dyn_cast(src_loc.resource)); + + while (!queue.empty()) { + auto [current_loc, current_time] = queue.front(); + queue.pop(); + + // If we reach the destination tile and time step is not after dst_loc + if (current_loc.resource == dst_loc.resource && + current_time <= dst_loc.time_step && + dst_loc.time_step <= deadline_step) { + return true; + } + + if (current_time >= deadline_step) + continue; + + // Explores all next step tiles from the current location. + for (const MappingLoc &next_loc : mapping_state.getNextStepTiles(current_loc)) { + if (!mapping_state.isAvailableAcrossTime(next_loc)) + continue; + + int next_time = current_time + 1; + if (next_time > deadline_step) + continue; + + Tile *next_tile = llvm::dyn_cast(next_loc.resource); + assert(next_tile && "Next location must be a Tile"); + if (visited.contains(next_tile)) { + continue; + } + + visited.insert(next_tile); + + MappingLoc next_step_loc = next_loc; + next_step_loc.time_step = next_time; + + queue.push({next_step_loc, next_time}); + } + } + + return false; +} + +void mlir::neura::updateAward(std::map &locs_with_award, + MappingLoc loc, int award) { + // Updates the award of the top element in the priority queue. + if (locs_with_award.find(loc) != locs_with_award.end()) { + locs_with_award[loc] += award; + } else { + locs_with_award[loc] = award; + } +} + +std::vector mlir::neura::calculateAward(Operation *op, + const Architecture &architecture, + const MappingState &mapping_state) { + // A heap of locations with their associated award. Note that we use a max-heap + // to prioritize locations with higher awards. + std::map locs_with_award; + + // Assembles all the producers. 
+  std::vector<Operation *> producers;
+  for (Value operand : op->getOperands()) {
+    if (isa<neura::ReserveOp>(operand.getDefiningOp())) {
+      // Skips Reserve ops (backward ctrl move) when estimating cost.
+      continue;
+    }
+    Operation *producer = getMaterializedProducer(operand);
+    assert(producer && "Expected a materialized producer");
+    producers.push_back(producer);
+  }
+
+  llvm::errs() << "[calculateAward] Operation: " << *op
+               << "; Producers: " << producers.size() << "\n";
+  for (Tile *tile : architecture.getAllTiles()) {
+    // Earliest step `op` may start: one past the latest producer location.
+    int earliest_start_time_step = 0;
+    for (Operation *producer : producers) {
+      std::vector<MappingLoc> producer_locs = mapping_state.getAllLocsOfOp(producer);
+      assert(!producer_locs.empty() && "No locations found for producer");
+
+      MappingLoc producer_loc = producer_locs.back();
+      earliest_start_time_step = std::max(earliest_start_time_step,
+                                          producer_loc.time_step + 1);
+    }
+    // Base award favors tiles with more outgoing links (routing flexibility).
+    int award = mapping_state.getII() + tile->getDstTiles().size();
+    for (int t = earliest_start_time_step;
+         t < earliest_start_time_step + mapping_state.getII(); t += 1) {
+      MappingLoc tile_loc_candidate = {tile, t};
+      // If the tile at time `t` is available, we can consider it for mapping.
+      if (mapping_state.isAvailableAcrossTime(tile_loc_candidate)) {
+        // If no producer or the location is reachable by all producers,
+        // we can consider it for mapping and grant reward.
+        if (producers.empty() ||
+            canReachLocInTime(producers,
+                              tile_loc_candidate,
+                              t,
+                              mapping_state)) {
+          updateAward(locs_with_award, tile_loc_candidate, award);
+        }
+      }
+      // The mapping location with earlier time step is granted a higher award.
+      award -= 1;
+    }
+    assert(award >= 0 && "Award should not be negative");
+  }
+
+  // Copies map entries into a vector of pairs for sorting.
+  std::vector<std::pair<MappingLoc, int>> locs_award_vec(locs_with_award.begin(),
+                                                         locs_with_award.end());
+
+  // Sorts by award (descending).
+ std::sort(locs_award_vec.begin(), locs_award_vec.end(), + [](const std::pair &a, const std::pair &b) { + return a.second > b.second; + }); + // TODO: Needs to handle tie case and prioritize lower resource utilization, however, + // compiled II becomes worse after adding this tie-breaker: https://github.com/coredac/dataflow/issues/59. + // std::sort(locs_award_vec.begin(), locs_award_vec.end(), + // [&](const std::pair &a, const std::pair &b) { + // if (a.second != b.second) { + // return a.second > b.second; + // } + // // Tie-breaker: prioritizes lower resource utilization and earlier time step. + // if (a.first.time_step != b.first.time_step) { + // return a.first.time_step > b.first.time_step; + // } + // const bool is_resource_a_lower_utilized = + // mapping_state.countOpsAtResource(a.first.resource) > + // mapping_state.countOpsAtResource(b.first.resource); + // return is_resource_a_lower_utilized; + // }); + + // Extracts just the MappingLocs, already sorted by award. + std::vector sorted_locs; + sorted_locs.reserve(locs_award_vec.size()); + for (const auto &pair : locs_award_vec) + sorted_locs.push_back(pair.first); + + return sorted_locs; +} + +llvm::SmallVector mlir::neura::getCtrlMovUsers(Operation *op) { + llvm::SmallVector result; + for (Operation *user : op->getUsers()) { + if (isa(user)) { + result.push_back(user); + } + } + return result; +} + +bool mlir::neura::placeAndRoute(Operation *op, const MappingLoc &target_loc, MappingState &mapping_state) { + if (mapping_state.bindOp(target_loc, op)) { + // Tries to route the data move operations. + for (Value operand : op->getOperands()) { + if (isa(operand.getDefiningOp())) { + // Skips Reserve ops (backward ctrl move) when estimate cost. 
+        continue;
+      }
+      Operation *data_move = operand.getDefiningOp();
+      assert(isa<neura::DataMovOp>(data_move) &&
+             "Expected a DataMovOp as operand producer");
+      Operation *producer = getMaterializedProducer(operand);
+      MappingLoc src_loc = mapping_state.getAllLocsOfOp(producer).back();
+
+      std::vector<MappingLoc> route_path;
+      if (tryRouteForwardMove(data_move, src_loc, target_loc, mapping_state, route_path)) {
+        mapping_state.reserveRoute(data_move, route_path);
+        llvm::errs() << "[DEBUG] Successfully routed data move: " << *data_move
+                     << " from " << src_loc.resource->getType() << "#" << src_loc.resource->getId()
+                     << " @t=" << src_loc.time_step
+                     << " to " << target_loc.resource->getType() << "#" << target_loc.resource->getId()
+                     << " @t=" << target_loc.time_step << "\n";
+        continue;
+      }
+      llvm::errs() << "[DEBUG] Failed to route data move: " << *data_move
+                   << " from " << src_loc.resource->getType() << "#" << src_loc.resource->getId()
+                   << " @t=" << src_loc.time_step
+                   << " to " << target_loc.resource->getType() << "#" << target_loc.resource->getId()
+                   << " @t=" << target_loc.time_step << "\n";
+      // Backtracks: undoes the placement and any partial route.
+      mapping_state.unbindOp(op);
+      mapping_state.releaseRoute(data_move);
+      return false;
+    }
+    // Checks whether the operation's user is a ctrl_mov.
+    for (Operation *user : getCtrlMovUsers(op)) {
+      auto ctrl_mov = dyn_cast<neura::CtrlMovOp>(user);
+      // Asserts before streaming: dereferencing a null dyn_cast result is UB.
+      assert(ctrl_mov && "Expected user to be a CtrlMovOp");
+      llvm::errs() << "[DEBUG] Found ctrl_mov user: " << *ctrl_mov << "\n";
+      mlir::Operation *materialized_backward_op = getMaterializedBackwardUser(ctrl_mov);
+      assert(isa<neura::PhiOp>(materialized_backward_op) &&
+             "Expected materialized operation of ctrl_mov to be a PhiOp");
+      // Gets the last location of the materialized operation.
+      MappingLoc backward_loc = mapping_state.getAllLocsOfOp(materialized_backward_op).back();
+      // Routes the ctrl_mov to the phi location.
+ std::vector route_path; + if (tryRouteBackwardMove(ctrl_mov, target_loc, backward_loc, mapping_state, route_path)) { + mapping_state.reserveRoute(ctrl_mov, route_path); + llvm::errs() << "[DEBUG] Successfully routed ctrl_mov: " << *ctrl_mov + << " to " << backward_loc.resource->getType() << "#" << backward_loc.resource->getId() + << " @t=" << backward_loc.time_step << "\n"; + continue; + } + llvm::errs() << "[DEBUG] Failed to route ctrl_mov: " << *ctrl_mov + << " to " << backward_loc.resource->getType() << "#" << backward_loc.resource->getId() + << " @t=" << backward_loc.time_step << "\n"; + mapping_state.unbindOp(op); + mapping_state.releaseRoute(ctrl_mov); + return false; + } + return true; + } + return false; +} \ No newline at end of file diff --git a/lib/NeuraDialect/Transforms/CMakeLists.txt b/lib/NeuraDialect/Transforms/CMakeLists.txt index c1d16bdc..7d944fbb 100644 --- a/lib/NeuraDialect/Transforms/CMakeLists.txt +++ b/lib/NeuraDialect/Transforms/CMakeLists.txt @@ -9,6 +9,7 @@ add_mlir_library( TransformCtrlToDataFlowPass.cpp LeveragePredicatedValuePass.cpp MapToAcceleratorPass.cpp + GenerateCodePass.cpp DEPENDS MLIRNeuraTransformsIncGen diff --git a/lib/NeuraDialect/Transforms/GenerateCodePass.cpp b/lib/NeuraDialect/Transforms/GenerateCodePass.cpp new file mode 100644 index 00000000..6c223f83 --- /dev/null +++ b/lib/NeuraDialect/Transforms/GenerateCodePass.cpp @@ -0,0 +1,137 @@ +#include "NeuraDialect/NeuraDialect.h" +#include "NeuraDialect/NeuraOps.h" +#include "NeuraDialect/NeuraPasses.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Pass/Pass.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/raw_ostream.h" + +using namespace mlir; +using namespace mlir::neura; + +#define GEN_PASS_DEF_GenerateCode +#include "NeuraDialect/NeuraPasses.h.inc" + +namespace { + +struct GenerateCodePass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(GenerateCodePass) + + StringRef getArgument() const override { return 
"generate-code"; }
+  StringRef getDescription() const override {
+    return "Generates JSON code from mapped Neura IR.";
+  }
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    // NOTE(review): registered dialect name was stripped by extraction;
+    // restoring the Neura dialect -- confirm against upstream.
+    registry.insert<neura::NeuraDialect>();
+  }
+
+  void runOnOperation() override {
+    ModuleOp module = getOperation();
+
+    llvm::json::Array functions_array;
+
+    for (auto func : module.getOps<func::FuncOp>()) {
+      // Only functions assigned to the "neura" accelerator are emitted.
+      auto accel_attr = func->getAttrOfType<StringAttr>("accelerator");
+      if (!accel_attr || accel_attr.getValue() != "neura")
+        continue;
+
+      llvm::json::Object func_obj;
+      func_obj["name"] = func.getName().str();
+
+      if (auto ii_attr = func->getAttrOfType<IntegerAttr>("CompiledII"))
+        func_obj["CompiledII"] = ii_attr.getInt();
+      if (auto recMII_attr = func->getAttrOfType<IntegerAttr>("RecMII"))
+        func_obj["RecMII"] = recMII_attr.getInt();
+      if (auto resMII_attr = func->getAttrOfType<IntegerAttr>("ResMII"))
+        func_obj["ResMII"] = resMII_attr.getInt();
+
+      llvm::json::Array op_array;
+
+      func.walk([&](Operation *op) {
+        // NOTE(review): the skipped op type was stripped by extraction;
+        // skipping the enclosing func op itself -- confirm against upstream.
+        if (isa<func::FuncOp>(op))
+          return;
+
+        llvm::json::Object op_obj;
+        op_obj["name"] = op->getName().getStringRef().str();
+
+        // Result types.
+        llvm::json::Array result_types;
+        for (auto result : op->getResults()) {
+          std::string type_str;
+          llvm::raw_string_ostream os(type_str);
+          result.getType().print(os);
+          result_types.push_back(os.str());
+        }
+        op_obj["result_types"] = std::move(result_types);
+
+        // Operands (recorded by producing op name, or "block_arg").
+        llvm::json::Array operand_indices;
+        for (Value operand : op->getOperands()) {
+          if (auto defining_op = operand.getDefiningOp())
+            operand_indices.push_back(defining_op->getName().getStringRef().str());
+          else
+            operand_indices.push_back("block_arg");
+        }
+        op_obj["operands"] = std::move(operand_indices);
+
+        // Constants.
+ if (auto const_op = mlir::dyn_cast(op)) { + auto val_attr = const_op.getValue(); + if (val_attr) { + if (auto int_attr = mlir::dyn_cast(val_attr)) { + op_obj["constant_value"] = std::to_string(int_attr.getInt()); + } else if (auto float_attr = mlir::dyn_cast(val_attr)) { + op_obj["constant_value"] = std::to_string(float_attr.getValueAsDouble()); + } + } + } + + // Mapping locs. + llvm::json::Array loc_array; + if (auto attr_array = op->getAttrOfType("mapping_locs")) { + for (Attribute attr : attr_array) { + if (auto loc = mlir::dyn_cast(attr)) { + llvm::json::Object loc_obj; + if (auto idAttr = mlir::dyn_cast(loc.get("id"))) + loc_obj["id"] = idAttr.getInt(); + if (auto resource_attr = mlir::dyn_cast(loc.get("resource"))) + loc_obj["resource"] = resource_attr.getValue().str(); + if (auto timestep_attr = mlir::dyn_cast(loc.get("time_step"))) + loc_obj["time_step"] = timestep_attr.getInt(); + loc_array.push_back(std::move(loc_obj)); + } + } + } + op_obj["mapping_locs"] = std::move(loc_array); + + op_array.push_back(std::move(op_obj)); + }); + + func_obj["operations"] = std::move(op_array); + functions_array.push_back(std::move(func_obj)); + } + + // Final JSON object. 
+ llvm::json::Object root; + root["functions"] = std::move(functions_array); + + // llvm::outs() << llvm::formatv("{0:2}", llvm::json::Value(std::move(root))) << "\n"; + std::error_code ec; + llvm::raw_fd_ostream json_out("generated-instructions.json", ec); + if (ec) { + getOperation()->emitError("Failed to open 'generated-instructions.json' for writing: " + ec.message()); + return signalPassFailure(); + } + json_out << llvm::formatv("{0:2}", llvm::json::Value(std::move(root))) << "\n"; + } +}; + +} // namespace + +namespace mlir::neura { +std::unique_ptr createGenerateCodePass() { + return std::make_unique(); +} +} // namespace mlir::neura diff --git a/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp b/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp index ea6d1ef8..a684c92f 100644 --- a/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp +++ b/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp @@ -23,32 +23,65 @@ struct InsertDataMovForNeuraOps : public RewritePattern { return failure(); } - // Skips ops that already being inserted mov on the operands. - bool allInputsAreMov = llvm::all_of(op->getOperands(), [](Value v) { - return isa_and_nonnull(v.getDefiningOp()); + bool all_inputs_are_mov_except_reserve = llvm::all_of(op->getOperands(), [](Value v) { + Operation *def_op = v.getDefiningOp(); + return isa_and_nonnull(def_op) || + isa_and_nonnull(def_op); }); - if (allInputsAreMov) { - return failure(); - } + + if (all_inputs_are_mov_except_reserve) + return failure(); // All operands are already handled + + // // Skips ops that already being inserted mov on the operands. + // bool all_inputs_are_mov = llvm::all_of(op->getOperands(), [](Value v) { + // return isa_and_nonnull(v.getDefiningOp()); + // }); + // if (all_inputs_are_mov) { + // return failure(); + // } + + // // Special case: skips rewriting phi if any operand is from reserve. 
+    // if (isa<neura::PhiOp>(op)) {
+    //   bool has_reserved_input = llvm::any_of(op->getOperands(), [](Value v) {
+    //     return isa_and_nonnull<neura::ReserveOp>(v.getDefiningOp());
+    //   });
+
+    //   if (has_reserved_input)
+    //     return failure(); // Skip entire phi if any operand is reserved.
+    // }
 
     // Makes sure none of the operand has being processed.
-    bool hasAnyMovInput = llvm::any_of(op->getOperands(), [](Value v) {
+    bool has_any_mov_input = llvm::any_of(op->getOperands(), [](Value v) {
       return isa_and_nonnull<neura::DataMovOp>(v.getDefiningOp());
     });
-    assert(!hasAnyMovInput && "Unexpected: operand already wrapped in neura.mov");
+    if (has_any_mov_input)
+      llvm::errs() << "Warning: Operand already wrapped in neura.data_mov: " << *op << "\n";
+    assert(!has_any_mov_input && "Unexpected: operand already wrapped in neura.mov");
 
     Location loc = op->getLoc();
 
+    // Skips adding mov if the consumer is ctrl_mov.
+    if (isa<neura::CtrlMovOp>(op)) {
+      return failure(); // do not rewrite
+    }
+
     // Wraps operands in mov.
-    SmallVector<Value> newOperands;
+    SmallVector<Value> new_operands;
     for (Value operand : op->getOperands()) {
+      Operation *producer = operand.getDefiningOp();
+      // Skips adding mov for neura.reserve -> neura.phi.
+      if (isa<neura::PhiOp>(op) && producer && isa<neura::ReserveOp>(producer)) {
+        new_operands.push_back(operand);
+        continue;
+      }
+
       auto mov = rewriter.create<neura::DataMovOp>(loc, operand.getType(), operand);
-      newOperands.push_back(mov);
+      new_operands.push_back(mov);
     }
 
     // Clones op with new operands.
OperationState state(loc, op->getName()); - state.addOperands(newOperands); + state.addOperands(new_operands); state.addTypes(op->getResultTypes()); state.addAttributes(op->getAttrs()); @@ -59,8 +92,8 @@ struct InsertDataMovForNeuraOps : public RewritePattern { } } - Operation *newOp = rewriter.create(state); - rewriter.replaceOp(op, newOp->getResults()); + Operation *new_op = rewriter.create(state); + rewriter.replaceOp(op, new_op->getResults()); return success(); } }; diff --git a/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp b/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp index 7831b50a..699d22cc 100644 --- a/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp +++ b/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp @@ -64,16 +64,33 @@ struct MapToAcceleratorPass } // AcceleratorConfig config{/*numTiles=*/8}; // Example - Architecture architecture(2, 2); + Architecture architecture(4, 4); int res_mii = calculateResMii(func, architecture); IntegerAttr res_mii_attr = IntegerAttr::get( IntegerType::get(func.getContext(), 32), res_mii); func->setAttr("ResMII", res_mii_attr); const int minII = std::min(rec_mii, res_mii); - constexpr int maxII = 5; + constexpr int maxII = 10; + std::vector sorted_ops = getTopologicallySortedOps(func); + for (Operation *op : sorted_ops) { + llvm::errs() << "[MapToAcceleratorPass] sorted op: " + << *op << "\n"; + } for (int ii = minII; ii <= maxII; ++ii) { - MappingState state(architecture, ii); + MappingState mapping_state(architecture, ii); + if (tryHeuristicMapping(sorted_ops, architecture, mapping_state)) { + // success + llvm::errs() << "[MapToAcceleratorPass] Successfully mapped function '" + << func.getName() << "' with II = " << ii << "\n"; + mapping_state.dumpOpToLocs(); // logs to stderr + mapping_state.encodeMappingState(); + func->setAttr("CompiledII", IntegerAttr::get( + IntegerType::get(func.getContext(), 32), ii)); + break; + } + llvm::errs() << "[DEBUG] mapping failed for II = " << ii << "\n"; + 
mapping_state.dumpOpToLocs(); // logs to stderr } }); } diff --git a/test/neura/ctrl/branch_for.mlir b/test/neura/ctrl/branch_for.mlir index e656bbef..e5a57b8b 100644 --- a/test/neura/ctrl/branch_for.mlir +++ b/test/neura/ctrl/branch_for.mlir @@ -16,8 +16,28 @@ // RUN: --lower-llvm-to-neura \ // RUN: --leverage-predicated-value \ // RUN: --transform-ctrl-to-data-flow \ +// RUN: --insert-data-mov \ +// RUN: | FileCheck %s -check-prefix=MOV + +// RUN: mlir-neura-opt %s \ +// RUN: --assign-accelerator \ +// RUN: --lower-llvm-to-neura \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: --insert-data-mov \ +// RUN: --map-to-accelerator \ +// RUN: | FileCheck %s -check-prefix=MAPPING + +// RUN: mlir-neura-opt %s \ +// RUN: --assign-accelerator \ +// RUN: --lower-llvm-to-neura \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: --insert-data-mov \ // RUN: --map-to-accelerator \ -// RUN: | FileCheck %s -check-prefix=MII +// RUN: --generate-code + +// RUN: FileCheck %s --input-file=generated-instructions.json -check-prefix=INST func.func @loop_test() -> f32 { %n = llvm.mlir.constant(10 : i64) : i64 @@ -81,4 +101,107 @@ func.func @loop_test() -> f32 { // CTRL2DATA-NEXT: "neura.return"(%18) : (!neura.data) -> () // CTRL2DATA-NEXT: } -// MII: func.func @loop_test() -> f32 attributes {RecMII = 4 : i32, ResMII = 4 : i32, accelerator = "neura"} \ No newline at end of file +// MOV: func.func @loop_test() -> f32 attributes {accelerator = "neura"} { +// MOV-NEXT: %0 = "neura.constant"() <{predicate = true, value = 10 : i64}> : () -> !neura.data +// MOV-NEXT: %1 = "neura.data_mov"(%0) : (!neura.data) -> !neura.data +// MOV-NEXT: %2 = "neura.grant_always"(%1) : (!neura.data) -> !neura.data +// MOV-NEXT: %3 = "neura.constant"() <{predicate = true, value = 0 : i64}> : () -> !neura.data +// MOV-NEXT: %4 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data +// MOV-NEXT: %5 = "neura.grant_once"(%4) : 
(!neura.data) -> !neura.data +// MOV-NEXT: %6 = "neura.constant"() <{predicate = true, value = 1 : i64}> : () -> !neura.data +// MOV-NEXT: %7 = "neura.data_mov"(%6) : (!neura.data) -> !neura.data +// MOV-NEXT: %8 = "neura.grant_always"(%7) : (!neura.data) -> !neura.data +// MOV-NEXT: %9 = "neura.constant"() <{predicate = true, value = 3.000000e+00 : f32}> : () -> !neura.data +// MOV-NEXT: %10 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data +// MOV-NEXT: %11 = "neura.grant_always"(%10) : (!neura.data) -> !neura.data +// MOV-NEXT: %12 = "neura.constant"() <{predicate = true, value = 0.000000e+00 : f32}> : () -> !neura.data +// MOV-NEXT: %13 = "neura.data_mov"(%12) : (!neura.data) -> !neura.data +// MOV-NEXT: %14 = "neura.grant_once"(%13) : (!neura.data) -> !neura.data +// MOV-NEXT: %15 = neura.reserve : !neura.data +// MOV-NEXT: %16 = "neura.data_mov"(%5) : (!neura.data) -> !neura.data +// MOV-NEXT: %17 = "neura.phi"(%16, %15) : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %18 = neura.reserve : !neura.data +// MOV-NEXT: %19 = "neura.data_mov"(%14) : (!neura.data) -> !neura.data +// MOV-NEXT: %20 = "neura.phi"(%19, %18) : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %21 = "neura.data_mov"(%20) : (!neura.data) -> !neura.data +// MOV-NEXT: %22 = "neura.data_mov"(%11) : (!neura.data) -> !neura.data +// MOV-NEXT: %23 = "neura.fadd"(%21, %22) : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %24 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data +// MOV-NEXT: %25 = "neura.data_mov"(%8) : (!neura.data) -> !neura.data +// MOV-NEXT: %26 = "neura.add"(%24, %25) : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %27 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data +// MOV-NEXT: %28 = "neura.data_mov"(%2) : (!neura.data) -> !neura.data +// MOV-NEXT: %29 = "neura.icmp"(%27, %28) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %30 = "neura.data_mov"(%29) : (!neura.data) -> !neura.data +// MOV-NEXT: 
%31 = "neura.not"(%30) : (!neura.data) -> !neura.data +// MOV-NEXT: %32 = "neura.data_mov"(%23) : (!neura.data) -> !neura.data +// MOV-NEXT: %33 = "neura.data_mov"(%31) : (!neura.data) -> !neura.data +// MOV-NEXT: %34 = neura.grant_predicate %32, %33 : !neura.data, !neura.data -> !neura.data +// MOV-NEXT: %35 = "neura.data_mov"(%23) : (!neura.data) -> !neura.data +// MOV-NEXT: %36 = "neura.data_mov"(%29) : (!neura.data) -> !neura.data +// MOV-NEXT: %37 = neura.grant_predicate %35, %36 : !neura.data, !neura.data -> !neura.data +// MOV-NEXT: neura.ctrl_mov %37 -> %18 : !neura.data !neura.data +// MOV-NEXT: %38 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data +// MOV-NEXT: %39 = "neura.data_mov"(%29) : (!neura.data) -> !neura.data +// MOV-NEXT: %40 = neura.grant_predicate %38, %39 : !neura.data, !neura.data -> !neura.data +// MOV-NEXT: neura.ctrl_mov %40 -> %15 : !neura.data !neura.data +// MOV-NEXT: %41 = "neura.data_mov"(%34) : (!neura.data) -> !neura.data +// MOV-NEXT: "neura.return"(%41) : (!neura.data) -> () +// MOV-NEXT: } + +// MAPPING: func.func @loop_test() -> f32 attributes {CompiledII = 6 : i32, RecMII = 4 : i32, ResMII = 1 : i32, accelerator = "neura"} { +// MAPPING-NEXT: %0 = "neura.constant"() <{predicate = true, value = 10 : i64}> {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 0 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %1 = "neura.data_mov"(%0) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %2 = "neura.grant_always"(%1) {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %3 = "neura.constant"() <{predicate = true, value = 0 : i64}> {mapping_locs = [{id = 6 : i32, resource = "tile", time_step = 0 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %4 = "neura.data_mov"(%3) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %5 = "neura.grant_once"(%4) {mapping_locs = [{id = 6 : i32, resource = "tile", time_step = 1 : i32}]} 
: (!neura.data) -> !neura.data +// MAPPING-NEXT: %6 = "neura.constant"() <{predicate = true, value = 1 : i64}> {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 0 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %7 = "neura.data_mov"(%6) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %8 = "neura.grant_always"(%7) {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %9 = "neura.constant"() <{predicate = true, value = 3.000000e+00 : f32}> {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 0 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %10 = "neura.data_mov"(%9) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %11 = "neura.grant_always"(%10) {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %12 = "neura.constant"() <{predicate = true, value = 0.000000e+00 : f32}> {mapping_locs = [{id = 14 : i32, resource = "tile", time_step = 0 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %13 = "neura.data_mov"(%12) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %14 = "neura.grant_once"(%13) {mapping_locs = [{id = 14 : i32, resource = "tile", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %15 = neura.reserve : !neura.data +// MAPPING-NEXT: %16 = "neura.data_mov"(%5) {mapping_locs = [{id = 19 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %17 = "neura.phi"(%16, %15) {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 2 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %18 = neura.reserve : !neura.data +// MAPPING-NEXT: %19 = "neura.data_mov"(%14) {mapping_locs = [{id = 43 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %20 = "neura.phi"(%19, %18) {mapping_locs = [{id = 10 : i32, resource = "tile", 
time_step = 2 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %21 = "neura.data_mov"(%20) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %22 = "neura.data_mov"(%11) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %23 = "neura.fadd"(%21, %22) {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 3 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %24 = "neura.data_mov"(%17) {mapping_locs = [{id = 14 : i32, resource = "link", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %25 = "neura.data_mov"(%8) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %26 = "neura.add"(%24, %25) {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 3 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %27 = "neura.data_mov"(%26) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %28 = "neura.data_mov"(%2) {mapping_locs = [{id = 15 : i32, resource = "link", time_step = 1 : i32}, {id = 11 : i32, resource = "link", time_step = 2 : i32}, {id = 26 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %29 = "neura.icmp"(%27, %28) <{cmpType = "slt"}> {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 4 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %30 = "neura.data_mov"(%29) {mapping_locs = [{id = 27 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %31 = "neura.not"(%30) {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %32 = "neura.data_mov"(%23) {mapping_locs = [{id = 31 : i32, resource = "link", time_step = 3 : i32}, {id = 17 : i32, resource = "link", time_step = 4 : i32}, {id = 6 : i32, resource = "link", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %33 = "neura.data_mov"(%31) 
{mapping_locs = [{id = 13 : i32, resource = "link", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %34 = neura.grant_predicate %32, %33 {mapping_locs = [{id = 1 : i32, resource = "tile", time_step = 6 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %35 = "neura.data_mov"(%23) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %36 = "neura.data_mov"(%29) {mapping_locs = [{id = 30 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %37 = neura.grant_predicate %35, %36 {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 5 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %37 -> %18 {mapping_locs = []} : !neura.data !neura.data +// MAPPING-NEXT: %38 = "neura.data_mov"(%26) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %39 = "neura.data_mov"(%29) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %40 = neura.grant_predicate %38, %39 {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 5 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %40 -> %15 {mapping_locs = [{id = 27 : i32, resource = "link", time_step = 5 : i32}, {id = 27 : i32, resource = "link", time_step = 6 : i32}, {id = 27 : i32, resource = "link", time_step = 7 : i32}]} : !neura.data !neura.data +// MAPPING-NEXT: %41 = "neura.data_mov"(%34) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.return"(%41) {mapping_locs = [{id = 1 : i32, resource = "tile", time_step = 7 : i32}]} : (!neura.data) -> () +// MAPPING-NEXT: } + +// INST: "name": "neura.fadd", +// INST-NEXT: "operands": [ +// INST-NEXT: "neura.data_mov", +// INST-NEXT: "neura.data_mov" +// INST-NEXT: ], +// INST-NEXT: "result_types": [ +// INST-NEXT: "!neura.data" +// INST-NEXT: ] \ No newline at end of file