diff --git a/include/NeuraDialect/Architecture/Architecture.h b/include/NeuraDialect/Architecture/Architecture.h
index f40ae183..2d560e75 100644
--- a/include/NeuraDialect/Architecture/Architecture.h
+++ b/include/NeuraDialect/Architecture/Architecture.h
@@ -11,6 +11,12 @@
namespace mlir {
namespace neura {
+// Enum for identifying resource type.
+enum class ResourceKind {
+ Tile,
+ Link,
+};
+
//===----------------------------------------------------------------------===//
// BasicResource: abstract base class for Tile, Link, etc.
//===----------------------------------------------------------------------===//
@@ -20,6 +26,7 @@ class BasicResource {
virtual ~BasicResource() = default;
virtual int getId() const = 0;
virtual std::string getType() const = 0;
+ virtual ResourceKind getKind() const = 0;
};
//===----------------------------------------------------------------------===//
@@ -37,6 +44,12 @@ class Tile : public BasicResource {
int getId() const override;
std::string getType() const override { return "tile"; }
+ ResourceKind getKind() const override { return ResourceKind::Tile; }
+
+ static bool classof(const BasicResource *res) {
+ return res && res->getKind() == ResourceKind::Tile;
+ }
+
int getX() const;
int getY() const;
@@ -64,8 +77,14 @@ class Link : public BasicResource {
Link(int id);
int getId() const override;
+
std::string getType() const override { return "link"; }
+ ResourceKind getKind() const override { return ResourceKind::Link; }
+
+ static bool classof(const BasicResource *res) {
+ return res && res->getKind() == ResourceKind::Link;
+ }
Tile* getSrcTile() const;
Tile* getDstTile() const;
@@ -83,7 +102,8 @@ struct PairHash {
}
};
-/// Describes the entire CGRA architecture.
+// Describes the CGRA architecture template.
+// TODO: Model architecture in detail (e.g., registers, ports).
class Architecture {
public:
Architecture(int width, int height);
@@ -98,8 +118,9 @@ class Architecture {
std::vector getAllLinks() const;
private:
+ // TODO: Model architecture in detail, e.g., ports, registers, crossbars, etc.
+ // https://github.com/coredac/dataflow/issues/52.
std::vector> tile_storage;
-// std::vector tiles;
std::vector> link_storage;
std::unordered_map id_to_tile;
std::unordered_map, Tile*, PairHash> coord_to_tile;
diff --git a/include/NeuraDialect/Mapping/MappingState.h b/include/NeuraDialect/Mapping/MappingState.h
index ed10b265..5736f654 100644
--- a/include/NeuraDialect/Mapping/MappingState.h
+++ b/include/NeuraDialect/Mapping/MappingState.h
@@ -2,7 +2,8 @@
#define NEURA_MAPPING_STATE_H
#include "mlir/IR/Operation.h"
-#include "NeuraDialect/Architecture/Architecture.h" // for BasicResource
+#include "NeuraDialect/Architecture/Architecture.h"
+#include "llvm/Support/raw_ostream.h"
#include
#include
#include
@@ -19,6 +20,12 @@ struct MappingLoc {
bool operator==(const MappingLoc &other) const {
return resource == other.resource && time_step == other.time_step;
}
+
+ bool operator<(const MappingLoc &other) const {
+ if (time_step != other.time_step)
+ return time_step < other.time_step;
+ return resource->getId() < other.resource->getId();
+ }
};
} // namespace neura
@@ -43,44 +50,65 @@ class MappingState {
public:
MappingState(const Architecture &arch, int II);
// Binds a (tile/link, time_step) location to an operation.
- void bindOp(MappingLoc loc, Operation *op);
+ bool bindOp(const MappingLoc &loc, Operation *op);
+
+ // Unbinds an operation from its (tile/link, time_step) location,
+ // which is useful for backtracking.
+ void unbindOp(Operation *op);
// Checks if a (tile/link, time_step) is available (unoccupied).
- bool isAvailable(const MappingLoc &loc) const;
+ // Note that the check is performed in II granularity.
+ // For example, if II is 4, and we want to check (tile 2, step 5), then
+ // it will check (tile 2, step 1), (tile 2, step 5), (tile 2, step 9), etc.
+ bool isAvailableAcrossTime(const MappingLoc &loc) const;
// Gets the operation at a specific (tile/link, time_step) location.
std::optional getOpAt(MappingLoc loc) const;
+ // Counts the number of operations at a specific resource across time steps.
+ int countOpsAtResource(BasicResource *resource) const;
+
// Gets all MRRG nodes.
- const std::unordered_set &getAllLocs() const;
+ const std::set &getAllLocs() const;
+
+ // Gets all MRRG nodes allocated to a given op.
+ const std::vector &getAllLocsOfOp(Operation *op) const;
+
+  // Reserves links for a move operation.
+ void reserveRoute(Operation *op, ArrayRef path);
+
+  // Releases links for a move operation.
+ void releaseRoute(Operation *op);
// Gets neighboring tiles on next step of a given MappingLoc.
- const std::vector &getNextStepTiles(MappingLoc loc) const;
+ std::vector getNextStepTiles(MappingLoc loc) const;
- // Gets neighboring links on next step of a given MappingLoc.
- const std::vector &getNextStepLinks(MappingLoc loc) const;
+// // Gets neighboring links on next step of a given MappingLoc.
+// const std::vector &getNextStepLinks(MappingLoc loc) const;
- // Gets neighboring tiles on current step of a given MappingLoc.
- const std::vector &getCurrentStepTiles(MappingLoc loc) const;
+// // Gets neighboring tiles on current step of a given MappingLoc.
+// const std::vector &getCurrentStepTiles(MappingLoc loc) const;
// Gets neighboring links on current step of a given MappingLoc.
- const std::vector &getCurrentStepLinks(MappingLoc loc) const;
+ std::vector getCurrentStepLinks(MappingLoc loc) const;
+ // Gets the target initiation interval (II) for the mapping.
int getII() const { return II; }
+ // Embeds the mapping states onto the mapped operations.
+ void encodeMappingState();
+
+ void dumpOpToLocs(llvm::raw_ostream &os = llvm::errs()) const;
+
private:
// Initiation interval.
int II;
- std::unordered_set all_locs;
- // current and next step tiles and links for a given MappingLoc. Note that
- // the key MappingLoc is either a pair of (tile, time_step) or (link, time_step).
- std::unordered_map> next_step_tiles;
- std::unordered_map> next_step_links;
- std::unordered_map> current_step_tiles;
- std::unordered_map> current_step_links;
-
- std::unordered_map loc_to_op;
- std::unordered_set occupied_locs;
+ static constexpr int kMaxSteps = 10;
+
+ std::set all_locs;
+ std::set occupied_locs;
+ std::map loc_to_op;
+ std::map> op_to_locs;
};
} // namespace neura
diff --git a/include/NeuraDialect/Mapping/mapping_util.h b/include/NeuraDialect/Mapping/mapping_util.h
index ca3a4b45..1864dd5e 100644
--- a/include/NeuraDialect/Mapping/mapping_util.h
+++ b/include/NeuraDialect/Mapping/mapping_util.h
@@ -2,6 +2,7 @@
#include "mlir/IR/Operation.h"
#include "NeuraDialect/Architecture/Architecture.h"
+#include "NeuraDialect/Mapping/MappingState.h"
namespace mlir {
namespace neura {
@@ -18,5 +19,70 @@ SmallVector collectRecurrenceCycles(Operation *func_op);
// Calculates ResMII: ceil(#ops / #tiles).
int calculateResMii(Operation *func_op, const Architecture &architecture);
+// Returns topologically sorted operations in func_op.
+std::vector getTopologicallySortedOps(Operation *func_op);
+
+Operation* getMaterializedProducer(Value operand);
+
+// Collects the real users of an operation, excluding ctrl_mov and data_mov.
+llvm::SmallVector getMaterializedUserOps(Operation *op);
+
+// Gets the last materialized backward user of an operation, which is expected
+// to be a phi operation.
+Operation *getMaterializedBackwardUser(Operation *op);
+
+// Attempts to map a function operation to the accelerator using heuristics.
+bool tryHeuristicMapping(std::vector &sorted_ops,
+ const Architecture &architecture,
+ MappingState &mapping_state);
+
+// Attempts to route a data move operation from src_loc to dst_loc.
+bool tryRouteDataMove(Operation *mov,
+ MappingLoc src_loc,
+ MappingLoc dst_loc,
+ bool is_backward_move,
+ const MappingState &mapping_state,
+ std::vector &path_out);
+
+bool tryRouteForwardMove(Operation *mov_op,
+ MappingLoc src_loc,
+ MappingLoc dst_loc,
+ const MappingState &state,
+ std::vector &path_out);
+
+bool tryRouteBackwardMove(Operation *mov_op,
+ MappingLoc src_loc,
+ MappingLoc dst_loc,
+ const MappingState &state,
+ std::vector &path_out);
+
+// Calculates the cost of mapping locations for a given op, the returned locations
+// are sorted based on the cost.
+std::vector calculateCost(Operation *op, const MappingState &mapping_state);
+
+// Gets the ctrl_mov users of an operation, empty vector is returned if no ctrl_mov users found.
+llvm::SmallVector getCtrlMovUsers(Operation *op);
+
+// Maps a materialized operation to the accelerator, and routes the dataflow from
+// the producers to the given op.
+bool placeAndRoute(Operation *op, const MappingLoc &target_loc, MappingState &mapping_state);
+
+std::vector calculateAward(Operation *op,
+ const Architecture &architecture,
+ const MappingState &mapping_state);
+
+void updateAward(std::map &locs_with_award,
+ MappingLoc loc, int award);
+
+bool canReachLocInTime(const MappingLoc &src_loc,
+ const MappingLoc &dst_loc,
+ int deadline_step,
+ const MappingState &mapping_state);
+
+bool canReachLocInTime(const std::vector &producers,
+ const MappingLoc &target_loc,
+ int deadline_step,
+ const MappingState &mapping_state);
+
} // namespace neura
} // namespace mlir
diff --git a/include/NeuraDialect/NeuraPasses.h b/include/NeuraDialect/NeuraPasses.h
index 8fa986f2..8b444603 100644
--- a/include/NeuraDialect/NeuraPasses.h
+++ b/include/NeuraDialect/NeuraPasses.h
@@ -25,6 +25,7 @@ std::unique_ptr createAssignAcceleratorPass();
std::unique_ptr createTransformCtrlToDataFlowPass();
std::unique_ptr createLeveragePredicatedValuePass();
std::unique_ptr createMapToAcceleratorPass();
+std::unique_ptr createGenerateCodePass();
#define GEN_PASS_REGISTRATION
#include "NeuraDialect/NeuraPasses.h.inc"
diff --git a/include/NeuraDialect/NeuraPasses.td b/include/NeuraDialect/NeuraPasses.td
index 426fe6d0..823a4b55 100644
--- a/include/NeuraDialect/NeuraPasses.td
+++ b/include/NeuraDialect/NeuraPasses.td
@@ -57,4 +57,14 @@ def MapToAccelerator : Pass<"map-to-accelerator", "ModuleOp"> {
}];
let constructor = "neura::createMapToAcceleratorPass()";
}
+
+def GenerateCode : Pass<"generate-code", "ModuleOp"> {
+ let summary = "Generate JSON-formatted instructions from mapped Neura IR";
+ let description = [{
+ This pass generates JSON file containing the instructions.
+ The instructions can be encoded into configuration signals.
+ }];
+ let constructor = "neura::createGenerateCodePass()";
+}
+
#endif // NEURA_PASSES_TD
\ No newline at end of file
diff --git a/lib/NeuraDialect/Mapping/MappingState.cpp b/lib/NeuraDialect/Mapping/MappingState.cpp
index f4189429..75485ae4 100644
--- a/lib/NeuraDialect/Mapping/MappingState.cpp
+++ b/lib/NeuraDialect/Mapping/MappingState.cpp
@@ -1,37 +1,48 @@
#include "NeuraDialect/Mapping/MappingState.h"
+#include "mlir/IR/BuiltinTypes.h"
using namespace mlir;
using namespace mlir::neura;
MappingState::MappingState(const Architecture &arch, int II) : II(II) {
+ // TODO: Use number of operations to determine the max steps for constructing MRRG.
for (Tile* tile : arch.getAllTiles()) {
- for (int t = 0; t < II; ++t) {
+ for (int t = 0; t < II * kMaxSteps; ++t) {
MappingLoc loc = {tile, t};
all_locs.insert(loc);
-
- // Create edges to neighboring tiles at t+1.
- for (Tile* dst : tile->getDstTiles()) {
- MappingLoc next_step_dst_tile_loc = {dst, (t + 1) % II}; // modulo II for reuse
- next_step_tiles[loc].push_back(next_step_dst_tile_loc);
- }
-
- // TODO: Not sure whether we need the link on t or t+1.
- // Creates edges to neighboring links at t.
- for (Link* dst : tile->getOutLinks()) {
- MappingLoc current_step_dst_link_loc = {dst, t % II};
- next_step_tiles[loc].push_back(current_step_dst_link_loc);
- }
}
}
}
-void MappingState::bindOp(MappingLoc loc, Operation *op) {
+bool MappingState::bindOp(const MappingLoc &loc, Operation *op) {
loc_to_op[loc] = op;
occupied_locs.insert(loc);
+ auto it = op_to_locs.find(op);
+ assert (it == op_to_locs.end() && "Operation already has reserved locations");
+ op_to_locs[op].push_back(loc);
+ return true;
}
-bool MappingState::isAvailable(const MappingLoc &loc) const {
- return occupied_locs.find(loc) == occupied_locs.end();
+void MappingState::unbindOp(Operation *op) {
+ auto it = op_to_locs.find(op);
+ if (it == op_to_locs.end()) return;
+
+ for (const MappingLoc &loc : it->second) {
+ loc_to_op.erase(loc);
+ occupied_locs.erase(loc);
+ }
+
+ op_to_locs.erase(it);
+}
+
+bool MappingState::isAvailableAcrossTime(const MappingLoc &loc) const {
+ for (int t = loc.time_step % II; t < II * kMaxSteps; t += II) {
+ MappingLoc checkLoc = loc;
+ checkLoc.time_step = t;
+ if (occupied_locs.find(checkLoc) != occupied_locs.end())
+ return false;
+ }
+ return true;
}
std::optional MappingState::getOpAt(MappingLoc loc) const {
@@ -40,30 +51,161 @@ std::optional MappingState::getOpAt(MappingLoc loc) const {
return it->second;
}
-const std::unordered_set &MappingState::getAllLocs() const {
+int MappingState::countOpsAtResource(BasicResource *resource) const {
+ int count = 0;
+ for (const auto &[loc, op] : loc_to_op) {
+ if (loc.resource == resource) {
+ count++;
+ }
+ }
+ return count;
+}
+
+const std::set &MappingState::getAllLocs() const {
return all_locs;
}
-const std::vector &MappingState::getNextStepTiles(MappingLoc loc) const {
+const std::vector &MappingState::getAllLocsOfOp(Operation *op) const {
+ auto it = op_to_locs.find(op);
+ if (it != op_to_locs.end())
+ return it->second;
+
static const std::vector empty;
- auto it = next_step_tiles.find(loc);
- return it != next_step_tiles.end() ? it->second : empty;
+ return empty;
}
-const std::vector &MappingState::getNextStepLinks(MappingLoc loc) const {
- static const std::vector empty;
- auto it = next_step_links.find(loc);
- return it != next_step_links.end() ? it->second : empty;
+std::vector MappingState::getNextStepTiles(MappingLoc loc) const {
+ std::vector next_step_tiles;
+ const int next_step = loc.time_step + 1;
+ assert(next_step < II * kMaxSteps && "Next step exceeds max steps");
+ // Collects neighboring tiles at t+1 for both tile and link.
+ if (loc.resource->getKind() == ResourceKind::Tile) {
+    Tile *tile = dyn_cast<Tile>(loc.resource);
+ for (Tile* dst : tile->getDstTiles()) {
+ MappingLoc next_step_dst_tile_loc = {dst, next_step};
+ next_step_tiles.push_back(next_step_dst_tile_loc);
+ }
+ // Includes self for reuse.
+ next_step_tiles.push_back({tile, next_step});
+ } else if (loc.resource->getKind() == ResourceKind::Link) {
+    Link *link = dyn_cast<Link>(loc.resource);
+ Tile* dst = link->getDstTile();
+ MappingLoc next_step_dst_tile_loc = {dst, next_step};
+ next_step_tiles.push_back(next_step_dst_tile_loc);
+ }
+ return next_step_tiles;
}
-const std::vector &MappingState::getCurrentStepTiles(MappingLoc loc) const {
- static const std::vector empty;
- auto it = current_step_tiles.find(loc);
- return it != current_step_tiles.end() ? it->second : empty;
+// const std::vector &MappingState::getNextStepLinks(MappingLoc loc) const {
+// static const std::vector empty;
+// auto it = next_step_links.find(loc);
+// return it != next_step_links.end() ? it->second : empty;
+// }
+
+// const std::vector &MappingState::getCurrentStepTiles(MappingLoc loc) const {
+// static const std::vector empty;
+// auto it = current_step_tiles.find(loc);
+// return it != current_step_tiles.end() ? it->second : empty;
+// }
+
+std::vector MappingState::getCurrentStepLinks(MappingLoc loc) const {
+ assert((loc.resource->getKind() == ResourceKind::Tile) &&
+ "Current step links can only be queried for tiles");
+ std::vector current_step_links;
+ const int current_step = loc.time_step;
+ assert(current_step < II * kMaxSteps && "Current step exceeds max steps");
+ // Collects neighboring tiles at t for given tile.
+  Tile *tile = dyn_cast<Tile>(loc.resource);
+ for (Link* out_link : tile->getOutLinks()) {
+ MappingLoc current_step_out_link_loc = {out_link, current_step};
+ current_step_links.push_back(current_step_out_link_loc);
+ }
+ return current_step_links;
}
-const std::vector &MappingState::getCurrentStepLinks(MappingLoc loc) const {
- static const std::vector empty;
- auto it = current_step_links.find(loc);
- return it != current_step_links.end() ? it->second : empty;
+void MappingState::reserveRoute(Operation *op, ArrayRef path) {
+
+ // Records all mapping locations.
+ assert(op_to_locs.find(op) == op_to_locs.end() &&
+ "Operation already has reserved locations");
+ op_to_locs[op] = std::vector(path.begin(), path.end());
+
+ for (const MappingLoc &loc : path) {
+ assert(occupied_locs.find(loc) == occupied_locs.end() &&
+ "Mapping location already occupied");
+ loc_to_op[loc] = op;
+ assert(occupied_locs.find(loc) == occupied_locs.end() &&
+ "Mapping location already occupied in occupied_locs");
+ occupied_locs.insert(loc);
+ }
+}
+
+void MappingState::releaseRoute(Operation *op) {
+ auto it = op_to_locs.find(op);
+ if (it == op_to_locs.end())
+ return;
+
+ const std::vector &route = it->second;
+
+ for (const MappingLoc &loc : route) {
+ loc_to_op.erase(loc);
+ occupied_locs.erase(loc);
+ }
+
+ op_to_locs.erase(it);
+}
+
+void MappingState::dumpOpToLocs(llvm::raw_ostream &os) const {
+ os << "=== MappingState: op_to_locs ===\n";
+
+ for (const auto &[op, locs] : op_to_locs) {
+ os << " - " << op->getName();
+    if (auto name_attr = op->getAttrOfType<StringAttr>("sym_name"))
+ os << " @" << name_attr;
+ os << "\n";
+
+ for (const MappingLoc &loc : locs) {
+ auto *res = loc.resource;
+ os << " -> " << res->getType() << "#" << res->getId()
+ << " @t=" << loc.time_step << "\n";
+ }
+ }
+
+ os << "=== End ===\n";
+}
+
+void MappingState::encodeMappingState() {
+ for (const auto &[op, locs] : op_to_locs) {
+ llvm::SmallVector mapping_entries;
+ auto ctx = op->getContext();
+ for (const MappingLoc &loc : locs) {
+ std::string kind_str;
+ if (loc.resource->getKind() == ResourceKind::Tile) {
+ kind_str = "tile";
+ } else if (loc.resource->getKind() == ResourceKind::Link) {
+ kind_str = "link";
+ } else {
+ kind_str = "unknown";
+ }
+ auto dict = mlir::DictionaryAttr::get(
+ ctx,
+ {
+ mlir::NamedAttribute(
+ mlir::StringAttr::get(ctx, "resource"),
+ mlir::StringAttr::get(ctx, kind_str)
+ ),
+ mlir::NamedAttribute(
+ mlir::StringAttr::get(ctx, "id"),
+ mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), loc.resource->getId())
+ ),
+ mlir::NamedAttribute(
+ mlir::StringAttr::get(ctx, "time_step"),
+ mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), loc.time_step)
+ )
+ }
+ );
+ mapping_entries.push_back(dict);
+ }
+ op->setAttr("mapping_locs", mlir::ArrayAttr::get(ctx, mapping_entries));
+ }
}
\ No newline at end of file
diff --git a/lib/NeuraDialect/Mapping/mapping_util.cpp b/lib/NeuraDialect/Mapping/mapping_util.cpp
index 3c724a7e..e0736669 100644
--- a/lib/NeuraDialect/Mapping/mapping_util.cpp
+++ b/lib/NeuraDialect/Mapping/mapping_util.cpp
@@ -1,9 +1,13 @@
#include
+#include
#include "NeuraDialect/Mapping/mapping_util.h"
#include "NeuraDialect/NeuraOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include
#include "mlir/IR/Operation.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
using namespace mlir;
using namespace mlir::neura;
@@ -27,10 +31,18 @@ void traverseAlongPath(Operation *op, Value reserve_value,
Operation *res_op = reserve_value.getDefiningOp();
if (res_op) current_path.push_front(res_op);
- constexpr int kNumExcludedOps = 2;
+ int effective_length = 0;
+ for (Operation *op : current_path) {
+ // Skips the non-materialized ops when counting the cycle length.
+ if (!isa(op)) {
+ ++effective_length;
+ }
+ }
collected_paths.push_back(RecurrenceCycle{
operations: SmallVector(current_path.begin(), current_path.end()),
- length: static_cast(current_path.size()) - kNumExcludedOps
+ length: static_cast(effective_length)
});
if (res_op) current_path.pop_front();
@@ -71,7 +83,6 @@ SmallVector mlir::neura::collectRecurrenceCycles(Operation *
for (auto &cycle : collected_paths) {
cycle.operations.push_back(ctrl_mov_op);
- ++cycle.length;
recurrence_cycles.push_back(std::move(cycle));
}
});
@@ -89,8 +100,8 @@ int mlir::neura::calculateResMii(Operation *func_op,
if (isa(op) ||
isa(op)) {
+ neura::DataMovOp,
+ neura::ReserveOp>(op)) {
return;
}
++num_ops;
@@ -103,3 +114,535 @@ int mlir::neura::calculateResMii(Operation *func_op,
return llvm::divideCeil(num_ops, num_tiles);
}
+
+std::vector mlir::neura::getTopologicallySortedOps(Operation *func_op) {
+ std::vector sorted_ops;
+ llvm::DenseMap pending_deps;
+ std::deque ready_queue;
+
+ // Collects recurrence cycle ops.
+ auto recurrence_cycles = collectRecurrenceCycles(func_op);
+ llvm::DenseSet recurrence_ops;
+ for (const auto &cycle : recurrence_cycles)
+ for (Operation *op : cycle.operations)
+ recurrence_ops.insert(op);
+
+ // Counts unresolved dependencies for each op.
+ func_op->walk([&](Operation *op) {
+ if (op == func_op) return;
+ int dep_count = 0;
+ for (Value operand : op->getOperands())
+ if (operand.getDefiningOp())
+ ++dep_count;
+ pending_deps[op] = dep_count;
+ if (dep_count == 0) {
+      // TODO: Prioritize recurrence ops. But causes compiled II regression.
+ // https://github.com/coredac/dataflow/issues/59.
+ if (recurrence_ops.contains(op)) {
+ // ready_queue.push_front(op);
+ ready_queue.push_back(op);
+ } else {
+ ready_queue.push_back(op);
+ }
+ }
+ });
+
+ // BFS-style topological sort with recurrence priority.
+ while (!ready_queue.empty()) {
+ Operation *op = ready_queue.front();
+ ready_queue.pop_front();
+ sorted_ops.push_back(op);
+
+ for (Value result : op->getResults()) {
+ for (Operation *user : result.getUsers()) {
+ if (--pending_deps[user] == 0) {
+        // TODO: Prioritize recurrence ops. But causes compiled II regression.
+ // https://github.com/coredac/dataflow/issues/59.
+ if (recurrence_ops.contains(user)) {
+ // ready_queue.push_front(user);
+ ready_queue.push_back(user);
+ } else {
+ ready_queue.push_back(user);
+ }
+ }
+ }
+ }
+ }
+
+ return sorted_ops;
+}
+
+mlir::Operation *mlir::neura::getMaterializedBackwardUser(Operation *op) {
+ assert(isa(op) && "Expected a ctrl_mov operation");
+ auto ctrl_mov = dyn_cast(op);
+ Value target = ctrl_mov.getTarget();
+
+ assert(isa(target.getDefiningOp()) &&
+ "Expected the user of ctrl_mov target to be a reserve operation");
+ auto reserve_op = dyn_cast(target.getDefiningOp());
+
+ // Skip ctrl_mov users of reserve; return the first phi user.
+ for (Operation *user : reserve_op.getResult().getUsers()) {
+ if (isa(user)) continue; // skip ctrl_mov user
+ if (isa(user)) return user;
+ }
+ assert(false && "No materialized backward user (i.e., phi) found for ctrl_mov");
+}
+
+llvm::SmallVector mlir::neura::getMaterializedUserOps(Operation *op) {
+ llvm::SmallVector result;
+ llvm::DenseSet visited;
+ visited.insert(op);
+ llvm::errs() << "Starting to collect materialized users for: " << *op << "\n";
+ llvm::SmallVector worklist(op->getUsers().begin(), op->getUsers().end());
+
+ while (!worklist.empty()) {
+ Operation *curr = worklist.pop_back_val();
+ llvm::errs() << "Visiting operation: " << *curr << "\n";
+ if (!visited.insert(curr).second) {
+ llvm::errs() << "Already visited, so skip: " << *curr << "\n";
+ continue;
+ }
+
+ if (isa(curr)) {
+ for (Operation *next : curr->getUsers()) {
+ if (visited.insert(next).second) {
+ // Only adds the next operation if it hasn't been visited yet.
+ worklist.push_back(next);
+ }
+ }
+ continue;
+ }
+
+ // Specially handles the ctrl_mov, i.e., the second operand of ctrl_mov is
+ // treated as a target/destination/user in terms of dataflow.
+ if (auto ctrl_mov = dyn_cast(curr)) {
+ Value target = ctrl_mov.getTarget();
+ for (Operation *user : target.getUsers()) {
+ if (visited.insert(user).second) {
+ worklist.push_back(user);
+ }
+ }
+ continue;
+ }
+
+ // Materialized op
+ result.push_back(curr);
+ }
+
+ for (Operation *res : result) {
+ llvm::errs() << "Materialized user: " << *res << "\n";
+ }
+ return result;
+}
+
+bool mlir::neura::tryRouteForwardMove(Operation *mov_op,
+ MappingLoc src_loc,
+ MappingLoc dst_loc,
+ const MappingState &state,
+ std::vector &path_out) {
+ return tryRouteDataMove(mov_op, src_loc, dst_loc, false, state, path_out);
+}
+
+bool mlir::neura::tryRouteBackwardMove(Operation *mov_op,
+ MappingLoc src_loc,
+ MappingLoc dst_loc,
+ const MappingState &state,
+ std::vector &path_out) {
+ llvm::errs() << "[tryRouteBackwardMove] src_loc: " << src_loc.resource->getType()
+ << "#" << src_loc.resource->getId()
+ << " @t=" << src_loc.time_step
+ << ", dst_loc: " << dst_loc.resource->getType()
+ << "#" << dst_loc.resource->getId()
+ << " @t=" << dst_loc.time_step << "\n";
+ return tryRouteDataMove(mov_op, src_loc, dst_loc, true, state, path_out);
+}
+
+bool mlir::neura::tryRouteDataMove(Operation *mov_op,
+ MappingLoc src_loc,
+ MappingLoc dst_loc,
+ bool is_backward_move,
+ const MappingState &state,
+ std::vector &path_out) {
+ // Specially handles the case where src and dst are the same tile.
+ if (src_loc.resource == dst_loc.resource) {
+ return true;
+ }
+ struct QueueEntry {
+ Tile *tile;
+ int time;
+ std::vector path;
+ };
+
+  Tile *src_tile = dyn_cast<Tile>(src_loc.resource);
+  Tile *dst_tile = dyn_cast<Tile>(dst_loc.resource);
+
+ std::queue queue;
+ std::set visited;
+
+ queue.push({src_tile, src_loc.time_step, {}});
+ visited.insert(src_tile);
+
+ // Tolerates the deadline step by II for backward moves (as the data should
+ // arrive at the next iteration).
+ const int deadline_step = dst_loc.time_step + (is_backward_move ? state.getII() : 0);
+
+ // BFS-style search for a path from src_tile to dst_tile.
+ while (!queue.empty()) {
+ auto [current_tile, current_time, current_path] = queue.front();
+ queue.pop();
+
+ if (current_tile == dst_tile) {
+ // Confirms path reaches the target tile no later than deadline step.
+ if (current_time <= deadline_step) {
+ // Either arrives exactly right before the dst starts computation.
+ // So the current_time on the target tile is the same as deadline step.
+ if (current_time == deadline_step) {
+ path_out = current_path;
+ return true;
+ }
+
+ // The last link can be held from arrival_time to dst_time - 1.
+ // TODO: We actually don't need to occupy the last link if the registers
+ // within the tile can be explicitly represented.
+ // https://github.com/coredac/dataflow/issues/52.
+ bool all_free = true;
+ assert(!current_path.empty() && "Path should not be empty when checking last link");
+ MappingLoc last_link = current_path.back();
+ std::vector last_link_occupying;
+ for (int t = current_time; t < deadline_step; ++t) {
+ MappingLoc repeated{last_link.resource, t};
+ last_link_occupying.push_back(repeated);
+ if (!state.isAvailableAcrossTime(repeated)) {
+ all_free = false;
+ break;
+ }
+ }
+ if (all_free) {
+ path_out = current_path;
+ path_out.insert(path_out.end(), last_link_occupying.begin(), last_link_occupying.end());
+ return true;
+ }
+
+ } else {
+ // Arrives too late, not schedulable.
+ continue;
+ }
+ }
+
+ for (MappingLoc current_step_next_link : state.getCurrentStepLinks({current_tile, current_time})) {
+ if (!state.isAvailableAcrossTime(current_step_next_link)) continue;
+
+      Link *next_link = dyn_cast<Link>(current_step_next_link.resource);
+ Tile *next_tile = next_link->getDstTile();
+ int next_time = current_time + 1;
+
+ if (!visited.insert(next_tile).second) continue;
+
+ std::vector extended_path = current_path;
+ extended_path.push_back(current_step_next_link);
+ queue.push({next_tile, next_time, std::move(extended_path)});
+ }
+ }
+
+ return false;
+}
+
+Operation* mlir::neura::getMaterializedProducer(Value operand) {
+ Operation *producer = operand.getDefiningOp();
+ assert(isa(producer) && "Expected operand to be defined by a DataMovOp");
+ // Finds the actual producer.
+ auto mov_op = dyn_cast(producer);
+ auto materialized_producer = mov_op.getOperand().getDefiningOp();
+ return materialized_producer;
+}
+
+bool mlir::neura::tryHeuristicMapping(std::vector &sorted_ops,
+ const Architecture &architecture,
+ MappingState &mapping_state) {
+ DenseSet visited;
+
+ for (Operation *op : sorted_ops) {
+ // TODO: Build up util func to distinguish materialized and non-materialized ops.
+ if (isa(op))
+ continue;
+
+ std::vector sorted_locs = calculateAward(op, architecture, mapping_state);
+ // auto target_loc = getLocWithMinCost(loc_with_cost);
+ if (sorted_locs.empty()) {
+ llvm::errs() << "[DEBUG] No locations found for op: " << *op << "\n";
+ return false; // No locations available for this operation.
+ }
+ assert(!sorted_locs.empty() &&
+ "No locations found for the operation to map");
+ MappingLoc target_loc = sorted_locs.front();
+ if (placeAndRoute(op, target_loc, mapping_state)) {
+ llvm::errs() << "[DEBUG] Successfully scheduled op: " << *op
+ << " at loc: " << target_loc.resource->getType()
+ << "#" << target_loc.resource->getId()
+ << " @t=" << target_loc.time_step << "\n";
+ continue;
+ } else {
+ llvm::errs() << "[DEBUG] Failed to schedule op: " << *op << "; target loc: " << target_loc.resource->getType() << "#" << target_loc.resource->getId() << " @t=" << target_loc.time_step << "\n";
+ }
+ // TODO: Optimization -- backtrack a few times if failed to schedule the op.
+ // https://github.com/coredac/dataflow/issues/59
+ return false;
+ }
+
+ return true;
+}
+
+bool mlir::neura::canReachLocInTime(const std::vector &producers,
+ const MappingLoc &target_loc,
+ int deadline_step,
+ const MappingState &mapping_state) {
+
+ for (Operation *producer : producers) {
+ // Get the last location of the producer.
+ auto producer_locs = mapping_state.getAllLocsOfOp(producer);
+ assert(!producer_locs.empty() && "No locations found for producer");
+
+ MappingLoc producer_loc = producer_locs.back();
+ if (!canReachLocInTime(producer_loc, target_loc, deadline_step, mapping_state)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool mlir::neura::canReachLocInTime(const MappingLoc &src_loc,
+ const MappingLoc &dst_loc,
+ int deadline_step,
+ const MappingState &mapping_state) {
+ // Checks if the destination is reachable from the source within the given time window.
+ if (src_loc.resource == dst_loc.resource &&
+ dst_loc.time_step <= deadline_step) {
+ return true;
+ }
+
+ // Checks if the destination is reachable from the source tile within given steps.
+ assert(isa(src_loc.resource));
+ assert(isa(dst_loc.resource));
+
+ struct QueueEntry {
+ MappingLoc loc;
+ int current_time;
+ };
+
+ std::queue queue;
+ llvm::DenseSet visited;
+
+ queue.push({src_loc, src_loc.time_step});
+  visited.insert(dyn_cast<Tile>(src_loc.resource));
+
+ while (!queue.empty()) {
+ auto [current_loc, current_time] = queue.front();
+ queue.pop();
+
+ // If we reach the destination tile and time step is not after dst_loc
+ if (current_loc.resource == dst_loc.resource &&
+ current_time <= dst_loc.time_step &&
+ dst_loc.time_step <= deadline_step) {
+ return true;
+ }
+
+ if (current_time >= deadline_step)
+ continue;
+
+ // Explores all next step tiles from the current location.
+ for (const MappingLoc &next_loc : mapping_state.getNextStepTiles(current_loc)) {
+ if (!mapping_state.isAvailableAcrossTime(next_loc))
+ continue;
+
+ int next_time = current_time + 1;
+ if (next_time > deadline_step)
+ continue;
+
+      Tile *next_tile = llvm::dyn_cast<Tile>(next_loc.resource);
+ assert(next_tile && "Next location must be a Tile");
+ if (visited.contains(next_tile)) {
+ continue;
+ }
+
+ visited.insert(next_tile);
+
+ MappingLoc next_step_loc = next_loc;
+ next_step_loc.time_step = next_time;
+
+ queue.push({next_step_loc, next_time});
+ }
+ }
+
+ return false;
+}
+
+void mlir::neura::updateAward(std::map &locs_with_award,
+ MappingLoc loc, int award) {
+ // Updates the award of the top element in the priority queue.
+ if (locs_with_award.find(loc) != locs_with_award.end()) {
+ locs_with_award[loc] += award;
+ } else {
+ locs_with_award[loc] = award;
+ }
+}
+
+// Scores every candidate (tile, time step) location for `op` and returns the
+// candidates sorted by descending award. A candidate earns a higher award when
+// it starts earlier, sits on a tile with more outgoing links, and is reachable
+// in time from all of the op's materialized producers.
+std::vector mlir::neura::calculateAward(Operation *op,
+ const Architecture &architecture,
+ const MappingState &mapping_state) {
+ // Accumulated award per candidate location. This is a plain ordered map (not
+ // a heap); entries are copied out and sorted by award once scoring is done.
+ std::map locs_with_award;
+
+ // Assembles all the producers.
+ std::vector producers;
+ for (Value operand : op->getOperands()) {
+ if (isa(operand.getDefiningOp())) {
+ // Skips Reserve ops (backward ctrl move) when estimating cost.
+ continue;
+ }
+ Operation *producer = getMaterializedProducer(operand);
+ assert(producer && "Expected a materialized producer");
+ producers.push_back(producer);
+ }
+
+ llvm::errs() << "[calculateAward] Operation: " << *op
+ << "; Producers: " << producers.size() << "\n";
+ for (Tile *tile : architecture.getAllTiles()) {
+ // The op cannot start before every producer has produced its value, i.e.
+ // one step after the latest producer location.
+ int earliest_start_time_step = 0;
+ for (Operation *producer : producers) {
+ std::vector producer_locs = mapping_state.getAllLocsOfOp(producer);
+ assert(!producer_locs.empty() && "No locations found for producer");
+
+ // NOTE(review): assumes getAllLocsOfOp returns locations in increasing
+ // time order so back() is the latest one -- confirm in MappingState.
+ MappingLoc producer_loc = producer_locs.back();
+ earliest_start_time_step = std::max(earliest_start_time_step,
+ producer_loc.time_step + 1);
+ }
+ // Base award: II plus the tile's fan-out (number of destination tiles).
+ int award = mapping_state.getII() + tile->getDstTiles().size();
+ // Considers the II consecutive time steps starting from the earliest
+ // feasible one; the award decreases by one per step, so earlier steps win.
+ for (int t = earliest_start_time_step;
+ t < earliest_start_time_step + mapping_state.getII(); t += 1) {
+ MappingLoc tile_loc_candidate = {tile, t};
+ // If the tile at time `t` is available, we can consider it for mapping.
+ if (mapping_state.isAvailableAcrossTime(tile_loc_candidate)) {
+ // If no producer or the location is reachable by all producers,
+ // we can consider it for mapping and grant reward.
+ if (producers.empty() ||
+ canReachLocInTime(producers,
+ tile_loc_candidate,
+ t,
+ mapping_state)) {
+ updateAward(locs_with_award, tile_loc_candidate, award);
+ }
+ }
+ // The mapping location with earlier time step is granted with a higher award.
+ award -= 1;
+ }
+ // After II decrements the award equals the tile's fan-out, hence >= 0.
+ assert(award >= 0 && "Award should not be negative");
+ }
+
+ // Copies map entries into a vector of pairs for sorting.
+ std::vector> locs_award_vec(locs_with_award.begin(), locs_with_award.end());
+
+ // Sorts by award (descending).
+ std::sort(locs_award_vec.begin(), locs_award_vec.end(),
+ [](const std::pair &a, const std::pair &b) {
+ return a.second > b.second;
+ });
+ // TODO: Needs to handle tie case and prioritize lower resource utilization, however,
+ // compiled II becomes worse after adding this tie-breaker: https://github.com/coredac/dataflow/issues/59.
+ // std::sort(locs_award_vec.begin(), locs_award_vec.end(),
+ // [&](const std::pair &a, const std::pair &b) {
+ // if (a.second != b.second) {
+ // return a.second > b.second;
+ // }
+ // // Tie-breaker: prioritizes lower resource utilization and earlier time step.
+ // if (a.first.time_step != b.first.time_step) {
+ // return a.first.time_step > b.first.time_step;
+ // }
+ // const bool is_resource_a_lower_utilized =
+ // mapping_state.countOpsAtResource(a.first.resource) >
+ // mapping_state.countOpsAtResource(b.first.resource);
+ // return is_resource_a_lower_utilized;
+ // });
+
+ // Extracts just the MappingLocs, already sorted by award.
+ std::vector sorted_locs;
+ sorted_locs.reserve(locs_award_vec.size());
+ for (const auto &pair : locs_award_vec)
+ sorted_locs.push_back(pair.first);
+
+ return sorted_locs;
+}
+
+// Returns the users of `op` that are ctrl_mov operations, in use-list order.
+// Non-ctrl_mov users are ignored.
+llvm::SmallVector mlir::neura::getCtrlMovUsers(Operation *op) {
+ llvm::SmallVector result;
+ for (Operation *user : op->getUsers()) {
+ if (isa(user)) {
+ result.push_back(user);
+ }
+ }
+ return result;
+}
+
+// Binds `op` to `target_loc`, then routes every incoming data_mov (from each
+// materialized producer) and every outgoing ctrl_mov (to its materialized phi).
+// On any routing failure the op is unbound and *all* routes reserved by this
+// call are released, leaving the mapping state as it was on entry.
+// Returns true only if the bind and every route succeed.
+bool mlir::neura::placeAndRoute(Operation *op, const MappingLoc &target_loc,
+                                MappingState &mapping_state) {
+  if (!mapping_state.bindOp(target_loc, op)) {
+    return false;
+  }
+
+  // Tracks every mov whose route has been reserved so far, so a later failure
+  // can roll all of them back. (Previously only the failing mov was released,
+  // leaking the routes reserved for earlier operands/ctrl_movs of this op.)
+  llvm::SmallVector<Operation *> reserved_movs;
+  auto rollback = [&]() {
+    mapping_state.unbindOp(op);
+    for (Operation *mov : reserved_movs) {
+      mapping_state.releaseRoute(mov);
+    }
+  };
+
+  // Routes the incoming data move operations.
+  for (Value operand : op->getOperands()) {
+    if (isa<ReserveOp>(operand.getDefiningOp())) {
+      // Skips Reserve ops (backward ctrl move); they are routed via their
+      // ctrl_mov user below.
+      continue;
+    }
+    Operation *data_move = operand.getDefiningOp();
+    assert(isa<DataMovOp>(data_move) && "Expected a DataMovOp as operand producer");
+    Operation *producer = getMaterializedProducer(operand);
+    MappingLoc src_loc = mapping_state.getAllLocsOfOp(producer).back();
+
+    std::vector<MappingLoc> route_path;
+    if (!tryRouteForwardMove(data_move, src_loc, target_loc, mapping_state,
+                             route_path)) {
+      llvm::errs() << "[DEBUG] Failed to route data move: " << *data_move
+                   << " from " << src_loc.resource->getType() << "#" << src_loc.resource->getId()
+                   << " @t=" << src_loc.time_step
+                   << " to " << target_loc.resource->getType() << "#" << target_loc.resource->getId()
+                   << " @t=" << target_loc.time_step << "\n";
+      rollback();
+      return false;
+    }
+    mapping_state.reserveRoute(data_move, route_path);
+    reserved_movs.push_back(data_move);
+    llvm::errs() << "[DEBUG] Successfully routed data move: " << *data_move
+                 << " from " << src_loc.resource->getType() << "#" << src_loc.resource->getId()
+                 << " @t=" << src_loc.time_step
+                 << " to " << target_loc.resource->getType() << "#" << target_loc.resource->getId()
+                 << " @t=" << target_loc.time_step << "\n";
+  }
+
+  // Routes each ctrl_mov user back to its materialized phi location.
+  for (Operation *user : getCtrlMovUsers(op)) {
+    auto ctrl_mov = dyn_cast<CtrlMovOp>(user);
+    // Asserts before printing: the previous code dereferenced the dyn_cast
+    // result for logging ahead of the null check.
+    assert(ctrl_mov && "Expected user to be a CtrlMovOp");
+    llvm::errs() << "[DEBUG] Found ctrl_mov user: " << *ctrl_mov << "\n";
+    mlir::Operation *materialized_backward_op = getMaterializedBackwardUser(ctrl_mov);
+    assert(isa<PhiOp>(materialized_backward_op) &&
+           "Expected materialized operation of ctrl_mov to be a PhiOp");
+    // Gets the last location of the materialized operation.
+    MappingLoc backward_loc = mapping_state.getAllLocsOfOp(materialized_backward_op).back();
+    // Routes the ctrl_mov to the phi location.
+    std::vector<MappingLoc> route_path;
+    if (!tryRouteBackwardMove(ctrl_mov, target_loc, backward_loc, mapping_state,
+                              route_path)) {
+      llvm::errs() << "[DEBUG] Failed to route ctrl_mov: " << *ctrl_mov
+                   << " to " << backward_loc.resource->getType() << "#" << backward_loc.resource->getId()
+                   << " @t=" << backward_loc.time_step << "\n";
+      rollback();
+      return false;
+    }
+    mapping_state.reserveRoute(ctrl_mov, route_path);
+    reserved_movs.push_back(ctrl_mov);
+    llvm::errs() << "[DEBUG] Successfully routed ctrl_mov: " << *ctrl_mov
+                 << " to " << backward_loc.resource->getType() << "#" << backward_loc.resource->getId()
+                 << " @t=" << backward_loc.time_step << "\n";
+  }
+  return true;
+}
\ No newline at end of file
diff --git a/lib/NeuraDialect/Transforms/CMakeLists.txt b/lib/NeuraDialect/Transforms/CMakeLists.txt
index c1d16bdc..7d944fbb 100644
--- a/lib/NeuraDialect/Transforms/CMakeLists.txt
+++ b/lib/NeuraDialect/Transforms/CMakeLists.txt
@@ -9,6 +9,7 @@ add_mlir_library(
TransformCtrlToDataFlowPass.cpp
LeveragePredicatedValuePass.cpp
MapToAcceleratorPass.cpp
+ GenerateCodePass.cpp
DEPENDS
MLIRNeuraTransformsIncGen
diff --git a/lib/NeuraDialect/Transforms/GenerateCodePass.cpp b/lib/NeuraDialect/Transforms/GenerateCodePass.cpp
new file mode 100644
index 00000000..6c223f83
--- /dev/null
+++ b/lib/NeuraDialect/Transforms/GenerateCodePass.cpp
@@ -0,0 +1,137 @@
+#include "NeuraDialect/NeuraDialect.h"
+#include "NeuraDialect/NeuraOps.h"
+#include "NeuraDialect/NeuraPasses.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace mlir;
+using namespace mlir::neura;
+
+#define GEN_PASS_DEF_GenerateCode
+#include "NeuraDialect/NeuraPasses.h.inc"
+
+namespace {
+
+// Emits a JSON description ("generated-instructions.json" in the working
+// directory) of every neura-accelerated function in the module: its II
+// attributes plus, per operation, the op name, result types, operand
+// producers, constant values, and the mapping locations recorded by the
+// map-to-accelerator pass.
+struct GenerateCodePass
+ : public PassWrapper> {
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(GenerateCodePass)
+
+ StringRef getArgument() const override { return "generate-code"; }
+ StringRef getDescription() const override {
+ return "Generates JSON code from mapped Neura IR.";
+ }
+
+ void getDependentDialects(DialectRegistry &registry) const override {
+ registry.insert();
+ }
+
+ void runOnOperation() override {
+ ModuleOp module = getOperation();
+
+ llvm::json::Array functions_array;
+
+ // Only functions tagged with accelerator = "neura" are emitted.
+ for (auto func : module.getOps()) {
+ auto accel_attr = func->getAttrOfType("accelerator");
+ if (!accel_attr || accel_attr.getValue() != "neura")
+ continue;
+
+ llvm::json::Object func_obj;
+ func_obj["name"] = func.getName().str();
+
+ // II attributes are optional; they are present only after a successful
+ // mapping.
+ if (auto ii_attr = func->getAttrOfType("CompiledII"))
+ func_obj["CompiledII"] = ii_attr.getInt();
+ if (auto recMII_attr = func->getAttrOfType("RecMII"))
+ func_obj["RecMII"] = recMII_attr.getInt();
+ if (auto resMII_attr = func->getAttrOfType("ResMII"))
+ func_obj["ResMII"] = resMII_attr.getInt();
+
+ llvm::json::Array op_array;
+
+ func.walk([&](Operation *op) {
+ if (isa(op))
+ return;
+
+ llvm::json::Object op_obj;
+ op_obj["name"] = op->getName().getStringRef().str();
+
+ // Result types.
+ llvm::json::Array result_types;
+ for (auto result : op->getResults()) {
+ std::string type_str;
+ llvm::raw_string_ostream os(type_str);
+ result.getType().print(os);
+ result_types.push_back(os.str());
+ }
+ op_obj["result_types"] = std::move(result_types);
+
+ // Operands. Each operand is recorded by its producing op's name (not an
+ // index); block arguments are recorded as "block_arg".
+ llvm::json::Array operand_indices;
+ for (Value operand : op->getOperands()) {
+ if (auto defining_op = operand.getDefiningOp())
+ operand_indices.push_back(defining_op->getName().getStringRef().str());
+ else
+ operand_indices.push_back("block_arg");
+ }
+ op_obj["operands"] = std::move(operand_indices);
+
+ // Constants. Only integer and float constants are serialized; other
+ // attribute kinds are silently omitted.
+ if (auto const_op = mlir::dyn_cast(op)) {
+ auto val_attr = const_op.getValue();
+ if (val_attr) {
+ if (auto int_attr = mlir::dyn_cast(val_attr)) {
+ op_obj["constant_value"] = std::to_string(int_attr.getInt());
+ } else if (auto float_attr = mlir::dyn_cast(val_attr)) {
+ op_obj["constant_value"] = std::to_string(float_attr.getValueAsDouble());
+ }
+ }
+ }
+
+ // Mapping locs.
+ llvm::json::Array loc_array;
+ if (auto attr_array = op->getAttrOfType("mapping_locs")) {
+ for (Attribute attr : attr_array) {
+ if (auto loc = mlir::dyn_cast(attr)) {
+ llvm::json::Object loc_obj;
+ // NOTE(review): loc.get(...) returns a null Attribute when the key
+ // is absent, and dyn_cast on a null Attribute is not safe -- confirm
+ // encodeMappingState always emits id/resource/time_step, or switch
+ // to dyn_cast_if_present.
+ if (auto idAttr = mlir::dyn_cast(loc.get("id")))
+ loc_obj["id"] = idAttr.getInt();
+ if (auto resource_attr = mlir::dyn_cast(loc.get("resource")))
+ loc_obj["resource"] = resource_attr.getValue().str();
+ if (auto timestep_attr = mlir::dyn_cast(loc.get("time_step")))
+ loc_obj["time_step"] = timestep_attr.getInt();
+ loc_array.push_back(std::move(loc_obj));
+ }
+ }
+ }
+ op_obj["mapping_locs"] = std::move(loc_array);
+
+ op_array.push_back(std::move(op_obj));
+ });
+
+ func_obj["operations"] = std::move(op_array);
+ functions_array.push_back(std::move(func_obj));
+ }
+
+ // Final JSON object.
+ llvm::json::Object root;
+ root["functions"] = std::move(functions_array);
+
+ // llvm::outs() << llvm::formatv("{0:2}", llvm::json::Value(std::move(root))) << "\n";
+ // Writes the pretty-printed JSON to a fixed file name in the current
+ // working directory; failing to open the file fails the pass.
+ std::error_code ec;
+ llvm::raw_fd_ostream json_out("generated-instructions.json", ec);
+ if (ec) {
+ getOperation()->emitError("Failed to open 'generated-instructions.json' for writing: " + ec.message());
+ return signalPassFailure();
+ }
+ json_out << llvm::formatv("{0:2}", llvm::json::Value(std::move(root))) << "\n";
+ }
+};
+
+} // namespace
+
+namespace mlir::neura {
+// Factory used by pass registration to create a GenerateCodePass instance.
+std::unique_ptr createGenerateCodePass() {
+ return std::make_unique();
+}
+} // namespace mlir::neura
diff --git a/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp b/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp
index ea6d1ef8..a684c92f 100644
--- a/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp
+++ b/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp
@@ -23,32 +23,65 @@ struct InsertDataMovForNeuraOps : public RewritePattern {
return failure();
}
- // Skips ops that already being inserted mov on the operands.
- bool allInputsAreMov = llvm::all_of(op->getOperands(), [](Value v) {
- return isa_and_nonnull(v.getDefiningOp());
+ bool all_inputs_are_mov_except_reserve = llvm::all_of(op->getOperands(), [](Value v) {
+ Operation *def_op = v.getDefiningOp();
+ return isa_and_nonnull(def_op) ||
+ isa_and_nonnull(def_op);
});
- if (allInputsAreMov) {
- return failure();
- }
+
+ if (all_inputs_are_mov_except_reserve)
+ return failure(); // All operands are already handled
+
+ // // Skips ops that already being inserted mov on the operands.
+ // bool all_inputs_are_mov = llvm::all_of(op->getOperands(), [](Value v) {
+ // return isa_and_nonnull(v.getDefiningOp());
+ // });
+ // if (all_inputs_are_mov) {
+ // return failure();
+ // }
+
+ // // Special case: skips rewriting phi if any operand is from reserve.
+ // if (isa(op)) {
+ // bool has_reserved_input = llvm::any_of(op->getOperands(), [](Value v) {
+ // return isa_and_nonnull(v.getDefiningOp());
+ // });
+
+ // if (has_reserved_input)
+ // return failure(); // Skip entire phi if any operand is reserved.
+ // }
// Makes sure none of the operand has being processed.
- bool hasAnyMovInput = llvm::any_of(op->getOperands(), [](Value v) {
+ bool has_any_mov_input = llvm::any_of(op->getOperands(), [](Value v) {
return isa_and_nonnull(v.getDefiningOp());
});
- assert(!hasAnyMovInput && "Unexpected: operand already wrapped in neura.mov");
+ if (has_any_mov_input)
+ llvm::errs() << "Warning: Operand already wrapped in neura.data_mov: " << *op << "\n";
+ assert(!has_any_mov_input && "Unexpected: operand already wrapped in neura.mov");
Location loc = op->getLoc();
+ // Skips adding mov if the consumer is ctrl_mov.
+ if (isa(op)) {
+ return failure(); // do not rewrite
+ }
+
// Wraps operands in mov.
- SmallVector newOperands;
+ SmallVector new_operands;
for (Value operand : op->getOperands()) {
+ Operation *producer = operand.getDefiningOp();
+ // Skips adding mov for neura.reserve -> neura.phi.
+ if (isa(op) && producer && isa(producer)) {
+ new_operands.push_back(operand);
+ continue;
+ }
+
auto mov = rewriter.create(loc, operand.getType(), operand);
- newOperands.push_back(mov);
+ new_operands.push_back(mov);
}
// Clones op with new operands.
OperationState state(loc, op->getName());
- state.addOperands(newOperands);
+ state.addOperands(new_operands);
state.addTypes(op->getResultTypes());
state.addAttributes(op->getAttrs());
@@ -59,8 +92,8 @@ struct InsertDataMovForNeuraOps : public RewritePattern {
}
}
- Operation *newOp = rewriter.create(state);
- rewriter.replaceOp(op, newOp->getResults());
+ Operation *new_op = rewriter.create(state);
+ rewriter.replaceOp(op, new_op->getResults());
return success();
}
};
diff --git a/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp b/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp
index 7831b50a..699d22cc 100644
--- a/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp
+++ b/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp
@@ -64,16 +64,33 @@ struct MapToAcceleratorPass
}
// AcceleratorConfig config{/*numTiles=*/8}; // Example
- Architecture architecture(2, 2);
+ Architecture architecture(4, 4);
int res_mii = calculateResMii(func, architecture);
IntegerAttr res_mii_attr = IntegerAttr::get(
IntegerType::get(func.getContext(), 32), res_mii);
func->setAttr("ResMII", res_mii_attr);
const int minII = std::min(rec_mii, res_mii);
- constexpr int maxII = 5;
+ constexpr int maxII = 10;
+ std::vector sorted_ops = getTopologicallySortedOps(func);
+ for (Operation *op : sorted_ops) {
+ llvm::errs() << "[MapToAcceleratorPass] sorted op: "
+ << *op << "\n";
+ }
for (int ii = minII; ii <= maxII; ++ii) {
- MappingState state(architecture, ii);
+ MappingState mapping_state(architecture, ii);
+ if (tryHeuristicMapping(sorted_ops, architecture, mapping_state)) {
+ // success
+ llvm::errs() << "[MapToAcceleratorPass] Successfully mapped function '"
+ << func.getName() << "' with II = " << ii << "\n";
+ mapping_state.dumpOpToLocs(); // logs to stderr
+ mapping_state.encodeMappingState();
+ func->setAttr("CompiledII", IntegerAttr::get(
+ IntegerType::get(func.getContext(), 32), ii));
+ break;
+ }
+ llvm::errs() << "[DEBUG] mapping failed for II = " << ii << "\n";
+ mapping_state.dumpOpToLocs(); // logs to stderr
}
});
}
diff --git a/test/neura/ctrl/branch_for.mlir b/test/neura/ctrl/branch_for.mlir
index e656bbef..e5a57b8b 100644
--- a/test/neura/ctrl/branch_for.mlir
+++ b/test/neura/ctrl/branch_for.mlir
@@ -16,8 +16,28 @@
// RUN: --lower-llvm-to-neura \
// RUN: --leverage-predicated-value \
// RUN: --transform-ctrl-to-data-flow \
+// RUN: --insert-data-mov \
+// RUN: | FileCheck %s -check-prefix=MOV
+
+// RUN: mlir-neura-opt %s \
+// RUN: --assign-accelerator \
+// RUN: --lower-llvm-to-neura \
+// RUN: --leverage-predicated-value \
+// RUN: --transform-ctrl-to-data-flow \
+// RUN: --insert-data-mov \
+// RUN: --map-to-accelerator \
+// RUN: | FileCheck %s -check-prefix=MAPPING
+
+// RUN: mlir-neura-opt %s \
+// RUN: --assign-accelerator \
+// RUN: --lower-llvm-to-neura \
+// RUN: --leverage-predicated-value \
+// RUN: --transform-ctrl-to-data-flow \
+// RUN: --insert-data-mov \
// RUN: --map-to-accelerator \
-// RUN: | FileCheck %s -check-prefix=MII
+// RUN: --generate-code
+
+// RUN: FileCheck %s --input-file=generated-instructions.json -check-prefix=INST
func.func @loop_test() -> f32 {
%n = llvm.mlir.constant(10 : i64) : i64
@@ -81,4 +101,107 @@ func.func @loop_test() -> f32 {
// CTRL2DATA-NEXT: "neura.return"(%18) : (!neura.data) -> ()
// CTRL2DATA-NEXT: }
-// MII: func.func @loop_test() -> f32 attributes {RecMII = 4 : i32, ResMII = 4 : i32, accelerator = "neura"}
\ No newline at end of file
+// MOV: func.func @loop_test() -> f32 attributes {accelerator = "neura"} {
+// MOV-NEXT: %0 = "neura.constant"() <{predicate = true, value = 10 : i64}> : () -> !neura.data
+// MOV-NEXT: %1 = "neura.data_mov"(%0) : (!neura.data) -> !neura.data
+// MOV-NEXT: %2 = "neura.grant_always"(%1) : (!neura.data) -> !neura.data
+// MOV-NEXT: %3 = "neura.constant"() <{predicate = true, value = 0 : i64}> : () -> !neura.data
+// MOV-NEXT: %4 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data
+// MOV-NEXT: %5 = "neura.grant_once"(%4) : (!neura.data) -> !neura.data
+// MOV-NEXT: %6 = "neura.constant"() <{predicate = true, value = 1 : i64}> : () -> !neura.data
+// MOV-NEXT: %7 = "neura.data_mov"(%6) : (!neura.data) -> !neura.data
+// MOV-NEXT: %8 = "neura.grant_always"(%7) : (!neura.data) -> !neura.data
+// MOV-NEXT: %9 = "neura.constant"() <{predicate = true, value = 3.000000e+00 : f32}> : () -> !neura.data
+// MOV-NEXT: %10 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data
+// MOV-NEXT: %11 = "neura.grant_always"(%10) : (!neura.data) -> !neura.data
+// MOV-NEXT: %12 = "neura.constant"() <{predicate = true, value = 0.000000e+00 : f32}> : () -> !neura.data
+// MOV-NEXT: %13 = "neura.data_mov"(%12) : (!neura.data) -> !neura.data
+// MOV-NEXT: %14 = "neura.grant_once"(%13) : (!neura.data) -> !neura.data
+// MOV-NEXT: %15 = neura.reserve : !neura.data
+// MOV-NEXT: %16 = "neura.data_mov"(%5) : (!neura.data) -> !neura.data
+// MOV-NEXT: %17 = "neura.phi"(%16, %15) : (!neura.data, !neura.data) -> !neura.data
+// MOV-NEXT: %18 = neura.reserve : !neura.data
+// MOV-NEXT: %19 = "neura.data_mov"(%14) : (!neura.data) -> !neura.data
+// MOV-NEXT: %20 = "neura.phi"(%19, %18) : (!neura.data, !neura.data) -> !neura.data
+// MOV-NEXT: %21 = "neura.data_mov"(%20) : (!neura.data) -> !neura.data
+// MOV-NEXT: %22 = "neura.data_mov"(%11) : (!neura.data) -> !neura.data
+// MOV-NEXT: %23 = "neura.fadd"(%21, %22) : (!neura.data, !neura.data) -> !neura.data
+// MOV-NEXT: %24 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data
+// MOV-NEXT: %25 = "neura.data_mov"(%8) : (!neura.data) -> !neura.data
+// MOV-NEXT: %26 = "neura.add"(%24, %25) : (!neura.data, !neura.data) -> !neura.data
+// MOV-NEXT: %27 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data
+// MOV-NEXT: %28 = "neura.data_mov"(%2) : (!neura.data) -> !neura.data
+// MOV-NEXT: %29 = "neura.icmp"(%27, %28) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data
+// MOV-NEXT: %30 = "neura.data_mov"(%29) : (!neura.data) -> !neura.data
+// MOV-NEXT: %31 = "neura.not"(%30) : (!neura.data) -> !neura.data
+// MOV-NEXT: %32 = "neura.data_mov"(%23) : (!neura.data) -> !neura.data
+// MOV-NEXT: %33 = "neura.data_mov"(%31) : (!neura.data) -> !neura.data
+// MOV-NEXT: %34 = neura.grant_predicate %32, %33 : !neura.data, !neura.data -> !neura.data
+// MOV-NEXT: %35 = "neura.data_mov"(%23) : (!neura.data) -> !neura.data
+// MOV-NEXT: %36 = "neura.data_mov"(%29) : (!neura.data) -> !neura.data
+// MOV-NEXT: %37 = neura.grant_predicate %35, %36 : !neura.data