diff --git a/include/NeuraDialect/Mapping/HeuristicMapping/HeuristicMapping.h b/include/NeuraDialect/Mapping/HeuristicMapping/HeuristicMapping.h new file mode 100644 index 00000000..87facfaa --- /dev/null +++ b/include/NeuraDialect/Mapping/HeuristicMapping/HeuristicMapping.h @@ -0,0 +1,64 @@ +#ifndef NEURA_BACKTRACK_MAPPING_H +#define NEURA_BACKTRACK_MAPPING_H + +#include "NeuraDialect/Mapping/MappingState.h" +#include "NeuraDialect/Mapping/MappingStrategy.h" +#include +#include +#include + +namespace mlir { +namespace neura { +class HeuristicMapping : public MappingStrategy { +public: + HeuristicMapping(int max_location_to_try = 5, int max_backtrack_depth = 3) + : max_location_to_try(max_location_to_try), max_backtrack_depth(max_backtrack_depth) {} + bool map(std::vector &sorted_ops, + const Architecture &architecture, + MappingState &mapping_state) override; + std::string getName() const override { + if (max_backtrack_depth == 1 && max_location_to_try == INT_MAX) { + return "greedy"; + } else if (max_backtrack_depth == INT_MAX && + max_location_to_try == INT_MAX) { + return "exhaustive"; + } else { + return "heuristic"; + } + } + +private: + bool mapWithBacktrack(std::vector &sorted_ops, + const Architecture &architecture, + MappingState &mapping_state, size_t current_index, + int backtrack_depth); + + // Configuration parameters. 
+ int max_location_to_try; // Maximum number of locations to try for + // each op + int max_backtrack_depth; // Maximum depth for backtracking +}; +} // namespace neura +} // namespace mlir + +namespace mlir { +namespace neura { +class MappingStateSnapshot { +public: + MappingStateSnapshot(const MappingState &mapping_state); + + void restore(MappingState &mapping_state); + + std::map> getOpToLocs() { + return this->op_to_locs; + } + +private: + std::set occupied_locs; + std::map loc_to_op; + std::map> op_to_locs; +}; +} // namespace neura +} // namespace mlir + +#endif // NEURA_BACKTRACK_MAPPING_H \ No newline at end of file diff --git a/include/NeuraDialect/Mapping/MappingState.h b/include/NeuraDialect/Mapping/MappingState.h index 5736f654..1530a4d6 100644 --- a/include/NeuraDialect/Mapping/MappingState.h +++ b/include/NeuraDialect/Mapping/MappingState.h @@ -1,20 +1,20 @@ #ifndef NEURA_MAPPING_STATE_H #define NEURA_MAPPING_STATE_H -#include "mlir/IR/Operation.h" #include "NeuraDialect/Architecture/Architecture.h" +#include "mlir/IR/Operation.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include -#include namespace mlir { namespace neura { // Represents a spatial-temporal location: (resource, time_step) struct MappingLoc { - BasicResource* resource; + BasicResource *resource; int time_step; bool operator==(const MappingLoc &other) const { @@ -32,15 +32,14 @@ struct MappingLoc { } // namespace mlir namespace std { -template <> -struct hash { - std::size_t operator()(const mlir::neura::MappingLoc& loc) const { - std::size_t h1 = std::hash()(loc.resource); +template <> struct hash { + std::size_t operator()(const mlir::neura::MappingLoc &loc) const { + std::size_t h1 = std::hash()(loc.resource); std::size_t h2 = std::hash()(loc.time_step); return h1 ^ (h2 << 1); } }; -} +} // namespace std namespace mlir { namespace neura { @@ -63,7 +62,7 @@ class MappingState { bool isAvailableAcrossTime(const MappingLoc &loc) const; // Gets the operation at a 
specific (tile/link, time_step) location. - std::optional getOpAt(MappingLoc loc) const; + std::optional getOpAt(MappingLoc loc) const; // Counts the number of operations at a specific resource across time steps. int countOpsAtResource(BasicResource *resource) const; @@ -83,11 +82,11 @@ class MappingState { // Gets neighboring tiles on next step of a given MappingLoc. std::vector getNextStepTiles(MappingLoc loc) const; -// // Gets neighboring links on next step of a given MappingLoc. -// const std::vector &getNextStepLinks(MappingLoc loc) const; + // // Gets neighboring links on next step of a given MappingLoc. + // const std::vector &getNextStepLinks(MappingLoc loc) const; -// // Gets neighboring tiles on current step of a given MappingLoc. -// const std::vector &getCurrentStepTiles(MappingLoc loc) const; + // // Gets neighboring tiles on current step of a given MappingLoc. + // const std::vector &getCurrentStepTiles(MappingLoc loc) const; // Gets neighboring links on current step of a given MappingLoc. std::vector getCurrentStepLinks(MappingLoc loc) const; @@ -100,6 +99,28 @@ class MappingState { void dumpOpToLocs(llvm::raw_ostream &os = llvm::errs()) const; + // Getters for state information. + const std::set &getOccupiedLocs() const { + return this->occupied_locs; + } + const std::map &getLocToOp() const { + return this->loc_to_op; + } + const std::map> &getOpToLocs() const { + return this->op_to_locs; + } + + // Setters for state information. + void setOccupiedLocs(const std::set &locs) { + this->occupied_locs = locs; + } + void setLocToOp(const std::map &loc_to_op) { + this->loc_to_op = loc_to_op; + } + void setOpToLocs(const std::map> &op_to_locs) { + this->op_to_locs = op_to_locs; + } + private: // Initiation interval. 
int II; @@ -107,8 +128,8 @@ class MappingState { std::set all_locs; std::set occupied_locs; - std::map loc_to_op; - std::map> op_to_locs; + std::map loc_to_op; + std::map> op_to_locs; }; } // namespace neura diff --git a/include/NeuraDialect/Mapping/MappingStrategy.h b/include/NeuraDialect/Mapping/MappingStrategy.h new file mode 100644 index 00000000..d244d185 --- /dev/null +++ b/include/NeuraDialect/Mapping/MappingStrategy.h @@ -0,0 +1,28 @@ +#ifndef NEURA_MAPPING_STRATEGY_H +#define NEURA_MAPPING_STRATEGY_H + +#include "NeuraDialect/Architecture/Architecture.h" +#include "NeuraDialect/Mapping/MappingState.h" +#include + +namespace mlir { +namespace neura { + +// Abstract base class for different mapping strategies. +class MappingStrategy { +public: + virtual ~MappingStrategy() = default; + + // Applies the mapping strategy to map operations onto hardware + virtual bool map(std::vector &sorted_ops, + const Architecture &architecture, + MappingState &mapping_state) = 0; + + // Gets the name of this strategy + virtual std::string getName() const = 0; +}; + +} // namespace neura +} // namespace mlir + +#endif // NEURA_MAPPING_STRATEGY_H \ No newline at end of file diff --git a/lib/NeuraDialect/Architecture/Architecture.cpp b/lib/NeuraDialect/Architecture/Architecture.cpp index ad952152..f502b1b5 100644 --- a/lib/NeuraDialect/Architecture/Architecture.cpp +++ b/lib/NeuraDialect/Architecture/Architecture.cpp @@ -12,19 +12,13 @@ Tile::Tile(int id, int x, int y) { this->y = y; } -int Tile::getId() const { - return id; -} +int Tile::getId() const { return id; } -int Tile::getX() const { - return x; -} +int Tile::getX() const { return x; } -int Tile::getY() const { - return y; -} +int Tile::getY() const { return y; } -void Tile::linkDstTile(Link* link, Tile* tile) { +void Tile::linkDstTile(Link *link, Tile *tile) { assert(tile && "Cannot link to a null tile"); dst_tiles.insert(tile); out_links.insert(link); @@ -32,39 +26,23 @@ void Tile::linkDstTile(Link* link, Tile* tile) 
{ tile->in_links.insert(link); } -const std::set& Tile::getDstTiles() const { - return dst_tiles; -} +const std::set &Tile::getDstTiles() const { return dst_tiles; } -const std::set& Tile::getSrcTiles() const { - return src_tiles; -} +const std::set &Tile::getSrcTiles() const { return src_tiles; } -const std::set& Tile::getOutLinks() const { - return out_links; -} +const std::set &Tile::getOutLinks() const { return out_links; } -const std::set& Tile::getInLinks() const { - return in_links; -} +const std::set &Tile::getInLinks() const { return in_links; } -Link::Link(int id) { - this->id = id; -} +Link::Link(int id) { this->id = id; } -int Link::getId() const { - return id; -} +int Link::getId() const { return id; } -Tile* Link::getSrcTile() const { - return src_tile; -} +Tile *Link::getSrcTile() const { return src_tile; } -Tile* Link::getDstTile() const { - return dst_tile; -} +Tile *Link::getDstTile() const { return dst_tile; } -void Link::connect(Tile* src, Tile* dst) { +void Link::connect(Tile *src, Tile *dst) { assert(src && dst && "Cannot connect null tiles"); src_tile = src; dst_tile = dst; @@ -91,7 +69,7 @@ Architecture::Architecture(int width, int height) { int link_id = 0; for (int i = 0; i < width; ++i) { for (int j = 0; j < height; ++j) { - Tile* tile = getTile(i, j); + Tile *tile = getTile(i, j); if (i > 0) { auto link_towards_left = std::make_unique(link_id++); link_towards_left->connect(tile, getTile(i - 1, j)); @@ -101,7 +79,7 @@ Architecture::Architecture(int width, int height) { auto link_towards_right = std::make_unique(link_id++); link_towards_right->connect(tile, getTile(i + 1, j)); link_storage.push_back(std::move(link_towards_right)); - } + } if (j > 0) { auto link_towards_down = std::make_unique(link_id++); link_towards_down->connect(tile, getTile(i, j - 1)); @@ -116,20 +94,20 @@ Architecture::Architecture(int width, int height) { } } -Tile* Architecture::getTile(int id) { +Tile *Architecture::getTile(int id) { auto it = id_to_tile.find(id); 
assert(it != id_to_tile.end() && "Tile with given ID not found"); return it->second; } -Tile* Architecture::getTile(int x, int y) { +Tile *Architecture::getTile(int x, int y) { auto it = coord_to_tile.find({x, y}); assert(it != coord_to_tile.end() && "Tile with given coordinates not found"); return it->second; } -std::vector Architecture::getAllTiles() const { - std::vector result; +std::vector Architecture::getAllTiles() const { + std::vector result; for (auto &tile : tile_storage) result.push_back(tile.get()); return result; @@ -139,8 +117,8 @@ int Architecture::getNumTiles() const { return static_cast(id_to_tile.size()); } -std::vector Architecture::getAllLinks() const { - std::vector all_links; +std::vector Architecture::getAllLinks() const { + std::vector all_links; for (const auto &link : link_storage) { all_links.push_back(link.get()); } diff --git a/lib/NeuraDialect/CMakeLists.txt b/lib/NeuraDialect/CMakeLists.txt index e61b0b15..c1faae04 100644 --- a/lib/NeuraDialect/CMakeLists.txt +++ b/lib/NeuraDialect/CMakeLists.txt @@ -5,6 +5,8 @@ add_mlir_dialect_library(MLIRNeura NeuraPasses.cpp Mapping/mapping_util.cpp Mapping/MappingState.cpp + Mapping/MappingStrategy.cpp + Mapping/HeuristicMapping/HeuristicMapping.cpp Architecture/Architecture.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/NeuraDialect/Mapping/HeuristicMapping/HeuristicMapping.cpp b/lib/NeuraDialect/Mapping/HeuristicMapping/HeuristicMapping.cpp new file mode 100644 index 00000000..edaa9979 --- /dev/null +++ b/lib/NeuraDialect/Mapping/HeuristicMapping/HeuristicMapping.cpp @@ -0,0 +1,103 @@ +#include "NeuraDialect/Mapping/HeuristicMapping/HeuristicMapping.h" +#include "NeuraDialect/Mapping/mapping_util.h" +#include "NeuraDialect/NeuraOps.h" +#include "llvm/Support/raw_ostream.h" + +namespace mlir { +namespace neura { + +bool HeuristicMapping::map(std::vector &sorted_ops, + const Architecture &architecture, + MappingState &mapping_state) { + // Start the backtracking mapping process from the first 
operation. + return mapWithBacktrack(sorted_ops, architecture, mapping_state, 0, 0); +} + +bool HeuristicMapping::mapWithBacktrack(std::vector &sorted_ops, + const Architecture &architecture, + MappingState &mapping_state, + size_t op_index, int backtrack_depth) { + // Checks if the backtrack depth exceeds the maximum allowed. + if (backtrack_depth > this->max_backtrack_depth) { + llvm::errs() << "[BacktrackMapping] Max backtrack depth reached\n"; + return false; // Backtrack failed, max depth reached. + } + + // Success condition: all operations are mapped (The op_index is larger than + // or equal to the number of operations). + if (op_index >= sorted_ops.size()) { + llvm::errs() << "[BacktrackMapping] Successfully mapped all operations.\n"; + return true; + } + + // Gets current operation to map. + Operation *op = sorted_ops[op_index]; + + // Skips non-materialized operations. + if (isa(op)) { + return mapWithBacktrack(sorted_ops, architecture, mapping_state, + op_index + 1, backtrack_depth); + } + + // Gets candidate locations sorted by award. + std::vector sorted_locs = + calculateAward(op, architecture, mapping_state); + + if (sorted_locs.empty()) { + llvm::errs() << "No locations found for op: " << *op << "\n"; + return false; // No locations available for this operation. + } + assert(!sorted_locs.empty() && "No locations found for the operation to map"); + + // Limits the number of locations to try. + int locations_to_try = + std::min(static_cast(sorted_locs.size()), this->max_location_to_try); + + // Tries each candidate location in order of decreasing award. + for (int i = 0; i < locations_to_try; ++i) { + MappingLoc target_loc = sorted_locs[i]; + // Creates a mapping snapshot of current state before attempting to map. + MappingStateSnapshot mappingstate_snapshot(mapping_state); + + // Attempts to place and route the operation at the target location. 
+ if (placeAndRoute(op, target_loc, mapping_state)) { + // Successfully placed and routed current operation, tries to map the next + // operation. + if (mapWithBacktrack(sorted_ops, architecture, mapping_state, + op_index + 1, backtrack_depth)) { + return true; // Successfully mapped all operations. + } + + // Failed to place next operation, restores the mapping state and try next + // location. + llvm::errs() << "[BACKTRACK] Failed to map in current location, " + << "restoring mapping state and trying next location.\n"; + llvm::errs() << "[BACKTRACK] Backtracking from op: " << *op << "\n"; + mappingstate_snapshot.restore(mapping_state); + // Increments backtrack depth. + backtrack_depth++; + } + } + + // All candidate locations failed. + return false; +} + +} // namespace neura +} // namespace mlir + +namespace mlir { +namespace neura { +MappingStateSnapshot::MappingStateSnapshot(const MappingState &mapping_state) { + this->occupied_locs = mapping_state.getOccupiedLocs(); + this->loc_to_op = mapping_state.getLocToOp(); + this->op_to_locs = mapping_state.getOpToLocs(); +} + +void MappingStateSnapshot::restore(MappingState &mapping_state) { + mapping_state.setOccupiedLocs(this->occupied_locs); + mapping_state.setLocToOp(this->loc_to_op); + mapping_state.setOpToLocs(this->op_to_locs); +} +} // namespace neura +} // namespace mlir \ No newline at end of file diff --git a/lib/NeuraDialect/Mapping/MappingState.cpp b/lib/NeuraDialect/Mapping/MappingState.cpp index 75485ae4..265f7730 100644 --- a/lib/NeuraDialect/Mapping/MappingState.cpp +++ b/lib/NeuraDialect/Mapping/MappingState.cpp @@ -1,12 +1,14 @@ #include "NeuraDialect/Mapping/MappingState.h" #include "mlir/IR/BuiltinTypes.h" +#include "llvm/Support/raw_ostream.h" using namespace mlir; using namespace mlir::neura; MappingState::MappingState(const Architecture &arch, int II) : II(II) { - // TODO: Use number of operations to determine the max steps for constructing MRRG. 
- for (Tile* tile : arch.getAllTiles()) { + // TODO: Use number of operations to determine the max steps for constructing + // MRRG. + for (Tile *tile : arch.getAllTiles()) { for (int t = 0; t < II * kMaxSteps; ++t) { MappingLoc loc = {tile, t}; all_locs.insert(loc); @@ -18,14 +20,16 @@ bool MappingState::bindOp(const MappingLoc &loc, Operation *op) { loc_to_op[loc] = op; occupied_locs.insert(loc); auto it = op_to_locs.find(op); - assert (it == op_to_locs.end() && "Operation already has reserved locations"); + assert(it == op_to_locs.end() && "Operation already has reserved locations"); op_to_locs[op].push_back(loc); return true; } void MappingState::unbindOp(Operation *op) { auto it = op_to_locs.find(op); - if (it == op_to_locs.end()) return; + if (it == op_to_locs.end()) { + return; + } for (const MappingLoc &loc : it->second) { loc_to_op.erase(loc); @@ -39,15 +43,18 @@ bool MappingState::isAvailableAcrossTime(const MappingLoc &loc) const { for (int t = loc.time_step % II; t < II * kMaxSteps; t += II) { MappingLoc checkLoc = loc; checkLoc.time_step = t; - if (occupied_locs.find(checkLoc) != occupied_locs.end()) + if (occupied_locs.find(checkLoc) != occupied_locs.end()) { return false; + } } return true; } -std::optional MappingState::getOpAt(MappingLoc loc) const { +std::optional MappingState::getOpAt(MappingLoc loc) const { auto it = loc_to_op.find(loc); - if (it == loc_to_op.end()) return std::nullopt; + if (it == loc_to_op.end()) { + return std::nullopt; + } return it->second; } @@ -65,10 +72,12 @@ const std::set &MappingState::getAllLocs() const { return all_locs; } -const std::vector &MappingState::getAllLocsOfOp(Operation *op) const { +const std::vector & +MappingState::getAllLocsOfOp(Operation *op) const { auto it = op_to_locs.find(op); - if (it != op_to_locs.end()) + if (it != op_to_locs.end()) { return it->second; + } static const std::vector empty; return empty; @@ -81,7 +90,7 @@ std::vector MappingState::getNextStepTiles(MappingLoc loc) const { // 
Collects neighboring tiles at t+1 for both tile and link. if (loc.resource->getKind() == ResourceKind::Tile) { Tile *tile = dyn_cast(loc.resource); - for (Tile* dst : tile->getDstTiles()) { + for (Tile *dst : tile->getDstTiles()) { MappingLoc next_step_dst_tile_loc = {dst, next_step}; next_step_tiles.push_back(next_step_dst_tile_loc); } @@ -89,34 +98,41 @@ std::vector MappingState::getNextStepTiles(MappingLoc loc) const { next_step_tiles.push_back({tile, next_step}); } else if (loc.resource->getKind() == ResourceKind::Link) { Link *link = dyn_cast(loc.resource); - Tile* dst = link->getDstTile(); + Tile *dst = link->getDstTile(); MappingLoc next_step_dst_tile_loc = {dst, next_step}; next_step_tiles.push_back(next_step_dst_tile_loc); } return next_step_tiles; } -// const std::vector &MappingState::getNextStepLinks(MappingLoc loc) const { +// const std::vector &MappingState::getNextStepLinks(MappingLoc loc) +// const { // static const std::vector empty; // auto it = next_step_links.find(loc); // return it != next_step_links.end() ? it->second : empty; // } -// const std::vector &MappingState::getCurrentStepTiles(MappingLoc loc) const { +// const std::vector &MappingState::getCurrentStepTiles(MappingLoc +// loc) const { // static const std::vector empty; // auto it = current_step_tiles.find(loc); // return it != current_step_tiles.end() ? 
it->second : empty; // } -std::vector MappingState::getCurrentStepLinks(MappingLoc loc) const { +std::vector +MappingState::getCurrentStepLinks(MappingLoc loc) const { assert((loc.resource->getKind() == ResourceKind::Tile) && "Current step links can only be queried for tiles"); std::vector current_step_links; const int current_step = loc.time_step; - assert(current_step < II * kMaxSteps && "Current step exceeds max steps"); + if (!(current_step < II * kMaxSteps)) { + llvm::errs() << "Current step exceeds max steps: " << current_step + << ", max steps: " << II * kMaxSteps << "\n"; + return current_step_links; // Return empty if step exceeds max. + } // Collects neighboring tiles at t for given tile. Tile *tile = dyn_cast(loc.resource); - for (Link* out_link : tile->getOutLinks()) { + for (Link *out_link : tile->getOutLinks()) { MappingLoc current_step_out_link_loc = {out_link, current_step}; current_step_links.push_back(current_step_out_link_loc); } @@ -142,8 +158,9 @@ void MappingState::reserveRoute(Operation *op, ArrayRef path) { void MappingState::releaseRoute(Operation *op) { auto it = op_to_locs.find(op); - if (it == op_to_locs.end()) + if (it == op_to_locs.end()) { return; + } const std::vector &route = it->second; @@ -160,8 +177,9 @@ void MappingState::dumpOpToLocs(llvm::raw_ostream &os) const { for (const auto &[op, locs] : op_to_locs) { os << " - " << op->getName(); - if (auto name_attr = op->getAttrOfType("sym_name")) + if (auto name_attr = op->getAttrOfType("sym_name")) { os << " @" << name_attr; + } os << "\n"; for (const MappingLoc &loc : locs) { @@ -170,7 +188,6 @@ void MappingState::dumpOpToLocs(llvm::raw_ostream &os) const { << " @t=" << loc.time_step << "\n"; } } - os << "=== End ===\n"; } @@ -182,29 +199,56 @@ void MappingState::encodeMappingState() { std::string kind_str; if (loc.resource->getKind() == ResourceKind::Tile) { kind_str = "tile"; + Tile *tile = dyn_cast(loc.resource); + auto dict = mlir::DictionaryAttr::get( + ctx, 
{mlir::NamedAttribute(mlir::StringAttr::get(ctx, "resource"), + mlir::StringAttr::get(ctx, kind_str)), + mlir::NamedAttribute( + mlir::StringAttr::get(ctx, "id"), + mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), + loc.resource->getId())), + mlir::NamedAttribute( + mlir::StringAttr::get(ctx, "time_step"), + mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), + loc.time_step)), + mlir::NamedAttribute( + mlir::StringAttr::get(ctx, "x"), + mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), + tile->getX())), + mlir::NamedAttribute( + mlir::StringAttr::get(ctx, "y"), + mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), + tile->getY()))}); + mapping_entries.push_back(dict); } else if (loc.resource->getKind() == ResourceKind::Link) { kind_str = "link"; + auto dict = mlir::DictionaryAttr::get( + ctx, {mlir::NamedAttribute(mlir::StringAttr::get(ctx, "resource"), + mlir::StringAttr::get(ctx, kind_str)), + mlir::NamedAttribute( + mlir::StringAttr::get(ctx, "id"), + mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), + loc.resource->getId())), + mlir::NamedAttribute( + mlir::StringAttr::get(ctx, "time_step"), + mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), + loc.time_step))}); + mapping_entries.push_back(dict); } else { kind_str = "unknown"; + auto dict = mlir::DictionaryAttr::get( + ctx, {mlir::NamedAttribute(mlir::StringAttr::get(ctx, "resource"), + mlir::StringAttr::get(ctx, kind_str)), + mlir::NamedAttribute( + mlir::StringAttr::get(ctx, "id"), + mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), + loc.resource->getId())), + mlir::NamedAttribute( + mlir::StringAttr::get(ctx, "time_step"), + mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), + loc.time_step))}); + mapping_entries.push_back(dict); } - auto dict = mlir::DictionaryAttr::get( - ctx, - { - mlir::NamedAttribute( - mlir::StringAttr::get(ctx, "resource"), - mlir::StringAttr::get(ctx, kind_str) - ), - mlir::NamedAttribute( - mlir::StringAttr::get(ctx, "id"), - 
mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), loc.resource->getId()) - ), - mlir::NamedAttribute( - mlir::StringAttr::get(ctx, "time_step"), - mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), loc.time_step) - ) - } - ); - mapping_entries.push_back(dict); } op->setAttr("mapping_locs", mlir::ArrayAttr::get(ctx, mapping_entries)); } diff --git a/lib/NeuraDialect/Mapping/MappingStrategy.cpp b/lib/NeuraDialect/Mapping/MappingStrategy.cpp new file mode 100644 index 00000000..6718b431 --- /dev/null +++ b/lib/NeuraDialect/Mapping/MappingStrategy.cpp @@ -0,0 +1,2 @@ +#include "NeuraDialect/Mapping/MappingStrategy.h" +#include \ No newline at end of file diff --git a/lib/NeuraDialect/Mapping/mapping_util.cpp b/lib/NeuraDialect/Mapping/mapping_util.cpp index e0736669..a69d7a07 100644 --- a/lib/NeuraDialect/Mapping/mapping_util.cpp +++ b/lib/NeuraDialect/Mapping/mapping_util.cpp @@ -4,10 +4,11 @@ #include "NeuraDialect/Mapping/mapping_util.h" #include "NeuraDialect/NeuraOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" -#include #include "mlir/IR/Operation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/Support/raw_ostream.h" +#include using namespace mlir; using namespace mlir::neura; @@ -20,37 +21,44 @@ void traverseAlongPath(Operation *op, Value reserve_value, std::deque ¤t_path, DenseSet &visited_in_path, SmallVector &collected_paths) { - if (!op || visited_in_path.contains(op)) + if (!op || visited_in_path.contains(op)) { + if (visited_in_path.contains(op)) { + llvm::errs() << "Skipping already visited operation: " << *op << "\n"; + } return; - + } visited_in_path.insert(op); current_path.push_front(op); for (Value operand : op->getOperands()) { if (operand == reserve_value) { Operation *res_op = reserve_value.getDefiningOp(); - if (res_op) current_path.push_front(res_op); + if (res_op) { + current_path.push_front(res_op); + } int effective_length = 0; for (Operation *op : current_path) { // Skips the non-materialized ops 
when counting the cycle length. - if (!isa(op)) { + if (!isa(op)) { ++effective_length; } } collected_paths.push_back(RecurrenceCycle{ - operations: SmallVector(current_path.begin(), current_path.end()), - length: static_cast(effective_length) + operations : + SmallVector(current_path.begin(), current_path.end()), + length : static_cast(effective_length) }); - if (res_op) current_path.pop_front(); + if (res_op) { + current_path.pop_front(); + } continue; } if (Operation *def_op = operand.getDefiningOp()) { - traverseAlongPath(def_op, reserve_value, current_path, visited_in_path, collected_paths); + traverseAlongPath(def_op, reserve_value, current_path, visited_in_path, + collected_paths); } } @@ -60,26 +68,32 @@ void traverseAlongPath(Operation *op, Value reserve_value, } // namespace -SmallVector mlir::neura::collectRecurrenceCycles(Operation *func_op) { +SmallVector +mlir::neura::collectRecurrenceCycles(Operation *func_op) { SmallVector recurrence_cycles; func_op->walk([&](neura::CtrlMovOp ctrl_mov_op) { Value target = ctrl_mov_op.getTarget(); auto reserve_op = target.getDefiningOp(); - if (!reserve_op) + if (!reserve_op) { return; + } Value reserve_value = reserve_op.getResult(); Value ctrl_mov_from = ctrl_mov_op.getValue(); Operation *parent_op = ctrl_mov_from.getDefiningOp(); - if (!parent_op) + if (!parent_op) { return; + } std::deque current_path; SmallVector collected_paths; DenseSet visited_in_path; - traverseAlongPath(parent_op, reserve_value, current_path, visited_in_path, collected_paths); + llvm::errs() << "Collecting recurrence cycles from back edge: parent_op " + << *parent_op << "->" << reserve_op << "\n"; + traverseAlongPath(parent_op, reserve_value, current_path, visited_in_path, + collected_paths); for (auto &cycle : collected_paths) { cycle.operations.push_back(ctrl_mov_op); @@ -97,11 +111,8 @@ int mlir::neura::calculateResMii(Operation *func_op, // Count all "compute" operations (non-terminators, non-block ops). 
func_op->walk([&](Operation *op) { // Skips non-materialized ops. - if (isa(op) || - isa(op)) { + if (isa(op) || isa(op)) { return; } ++num_ops; @@ -115,7 +126,8 @@ int mlir::neura::calculateResMii(Operation *func_op, return llvm::divideCeil(num_ops, num_tiles); } -std::vector mlir::neura::getTopologicallySortedOps(Operation *func_op) { +std::vector +mlir::neura::getTopologicallySortedOps(Operation *func_op) { std::vector sorted_ops; llvm::DenseMap pending_deps; std::deque ready_queue; @@ -129,11 +141,15 @@ std::vector mlir::neura::getTopologicallySortedOps(Operation *func_ // Counts unresolved dependencies for each op. func_op->walk([&](Operation *op) { - if (op == func_op) return; + if (op == func_op) { + return; + } int dep_count = 0; - for (Value operand : op->getOperands()) - if (operand.getDefiningOp()) + for (Value operand : op->getOperands()) { + if (operand.getDefiningOp()) { ++dep_count; + } + } pending_deps[op] = dep_count; if (dep_count == 0) { // TODO: Prioritize recurrence ops. But cause compiled II regression. @@ -183,18 +199,25 @@ mlir::Operation *mlir::neura::getMaterializedBackwardUser(Operation *op) { // Skip ctrl_mov users of reserve; return the first phi user. 
for (Operation *user : reserve_op.getResult().getUsers()) { - if (isa(user)) continue; // skip ctrl_mov user - if (isa(user)) return user; + if (isa(user)) { + continue; // skip ctrl_mov user + } + if (isa(user)) { + return user; + } } - assert(false && "No materialized backward user (i.e., phi) found for ctrl_mov"); + assert(false && + "No materialized backward user (i.e., phi) found for ctrl_mov"); } -llvm::SmallVector mlir::neura::getMaterializedUserOps(Operation *op) { +llvm::SmallVector +mlir::neura::getMaterializedUserOps(Operation *op) { llvm::SmallVector result; llvm::DenseSet visited; visited.insert(op); llvm::errs() << "Starting to collect materialized users for: " << *op << "\n"; - llvm::SmallVector worklist(op->getUsers().begin(), op->getUsers().end()); + llvm::SmallVector worklist(op->getUsers().begin(), + op->getUsers().end()); while (!worklist.empty()) { Operation *curr = worklist.pop_back_val(); @@ -236,32 +259,28 @@ llvm::SmallVector mlir::neura::getMaterializedUserOps(Operati return result; } -bool mlir::neura::tryRouteForwardMove(Operation *mov_op, - MappingLoc src_loc, +bool mlir::neura::tryRouteForwardMove(Operation *mov_op, MappingLoc src_loc, MappingLoc dst_loc, const MappingState &state, std::vector &path_out) { return tryRouteDataMove(mov_op, src_loc, dst_loc, false, state, path_out); } -bool mlir::neura::tryRouteBackwardMove(Operation *mov_op, - MappingLoc src_loc, +bool mlir::neura::tryRouteBackwardMove(Operation *mov_op, MappingLoc src_loc, MappingLoc dst_loc, const MappingState &state, std::vector &path_out) { - llvm::errs() << "[tryRouteBackwardMove] src_loc: " << src_loc.resource->getType() - << "#" << src_loc.resource->getId() - << " @t=" << src_loc.time_step - << ", dst_loc: " << dst_loc.resource->getType() - << "#" << dst_loc.resource->getId() - << " @t=" << dst_loc.time_step << "\n"; + llvm::errs() << "[tryRouteBackwardMove] src_loc: " + << src_loc.resource->getType() << "#" + << src_loc.resource->getId() << " @t=" << 
src_loc.time_step + << ", dst_loc: " << dst_loc.resource->getType() << "#" + << dst_loc.resource->getId() << " @t=" << dst_loc.time_step + << "\n"; return tryRouteDataMove(mov_op, src_loc, dst_loc, true, state, path_out); } -bool mlir::neura::tryRouteDataMove(Operation *mov_op, - MappingLoc src_loc, - MappingLoc dst_loc, - bool is_backward_move, +bool mlir::neura::tryRouteDataMove(Operation *mov_op, MappingLoc src_loc, + MappingLoc dst_loc, bool is_backward_move, const MappingState &state, std::vector &path_out) { // Specially handles the case where src and dst are the same tile. @@ -278,14 +297,15 @@ bool mlir::neura::tryRouteDataMove(Operation *mov_op, Tile *dst_tile = dyn_cast(dst_loc.resource); std::queue queue; - std::set visited; + std::set visited; queue.push({src_tile, src_loc.time_step, {}}); visited.insert(src_tile); // Tolerates the deadline step by II for backward moves (as the data should // arrive at the next iteration). - const int deadline_step = dst_loc.time_step + (is_backward_move ? state.getII() : 0); + const int deadline_step = + dst_loc.time_step + (is_backward_move ? state.getII() : 0); // BFS-style search for a path from src_tile to dst_tile. while (!queue.empty()) { @@ -307,7 +327,8 @@ bool mlir::neura::tryRouteDataMove(Operation *mov_op, // within the tile can be explicitly represented. // https://github.com/coredac/dataflow/issues/52. 
bool all_free = true; - assert(!current_path.empty() && "Path should not be empty when checking last link"); + assert(!current_path.empty() && + "Path should not be empty when checking last link"); MappingLoc last_link = current_path.back(); std::vector last_link_occupying; for (int t = current_time; t < deadline_step; ++t) { @@ -320,7 +341,8 @@ bool mlir::neura::tryRouteDataMove(Operation *mov_op, } if (all_free) { path_out = current_path; - path_out.insert(path_out.end(), last_link_occupying.begin(), last_link_occupying.end()); + path_out.insert(path_out.end(), last_link_occupying.begin(), + last_link_occupying.end()); return true; } @@ -330,14 +352,18 @@ bool mlir::neura::tryRouteDataMove(Operation *mov_op, } } - for (MappingLoc current_step_next_link : state.getCurrentStepLinks({current_tile, current_time})) { - if (!state.isAvailableAcrossTime(current_step_next_link)) continue; - + for (MappingLoc current_step_next_link : + state.getCurrentStepLinks({current_tile, current_time})) { + if (!state.isAvailableAcrossTime(current_step_next_link)) { + continue; + } Link *next_link = dyn_cast(current_step_next_link.resource); Tile *next_tile = next_link->getDstTile(); int next_time = current_time + 1; - if (!visited.insert(next_tile).second) continue; + if (!visited.insert(next_tile).second) { + continue; + } std::vector extended_path = current_path; extended_path.push_back(current_step_next_link); @@ -348,9 +374,10 @@ bool mlir::neura::tryRouteDataMove(Operation *mov_op, return false; } -Operation* mlir::neura::getMaterializedProducer(Value operand) { +Operation *mlir::neura::getMaterializedProducer(Value operand) { Operation *producer = operand.getDefiningOp(); - assert(isa(producer) && "Expected operand to be defined by a DataMovOp"); + assert(isa(producer) && + "Expected operand to be defined by a DataMovOp"); // Finds the actual producer. 
auto mov_op = dyn_cast(producer); auto materialized_producer = mov_op.getOperand().getDefiningOp(); @@ -363,11 +390,14 @@ bool mlir::neura::tryHeuristicMapping(std::vector &sorted_ops, DenseSet visited; for (Operation *op : sorted_ops) { - // TODO: Build up util func to distinguish materialized and non-materialized ops. - if (isa(op)) + // TODO: Build up util func to distinguish materialized and non-materialized + // ops. + if (isa(op)) { continue; + } - std::vector sorted_locs = calculateAward(op, architecture, mapping_state); + std::vector sorted_locs = + calculateAward(op, architecture, mapping_state); // auto target_loc = getLocWithMinCost(loc_with_cost); if (sorted_locs.empty()) { llvm::errs() << "[DEBUG] No locations found for op: " << *op << "\n"; @@ -378,12 +408,15 @@ bool mlir::neura::tryHeuristicMapping(std::vector &sorted_ops, MappingLoc target_loc = sorted_locs.front(); if (placeAndRoute(op, target_loc, mapping_state)) { llvm::errs() << "[DEBUG] Successfully scheduled op: " << *op - << " at loc: " << target_loc.resource->getType() - << "#" << target_loc.resource->getId() + << " at loc: " << target_loc.resource->getType() << "#" + << target_loc.resource->getId() << " @t=" << target_loc.time_step << "\n"; continue; } else { - llvm::errs() << "[DEBUG] Failed to schedule op: " << *op << "; target loc: " << target_loc.resource->getType() << "#" << target_loc.resource->getId() << " @t=" << target_loc.time_step << "\n"; + llvm::errs() << "[DEBUG] Failed to schedule op: " << *op + << "; target loc: " << target_loc.resource->getType() << "#" + << target_loc.resource->getId() + << " @t=" << target_loc.time_step << "\n"; } // TODO: Optimization -- backtrack a few times if failed to schedule the op. 
// https://github.com/coredac/dataflow/issues/59 @@ -404,7 +437,8 @@ bool mlir::neura::canReachLocInTime(const std::vector &producers, assert(!producer_locs.empty() && "No locations found for producer"); MappingLoc producer_loc = producer_locs.back(); - if (!canReachLocInTime(producer_loc, target_loc, deadline_step, mapping_state)) { + if (!canReachLocInTime(producer_loc, target_loc, deadline_step, + mapping_state)) { return false; } } @@ -415,13 +449,15 @@ bool mlir::neura::canReachLocInTime(const MappingLoc &src_loc, const MappingLoc &dst_loc, int deadline_step, const MappingState &mapping_state) { - // Checks if the destination is reachable from the source within the given time window. + // Checks if the destination is reachable from the source within the given + // time window. if (src_loc.resource == dst_loc.resource && dst_loc.time_step <= deadline_step) { return true; } - // Checks if the destination is reachable from the source tile within given steps. + // Checks if the destination is reachable from the source tile within given + // steps. assert(isa(src_loc.resource)); assert(isa(dst_loc.resource)); @@ -447,17 +483,21 @@ bool mlir::neura::canReachLocInTime(const MappingLoc &src_loc, return true; } - if (current_time >= deadline_step) + if (current_time >= deadline_step) { continue; + } // Explores all next step tiles from the current location. 
- for (const MappingLoc &next_loc : mapping_state.getNextStepTiles(current_loc)) { - if (!mapping_state.isAvailableAcrossTime(next_loc)) + for (const MappingLoc &next_loc : + mapping_state.getNextStepTiles(current_loc)) { + if (!mapping_state.isAvailableAcrossTime(next_loc)) { continue; + } int next_time = current_time + 1; - if (next_time > deadline_step) + if (next_time > deadline_step) { continue; + } Tile *next_tile = llvm::dyn_cast(next_loc.resource); assert(next_tile && "Next location must be a Tile"); @@ -487,11 +527,11 @@ void mlir::neura::updateAward(std::map &locs_with_award, } } -std::vector mlir::neura::calculateAward(Operation *op, - const Architecture &architecture, - const MappingState &mapping_state) { - // A heap of locations with their associated award. Note that we use a max-heap - // to prioritize locations with higher awards. +std::vector +mlir::neura::calculateAward(Operation *op, const Architecture &architecture, + const MappingState &mapping_state) { + // A heap of locations with their associated award. Note that we use a + // max-heap to prioritize locations with higher awards. std::map locs_with_award; // Assembles all the producers. 
@@ -507,16 +547,17 @@ std::vector mlir::neura::calculateAward(Operation *op, } llvm::errs() << "[calculateAward] Operation: " << *op - << "; Producers: " << producers.size() << "\n"; + << "; Producers: " << producers.size() << "\n"; for (Tile *tile : architecture.getAllTiles()) { int earliest_start_time_step = 0; for (Operation *producer : producers) { - std::vector producer_locs = mapping_state.getAllLocsOfOp(producer); + std::vector producer_locs = + mapping_state.getAllLocsOfOp(producer); assert(!producer_locs.empty() && "No locations found for producer"); MappingLoc producer_loc = producer_locs.back(); - earliest_start_time_step = std::max(earliest_start_time_step, - producer_loc.time_step + 1); + earliest_start_time_step = + std::max(earliest_start_time_step, producer_loc.time_step + 1); } int award = mapping_state.getII() + tile->getDstTiles().size(); for (int t = earliest_start_time_step; @@ -527,36 +568,39 @@ std::vector mlir::neura::calculateAward(Operation *op, // If no producer or the location is reachable by all producers, // we can consider it for mapping and grant reward. if (producers.empty() || - canReachLocInTime(producers, - tile_loc_candidate, - t, - mapping_state)) { + canReachLocInTime(producers, tile_loc_candidate, t, + mapping_state)) { updateAward(locs_with_award, tile_loc_candidate, award); } } - // The mapping location with earlier time step is granted with a higher award. + // The mapping location with earlier time step is granted with a higher + // award. award -= 1; } assert(award >= 0 && "Award should not be negative"); } // Copies map entries into a vector of pairs for sorting. - std::vector> locs_award_vec(locs_with_award.begin(), locs_with_award.end()); + std::vector> locs_award_vec( + locs_with_award.begin(), locs_with_award.end()); // Sorts by award (descending). 
- std::sort(locs_award_vec.begin(), locs_award_vec.end(), - [](const std::pair &a, const std::pair &b) { - return a.second > b.second; - }); - // TODO: Needs to handle tie case and prioritize lower resource utilization, however, - // compiled II becomes worse after adding this tie-breaker: https://github.com/coredac/dataflow/issues/59. + std::sort( + locs_award_vec.begin(), locs_award_vec.end(), + [](const std::pair &a, + const std::pair &b) { return a.second > b.second; }); + // TODO: Needs to handle tie case and prioritize lower resource utilization, + // however, compiled II becomes worse after adding this tie-breaker: + // https://github.com/coredac/dataflow/issues/59. // std::sort(locs_award_vec.begin(), locs_award_vec.end(), - // [&](const std::pair &a, const std::pair &b) { + // [&](const std::pair &a, const + // std::pair &b) { // if (a.second != b.second) { // return a.second > b.second; // } - // // Tie-breaker: prioritizes lower resource utilization and earlier time step. - // if (a.first.time_step != b.first.time_step) { + // // Tie-breaker: prioritizes lower resource utilization and + // earlier time step. if (a.first.time_step != + // b.first.time_step) { // return a.first.time_step > b.first.time_step; // } // const bool is_resource_a_lower_utilized = @@ -584,36 +628,50 @@ llvm::SmallVector mlir::neura::getCtrlMovUsers(Operation *op) { return result; } -bool mlir::neura::placeAndRoute(Operation *op, const MappingLoc &target_loc, MappingState &mapping_state) { +bool mlir::neura::placeAndRoute(Operation *op, const MappingLoc &target_loc, + MappingState &mapping_state) { if (mapping_state.bindOp(target_loc, op)) { + std::vector routed_operands; + std::vector routed_ctrl_movs; // Tries to route the data move operations. for (Value operand : op->getOperands()) { + llvm::errs() << "Processing operand: " << operand << "\n"; if (isa(operand.getDefiningOp())) { // Skips Reserve ops (backward ctrl move) when estimate cost. 
continue; } Operation *data_move = operand.getDefiningOp(); - assert(isa(data_move) && "Expected a DataMovOp as operand producer"); + assert(isa(data_move) && + "Expected a DataMovOp as operand producer"); Operation *producer = getMaterializedProducer(operand); MappingLoc src_loc = mapping_state.getAllLocsOfOp(producer).back(); std::vector route_path; - if (tryRouteForwardMove(data_move, src_loc, target_loc, mapping_state, route_path)) { + if (tryRouteForwardMove(data_move, src_loc, target_loc, mapping_state, + route_path)) { + // Reserves the route for the data move operation. mapping_state.reserveRoute(data_move, route_path); + routed_operands.push_back(data_move); llvm::errs() << "[DEBUG] Successfully routed data move: " << *data_move - << " from " << src_loc.resource->getType() << "#" << src_loc.resource->getId() - << " @t=" << src_loc.time_step - << " to " << target_loc.resource->getType() << "#" << target_loc.resource->getId() + << " from " << src_loc.resource->getType() << "#" + << src_loc.resource->getId() << " @t=" << src_loc.time_step + << " to " << target_loc.resource->getType() << "#" + << target_loc.resource->getId() << " @t=" << target_loc.time_step << "\n"; continue; } llvm::errs() << "[DEBUG] Failed to route data move: " << *data_move - << " from " << src_loc.resource->getType() << "#" << src_loc.resource->getId() - << " @t=" << src_loc.time_step - << " to " << target_loc.resource->getType() << "#" << target_loc.resource->getId() + << " from " << src_loc.resource->getType() << "#" + << src_loc.resource->getId() << " @t=" << src_loc.time_step + << " to " << target_loc.resource->getType() << "#" + << target_loc.resource->getId() << " @t=" << target_loc.time_step << "\n"; mapping_state.unbindOp(op); - mapping_state.releaseRoute(data_move); + for (Operation *routed_op : routed_operands) { + llvm::errs() << "[DEBUG] Releasing route for routed operand: " + << *routed_op << "\n"; + mapping_state.releaseRoute(routed_op); + } return false; } // Checks 
whether the operation's user is a ctrl_mov. @@ -621,25 +679,41 @@ bool mlir::neura::placeAndRoute(Operation *op, const MappingLoc &target_loc, Map auto ctrl_mov = dyn_cast(user); llvm::errs() << "[DEBUG] Found ctrl_mov user: " << *ctrl_mov << "\n"; assert(ctrl_mov && "Expected user to be a CtrlMovOp"); - mlir::Operation *materialized_backward_op = getMaterializedBackwardUser(ctrl_mov); + mlir::Operation *materialized_backward_op = + getMaterializedBackwardUser(ctrl_mov); assert(isa(materialized_backward_op) && "Expected materialized operation of ctrl_mov to be a PhiOp"); // Gets the last location of the materialized operation. - MappingLoc backward_loc = mapping_state.getAllLocsOfOp(materialized_backward_op).back(); + MappingLoc backward_loc = + mapping_state.getAllLocsOfOp(materialized_backward_op).back(); // Routes the ctrl_mov to the phi location. std::vector route_path; - if (tryRouteBackwardMove(ctrl_mov, target_loc, backward_loc, mapping_state, route_path)) { + if (tryRouteBackwardMove(ctrl_mov, target_loc, backward_loc, + mapping_state, route_path)) { mapping_state.reserveRoute(ctrl_mov, route_path); + routed_ctrl_movs.push_back(ctrl_mov); llvm::errs() << "[DEBUG] Successfully routed ctrl_mov: " << *ctrl_mov - << " to " << backward_loc.resource->getType() << "#" << backward_loc.resource->getId() + << " to " << backward_loc.resource->getType() << "#" + << backward_loc.resource->getId() << " @t=" << backward_loc.time_step << "\n"; continue; } llvm::errs() << "[DEBUG] Failed to route ctrl_mov: " << *ctrl_mov - << " to " << backward_loc.resource->getType() << "#" << backward_loc.resource->getId() + << " to " << backward_loc.resource->getType() << "#" + << backward_loc.resource->getId() << " @t=" << backward_loc.time_step << "\n"; mapping_state.unbindOp(op); - mapping_state.releaseRoute(ctrl_mov); + for (Operation *routed_ctrl_mov : routed_ctrl_movs) { + llvm::errs() << "[DEBUG] Releasing route for routed ctrl_mov: " + << *routed_ctrl_mov << "\n"; + 
mapping_state.releaseRoute(routed_ctrl_mov); + } + + for (Operation *routed_op : routed_operands) { + llvm::errs() << "[DEBUG] Releasing route for routed operand: " + << *routed_op << "\n"; + mapping_state.releaseRoute(routed_op); + } return false; } return true; diff --git a/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp b/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp index 699d22cc..36fd4302 100644 --- a/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp +++ b/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp @@ -1,16 +1,18 @@ #include #include "NeuraDialect/Architecture/Architecture.h" +#include "NeuraDialect/Mapping/HeuristicMapping/HeuristicMapping.h" +#include "NeuraDialect/Mapping/MappingState.h" +#include "NeuraDialect/Mapping/mapping_util.h" #include "NeuraDialect/NeuraDialect.h" #include "NeuraDialect/NeuraOps.h" -#include "NeuraDialect/NeuraTypes.h" #include "NeuraDialect/NeuraPasses.h" -#include "NeuraDialect/Mapping/MappingState.h" -#include "NeuraDialect/Mapping/mapping_util.h" +#include "NeuraDialect/NeuraTypes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/Support/raw_ostream.h" using namespace mlir; using namespace mlir::neura; @@ -33,60 +35,128 @@ struct MapToAcceleratorPass registry.insert(); } + MapToAcceleratorPass() = default; + MapToAcceleratorPass(const MapToAcceleratorPass &pass) + : PassWrapper>(pass) {} + Option mappingStrategy{ + *this, "mapping-strategy", + llvm::cl::desc("Mapping strategy to use for mapping operations to the " + "accelerator. Options: greedy, exhaustive, " + "heuristic=max_loc,max_depth (default " + "max_loc=5, max_depth=3)"), + llvm::cl::init("heuristic")}; + void runOnOperation() override { ModuleOp module = getOperation(); + StringRef mappingStrategy_stringRef(mappingStrategy.getValue()); + // Creates a mapping strategy based on the provided option. 
+ std::unique_ptr mapping_strategy; + if (mappingStrategy_stringRef == "greedy") { + mapping_strategy = std::make_unique(INT_MAX, 1); + } else if (mappingStrategy_stringRef == "exhaustive") { + mapping_strategy = std::make_unique(INT_MAX, INT_MAX); + } else if (mappingStrategy_stringRef == "heuristic") { + mapping_strategy = std::make_unique( + 5, 3); // Randomly picked default values for max_loc and max_depth + } else if (mappingStrategy_stringRef.starts_with("heuristic=")) { + // Used for custom backtrack parameters. + // Example: "heuristic=5,3" means max_loc=5, max_depth=3 + // Extracts the parameters after "heuristic=". + StringRef paramsRef = + mappingStrategy_stringRef.substr(strlen("heuristic=")); + size_t comma_pos = paramsRef.find(','); + + if (comma_pos != StringRef::npos) { + StringRef max_loc_str = paramsRef.substr(0, comma_pos); + StringRef max_depth_str = paramsRef.substr(comma_pos + 1); + + int max_loc, max_depth; + if (!max_loc_str.getAsInteger(10, max_loc) && + !max_depth_str.getAsInteger(10, max_depth)) { + mapping_strategy = + std::make_unique(max_loc, max_depth); + llvm::errs() + << "[MapToAcceleratorPass] Use custom backtrack parameters: " + << "max_location_to_try=" << max_loc + << ", max_backtrack_depth=" << max_depth << "\n"; + } else { + llvm::errs() + << "[MapToAcceleratorPass] Illegal backtrack parameters format: " + << mappingStrategy << "\n"; + return; + } + } else { + llvm::errs() + << "[MapToAcceleratorPass] Illegal backtrack parameters format: " + << mappingStrategy << "\n"; + return; + } + } else { + llvm::errs() << "[MapToAcceleratorPass] Unsupported mapping strategy: " + << mappingStrategy << "\n"; + return; + } + module.walk([&](func::FuncOp func) { // Skips functions not targeting the neura accelerator. 
auto accel_attr = func->getAttrOfType("accelerator"); - if (!accel_attr || accel_attr.getValue() != "neura") + if (!accel_attr || accel_attr.getValue() != "neura") { return; + } // Collects and reports recurrence cycles found in the function. auto recurrence_cycles = collectRecurrenceCycles(func); RecurrenceCycle *longest = nullptr; int rec_mii = 1; for (auto &cycle : recurrence_cycles) { - if (!longest || cycle.length > longest->length) + llvm::outs() << "[DEBUG] Recurrence cycle (length " << cycle.length + << "):\n"; + for (Operation *op : cycle.operations) { + llvm::outs() << " " << *op << "\n"; + } + if (!longest || cycle.length > longest->length) { longest = &cycle; + } } if (longest) { - llvm::errs() << "[MapToAcceleratorPass] Longest recurrence cycle (length " - << longest->length << "):\n"; + llvm::outs() + << "[MapToAcceleratorPass] Longest recurrence cycle (length " + << longest->length << "):\n"; for (Operation *op : longest->operations) { - op->print(llvm::errs()), llvm::errs() << "\n"; + op->print(llvm::outs()), llvm::outs() << "\n"; } rec_mii = longest->length; - IntegerAttr rec_mii_attr = IntegerAttr::get( - IntegerType::get(func.getContext(), 32), rec_mii); + IntegerAttr rec_mii_attr = + IntegerAttr::get(IntegerType::get(func.getContext(), 32), rec_mii); func->setAttr("RecMII", rec_mii_attr); } // AcceleratorConfig config{/*numTiles=*/8}; // Example Architecture architecture(4, 4); int res_mii = calculateResMii(func, architecture); - IntegerAttr res_mii_attr = IntegerAttr::get( - IntegerType::get(func.getContext(), 32), res_mii); + IntegerAttr res_mii_attr = + IntegerAttr::get(IntegerType::get(func.getContext(), 32), res_mii); func->setAttr("ResMII", res_mii_attr); const int minII = std::min(rec_mii, res_mii); constexpr int maxII = 10; - std::vector sorted_ops = getTopologicallySortedOps(func); + std::vector sorted_ops = getTopologicallySortedOps(func); for (Operation *op : sorted_ops) { - llvm::errs() << "[MapToAcceleratorPass] sorted op: " - << 
*op << "\n"; + llvm::outs() << "[MapToAcceleratorPass] sorted op: " << *op << "\n"; } for (int ii = minII; ii <= maxII; ++ii) { MappingState mapping_state(architecture, ii); - if (tryHeuristicMapping(sorted_ops, architecture, mapping_state)) { + if (mapping_strategy->map(sorted_ops, architecture, mapping_state)) { // success - llvm::errs() << "[MapToAcceleratorPass] Successfully mapped function '" + llvm::errs() << "[MapToAcceleratorPass] Successfully mapped function '" << func.getName() << "' with II = " << ii << "\n"; mapping_state.dumpOpToLocs(); // logs to stderr mapping_state.encodeMappingState(); - func->setAttr("CompiledII", IntegerAttr::get( - IntegerType::get(func.getContext(), 32), ii)); + func->setAttr( + "CompiledII", + IntegerAttr::get(IntegerType::get(func.getContext(), 32), ii)); break; } llvm::errs() << "[DEBUG] mapping failed for II = " << ii << "\n"; diff --git a/test/neura/ctrl/branch_for.mlir b/test/neura/ctrl/branch_for.mlir index 60937c3e..83f87d11 100644 --- a/test/neura/ctrl/branch_for.mlir +++ b/test/neura/ctrl/branch_for.mlir @@ -11,32 +11,31 @@ // RUN: --transform-ctrl-to-data-flow \ // RUN: | FileCheck %s -check-prefix=CTRL2DATA -// TODO: Enable the following tests once the ctrl2data is refactored.
-// RU: mlir-neura-opt %s \ -// RU: --assign-accelerator \ -// RU: --lower-llvm-to-neura \ -// RU: --leverage-predicated-value \ -// RU: --transform-ctrl-to-data-flow \ -// RU: --insert-data-mov \ -// RU: | FileCheck %s -check-prefix=MOV +// RUN: mlir-neura-opt %s \ +// RUN: --assign-accelerator \ +// RUN: --lower-llvm-to-neura \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: --insert-data-mov \ +// RUN: | FileCheck %s -check-prefix=MOV -// RU: mlir-neura-opt %s \ -// RU: --assign-accelerator \ -// RU: --lower-llvm-to-neura \ -// RU: --leverage-predicated-value \ -// RU: --transform-ctrl-to-data-flow \ -// RU: --insert-data-mov \ -// RU: --map-to-accelerator \ -// RU: | FileCheck %s -check-prefix=MAPPING +// RUN: mlir-neura-opt %s \ +// RUN: --assign-accelerator \ +// RUN: --lower-llvm-to-neura \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: --insert-data-mov \ +// RUN: --map-to-accelerator="mapping-strategy=heuristic" \ +// RUN: | FileCheck %s -check-prefix=MAPPING -// RU: mlir-neura-opt %s \ -// RU: --assign-accelerator \ -// RU: --lower-llvm-to-neura \ -// RU: --leverage-predicated-value \ -// RU: --transform-ctrl-to-data-flow \ -// RU: --insert-data-mov \ -// RU: --map-to-accelerator \ -// RU: --generate-code +// RUN: mlir-neura-opt %s \ +// RUN: --assign-accelerator \ +// RUN: --lower-llvm-to-neura \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: --insert-data-mov \ +// RUN: --map-to-accelerator="mapping-strategy=heuristic" \ +// RUN: --generate-code // RU: FileCheck %s --input-file=generated-instructions.json -check-prefix=INST @@ -103,99 +102,99 @@ func.func @loop_test() -> f32 { // CTRL2DATA-NEXT: } // MOV: func.func @loop_test() -> f32 attributes {accelerator = "neura"} { -// MOV-NEXT: %0 = "neura.constant"() <{predicate = true, value = 10 : i64}> : () -> !neura.data -// MOV-NEXT: %1 = "neura.data_mov"(%0) : (!neura.data) -> 
!neura.data -// MOV-NEXT: %2 = "neura.grant_always"(%1) : (!neura.data) -> !neura.data -// MOV-NEXT: %3 = "neura.constant"() <{predicate = true, value = 0 : i64}> : () -> !neura.data -// MOV-NEXT: %4 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data -// MOV-NEXT: %5 = "neura.grant_once"(%4) : (!neura.data) -> !neura.data -// MOV-NEXT: %6 = "neura.constant"() <{predicate = true, value = 1 : i64}> : () -> !neura.data -// MOV-NEXT: %7 = "neura.data_mov"(%6) : (!neura.data) -> !neura.data -// MOV-NEXT: %8 = "neura.grant_always"(%7) : (!neura.data) -> !neura.data -// MOV-NEXT: %9 = "neura.constant"() <{predicate = true, value = 3.000000e+00 : f32}> : () -> !neura.data -// MOV-NEXT: %10 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data -// MOV-NEXT: %11 = "neura.grant_always"(%10) : (!neura.data) -> !neura.data -// MOV-NEXT: %12 = "neura.constant"() <{predicate = true, value = 0.000000e+00 : f32}> : () -> !neura.data -// MOV-NEXT: %13 = "neura.data_mov"(%12) : (!neura.data) -> !neura.data -// MOV-NEXT: %14 = "neura.grant_once"(%13) : (!neura.data) -> !neura.data -// MOV-NEXT: %15 = neura.reserve : !neura.data -// MOV-NEXT: %16 = "neura.data_mov"(%5) : (!neura.data) -> !neura.data -// MOV-NEXT: %17 = "neura.phi"(%16, %15) : (!neura.data, !neura.data) -> !neura.data -// MOV-NEXT: %18 = neura.reserve : !neura.data -// MOV-NEXT: %19 = "neura.data_mov"(%14) : (!neura.data) -> !neura.data -// MOV-NEXT: %20 = "neura.phi"(%19, %18) : (!neura.data, !neura.data) -> !neura.data -// MOV-NEXT: %21 = "neura.data_mov"(%20) : (!neura.data) -> !neura.data -// MOV-NEXT: %22 = "neura.data_mov"(%11) : (!neura.data) -> !neura.data -// MOV-NEXT: %23 = "neura.fadd"(%21, %22) : (!neura.data, !neura.data) -> !neura.data -// MOV-NEXT: %24 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data -// MOV-NEXT: %25 = "neura.data_mov"(%8) : (!neura.data) -> !neura.data -// MOV-NEXT: %26 = "neura.add"(%24, %25) : (!neura.data, !neura.data) -> !neura.data -// MOV-NEXT: %27 = "neura.data_mov"(%26) 
: (!neura.data) -> !neura.data -// MOV-NEXT: %28 = "neura.data_mov"(%2) : (!neura.data) -> !neura.data -// MOV-NEXT: %29 = "neura.icmp"(%27, %28) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data -// MOV-NEXT: %30 = "neura.data_mov"(%29) : (!neura.data) -> !neura.data -// MOV-NEXT: %31 = "neura.not"(%30) : (!neura.data) -> !neura.data -// MOV-NEXT: %32 = "neura.data_mov"(%23) : (!neura.data) -> !neura.data -// MOV-NEXT: %33 = "neura.data_mov"(%31) : (!neura.data) -> !neura.data -// MOV-NEXT: %34 = neura.grant_predicate %32, %33 : !neura.data, !neura.data -> !neura.data -// MOV-NEXT: %35 = "neura.data_mov"(%23) : (!neura.data) -> !neura.data -// MOV-NEXT: %36 = "neura.data_mov"(%29) : (!neura.data) -> !neura.data -// MOV-NEXT: %37 = neura.grant_predicate %35, %36 : !neura.data, !neura.data -> !neura.data -// MOV-NEXT: neura.ctrl_mov %37 -> %18 : !neura.data !neura.data -// MOV-NEXT: %38 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data -// MOV-NEXT: %39 = "neura.data_mov"(%29) : (!neura.data) -> !neura.data -// MOV-NEXT: %40 = neura.grant_predicate %38, %39 : !neura.data, !neura.data -> !neura.data -// MOV-NEXT: neura.ctrl_mov %40 -> %15 : !neura.data !neura.data -// MOV-NEXT: %41 = "neura.data_mov"(%34) : (!neura.data) -> !neura.data -// MOV-NEXT: "neura.return"(%41) : (!neura.data) -> () -// MOV-NEXT: } +// MOV-NEXT: %0 = "neura.constant"() <{predicate = true, value = 10 : i64}> : () -> !neura.data +// MOV-NEXT: %1 = "neura.data_mov"(%0) : (!neura.data) -> !neura.data +// MOV-NEXT: %2 = "neura.grant_always"(%1) : (!neura.data) -> !neura.data +// MOV-NEXT: %3 = "neura.constant"() <{predicate = true, value = 0 : i64}> : () -> !neura.data +// MOV-NEXT: %4 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data +// MOV-NEXT: %5 = "neura.grant_once"(%4) : (!neura.data) -> !neura.data +// MOV-NEXT: %6 = "neura.constant"() <{predicate = true, value = 1 : i64}> : () -> !neura.data +// MOV-NEXT: %7 = "neura.data_mov"(%6) : (!neura.data) -> !neura.data +// 
MOV-NEXT: %8 = "neura.grant_always"(%7) : (!neura.data) -> !neura.data +// MOV-NEXT: %9 = "neura.constant"() <{predicate = true, value = 3.000000e+00 : f32}> : () -> !neura.data +// MOV-NEXT: %10 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data +// MOV-NEXT: %11 = "neura.grant_always"(%10) : (!neura.data) -> !neura.data +// MOV-NEXT: %12 = "neura.constant"() <{predicate = true, value = 0.000000e+00 : f32}> : () -> !neura.data +// MOV-NEXT: %13 = "neura.data_mov"(%12) : (!neura.data) -> !neura.data +// MOV-NEXT: %14 = "neura.grant_once"(%13) : (!neura.data) -> !neura.data +// MOV-NEXT: %15 = neura.reserve : !neura.data +// MOV-NEXT: %16 = "neura.data_mov"(%14) : (!neura.data) -> !neura.data +// MOV-NEXT: %17 = "neura.phi"(%15, %16) : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %18 = neura.reserve : !neura.data +// MOV-NEXT: %19 = "neura.data_mov"(%5) : (!neura.data) -> !neura.data +// MOV-NEXT: %20 = "neura.phi"(%18, %19) : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %21 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data +// MOV-NEXT: %22 = "neura.data_mov"(%11) : (!neura.data) -> !neura.data +// MOV-NEXT: %23 = "neura.fadd"(%21, %22) : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %24 = "neura.data_mov"(%20) : (!neura.data) -> !neura.data +// MOV-NEXT: %25 = "neura.data_mov"(%8) : (!neura.data) -> !neura.data +// MOV-NEXT: %26 = "neura.add"(%24, %25) : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %27 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data +// MOV-NEXT: %28 = "neura.data_mov"(%2) : (!neura.data) -> !neura.data +// MOV-NEXT: %29 = "neura.icmp"(%27, %28) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %30 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data +// MOV-NEXT: %31 = "neura.data_mov"(%29) : (!neura.data) -> !neura.data +// MOV-NEXT: %32 = neura.grant_predicate %30, %31 : !neura.data, !neura.data -> !neura.data +// MOV-NEXT: neura.ctrl_mov %32 -> %18 : 
!neura.data !neura.data +// MOV-NEXT: %33 = "neura.data_mov"(%23) : (!neura.data) -> !neura.data +// MOV-NEXT: %34 = "neura.data_mov"(%29) : (!neura.data) -> !neura.data +// MOV-NEXT: %35 = neura.grant_predicate %33, %34 : !neura.data, !neura.data -> !neura.data +// MOV-NEXT: neura.ctrl_mov %35 -> %15 : !neura.data !neura.data +// MOV-NEXT: %36 = "neura.data_mov"(%29) : (!neura.data) -> !neura.data +// MOV-NEXT: %37 = "neura.not"(%36) : (!neura.data) -> !neura.data +// MOV-NEXT: %38 = "neura.data_mov"(%23) : (!neura.data) -> !neura.data +// MOV-NEXT: %39 = "neura.data_mov"(%37) : (!neura.data) -> !neura.data +// MOV-NEXT: %40 = neura.grant_predicate %38, %39 : !neura.data, !neura.data -> !neura.data +// MOV-NEXT: %41 = "neura.data_mov"(%40) : (!neura.data) -> !neura.data +// MOV-NEXT: "neura.return"(%41) : (!neura.data) -> () +// MOV-NEXT: } -// MAPPING: func.func @loop_test() -> f32 attributes {CompiledII = 6 : i32, RecMII = 4 : i32, ResMII = 1 : i32, accelerator = "neura"} { -// MAPPING-NEXT: %0 = "neura.constant"() <{predicate = true, value = 10 : i64}> {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 0 : i32}]} : () -> !neura.data -// MAPPING-NEXT: %1 = "neura.data_mov"(%0) {mapping_locs = []} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %2 = "neura.grant_always"(%1) {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %3 = "neura.constant"() <{predicate = true, value = 0 : i64}> {mapping_locs = [{id = 6 : i32, resource = "tile", time_step = 0 : i32}]} : () -> !neura.data -// MAPPING-NEXT: %4 = "neura.data_mov"(%3) {mapping_locs = []} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %5 = "neura.grant_once"(%4) {mapping_locs = [{id = 6 : i32, resource = "tile", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %6 = "neura.constant"() <{predicate = true, value = 1 : i64}> {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 0 : i32}]} : () -> 
!neura.data -// MAPPING-NEXT: %7 = "neura.data_mov"(%6) {mapping_locs = []} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %8 = "neura.grant_always"(%7) {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %9 = "neura.constant"() <{predicate = true, value = 3.000000e+00 : f32}> {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 0 : i32}]} : () -> !neura.data -// MAPPING-NEXT: %10 = "neura.data_mov"(%9) {mapping_locs = []} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %11 = "neura.grant_always"(%10) {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %12 = "neura.constant"() <{predicate = true, value = 0.000000e+00 : f32}> {mapping_locs = [{id = 14 : i32, resource = "tile", time_step = 0 : i32}]} : () -> !neura.data -// MAPPING-NEXT: %13 = "neura.data_mov"(%12) {mapping_locs = []} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %14 = "neura.grant_once"(%13) {mapping_locs = [{id = 14 : i32, resource = "tile", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %15 = neura.reserve : !neura.data -// MAPPING-NEXT: %16 = "neura.data_mov"(%5) {mapping_locs = [{id = 19 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %17 = "neura.phi"(%16, %15) {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 2 : i32}]} : (!neura.data, !neura.data) -> !neura.data -// MAPPING-NEXT: %18 = neura.reserve : !neura.data -// MAPPING-NEXT: %19 = "neura.data_mov"(%14) {mapping_locs = [{id = 43 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %20 = "neura.phi"(%19, %18) {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 2 : i32}]} : (!neura.data, !neura.data) -> !neura.data -// MAPPING-NEXT: %21 = "neura.data_mov"(%20) {mapping_locs = []} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %22 = 
"neura.data_mov"(%11) {mapping_locs = []} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %23 = "neura.fadd"(%21, %22) {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 3 : i32}]} : (!neura.data, !neura.data) -> !neura.data -// MAPPING-NEXT: %24 = "neura.data_mov"(%17) {mapping_locs = [{id = 14 : i32, resource = "link", time_step = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %25 = "neura.data_mov"(%8) {mapping_locs = []} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %26 = "neura.add"(%24, %25) {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 3 : i32}]} : (!neura.data, !neura.data) -> !neura.data -// MAPPING-NEXT: %27 = "neura.data_mov"(%26) {mapping_locs = []} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %28 = "neura.data_mov"(%2) {mapping_locs = [{id = 15 : i32, resource = "link", time_step = 1 : i32}, {id = 11 : i32, resource = "link", time_step = 2 : i32}, {id = 26 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %29 = "neura.icmp"(%27, %28) <{cmpType = "slt"}> {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 4 : i32}]} : (!neura.data, !neura.data) -> !neura.data -// MAPPING-NEXT: %30 = "neura.data_mov"(%29) {mapping_locs = [{id = 27 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %31 = "neura.not"(%30) {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 5 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %32 = "neura.data_mov"(%23) {mapping_locs = [{id = 31 : i32, resource = "link", time_step = 3 : i32}, {id = 17 : i32, resource = "link", time_step = 4 : i32}, {id = 6 : i32, resource = "link", time_step = 5 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %33 = "neura.data_mov"(%31) {mapping_locs = [{id = 13 : i32, resource = "link", time_step = 5 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %34 = neura.grant_predicate %32, %33 {mapping_locs = [{id = 1 : i32, 
resource = "tile", time_step = 6 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: %35 = "neura.data_mov"(%23) {mapping_locs = []} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %36 = "neura.data_mov"(%29) {mapping_locs = [{id = 30 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %37 = neura.grant_predicate %35, %36 {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 5 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: neura.ctrl_mov %37 -> %18 {mapping_locs = []} : !neura.data !neura.data -// MAPPING-NEXT: %38 = "neura.data_mov"(%26) {mapping_locs = []} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %39 = "neura.data_mov"(%29) {mapping_locs = []} : (!neura.data) -> !neura.data -// MAPPING-NEXT: %40 = neura.grant_predicate %38, %39 {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 5 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING-NEXT: neura.ctrl_mov %40 -> %15 {mapping_locs = [{id = 27 : i32, resource = "link", time_step = 5 : i32}, {id = 27 : i32, resource = "link", time_step = 6 : i32}, {id = 27 : i32, resource = "link", time_step = 7 : i32}]} : !neura.data !neura.data -// MAPPING-NEXT: %41 = "neura.data_mov"(%34) {mapping_locs = []} : (!neura.data) -> !neura.data -// MAPPING-NEXT: "neura.return"(%41) {mapping_locs = [{id = 1 : i32, resource = "tile", time_step = 7 : i32}]} : (!neura.data) -> () +// MAPPING: func.func @loop_test() -> f32 attributes {CompiledII = 4 : i32, RecMII = 4 : i32, ResMII = 1 : i32, accelerator = "neura"} { +// MAPPING-NEXT: %0 = "neura.constant"() <{predicate = true, value = 10 : i64}> {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 0 : i32, x = 1 : i32, y = 1 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %1 = "neura.data_mov"(%0) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %2 = "neura.grant_always"(%1) {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 1 : 
i32, x = 1 : i32, y = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %3 = "neura.constant"() <{predicate = true, value = 0 : i64}> {mapping_locs = [{id = 6 : i32, resource = "tile", time_step = 0 : i32, x = 1 : i32, y = 2 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %4 = "neura.data_mov"(%3) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %5 = "neura.grant_once"(%4) {mapping_locs = [{id = 6 : i32, resource = "tile", time_step = 1 : i32, x = 1 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %6 = "neura.constant"() <{predicate = true, value = 1 : i64}> {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 0 : i32, x = 2 : i32, y = 1 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %7 = "neura.data_mov"(%6) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %8 = "neura.grant_always"(%7) {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 1 : i32, x = 2 : i32, y = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %9 = "neura.constant"() <{predicate = true, value = 3.000000e+00 : f32}> {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 0 : i32, x = 2 : i32, y = 2 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %10 = "neura.data_mov"(%9) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %11 = "neura.grant_always"(%10) {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 2 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %12 = "neura.constant"() <{predicate = true, value = 0.000000e+00 : f32}> {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 1 : i32, x = 2 : i32, y = 2 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %13 = "neura.data_mov"(%12) {mapping_locs = [{id = 33 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %14 = "neura.grant_once"(%13) {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 2 : i32, x = 2 : i32, y 
= 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %15 = neura.reserve : !neura.data +// MAPPING-NEXT: %16 = "neura.data_mov"(%14) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %17 = "neura.phi"(%15, %16) {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 3 : i32, x = 2 : i32, y = 1 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %18 = neura.reserve : !neura.data +// MAPPING-NEXT: %19 = "neura.data_mov"(%5) {mapping_locs = [{id = 19 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %20 = "neura.phi"(%18, %19) {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 2 : i32, x = 1 : i32, y = 1 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %21 = "neura.data_mov"(%17) {mapping_locs = [{id = 28 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %22 = "neura.data_mov"(%11) {mapping_locs = [{id = 32 : i32, resource = "link", time_step = 2 : i32}, {id = 44 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %23 = "neura.fadd"(%21, %22) {mapping_locs = [{id = 13 : i32, resource = "tile", time_step = 4 : i32, x = 3 : i32, y = 1 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %24 = "neura.data_mov"(%20) {mapping_locs = [{id = 15 : i32, resource = "link", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %25 = "neura.data_mov"(%8) {mapping_locs = [{id = 29 : i32, resource = "link", time_step = 1 : i32}, {id = 24 : i32, resource = "link", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %26 = "neura.add"(%24, %25) {mapping_locs = [{id = 4 : i32, resource = "tile", time_step = 3 : i32, x = 1 : i32, y = 0 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %27 = "neura.data_mov"(%26) {mapping_locs = [{id = 11 : i32, resource = "link", time_step = 3 : i32}]} 
: (!neura.data) -> !neura.data +// MAPPING-NEXT: %28 = "neura.data_mov"(%2) {mapping_locs = [{id = 14 : i32, resource = "link", time_step = 1 : i32}, {id = 29 : i32, resource = "link", time_step = 2 : i32}, {id = 29 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %29 = "neura.icmp"(%27, %28) <{cmpType = "slt"}> {mapping_locs = [{id = 8 : i32, resource = "tile", time_step = 4 : i32, x = 2 : i32, y = 0 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %30 = "neura.data_mov"(%26) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %31 = "neura.data_mov"(%29) {mapping_locs = [{id = 24 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %32 = neura.grant_predicate %30, %31 {mapping_locs = [{id = 4 : i32, resource = "tile", time_step = 5 : i32, x = 1 : i32, y = 0 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %32 -> %18 {mapping_locs = [{id = 12 : i32, resource = "link", time_step = 5 : i32}]} : !neura.data !neura.data +// MAPPING-NEXT: %33 = "neura.data_mov"(%23) {mapping_locs = [{id = 41 : i32, resource = "link", time_step = 4 : i32}, {id = 38 : i32, resource = "link", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %34 = "neura.data_mov"(%29) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %35 = neura.grant_predicate %33, %34 {mapping_locs = [{id = 8 : i32, resource = "tile", time_step = 6 : i32, x = 2 : i32, y = 0 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %35 -> %15 {mapping_locs = [{id = 26 : i32, resource = "link", time_step = 6 : i32}]} : !neura.data !neura.data +// MAPPING-NEXT: %36 = "neura.data_mov"(%29) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %37 = "neura.not"(%36) {mapping_locs = [{id = 8 : i32, resource = "tile", time_step = 5 : i32, x = 2 : i32, y = 0 : i32}]} : (!neura.data) 
-> !neura.data +// MAPPING-NEXT: %38 = "neura.data_mov"(%23) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %39 = "neura.data_mov"(%37) {mapping_locs = [{id = 25 : i32, resource = "link", time_step = 5 : i32}, {id = 39 : i32, resource = "link", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %40 = neura.grant_predicate %38, %39 {mapping_locs = [{id = 13 : i32, resource = "tile", time_step = 7 : i32, x = 3 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %41 = "neura.data_mov"(%40) {mapping_locs = [{id = 42 : i32, resource = "link", time_step = 7 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.return"(%41) {mapping_locs = [{id = 14 : i32, resource = "tile", time_step = 8 : i32, x = 3 : i32, y = 2 : i32}]} : (!neura.data) -> () // MAPPING-NEXT: } // INST: "name": "neura.fadd",