diff --git a/include/NeuraDialect/Architecture/Architecture.h b/include/NeuraDialect/Architecture/Architecture.h index f40ae183..2d560e75 100644 --- a/include/NeuraDialect/Architecture/Architecture.h +++ b/include/NeuraDialect/Architecture/Architecture.h @@ -11,6 +11,12 @@ namespace mlir { namespace neura { +// Enum for identifying resource type. +enum class ResourceKind { + Tile, + Link, +}; + //===----------------------------------------------------------------------===// // BasicResource: abstract base class for Tile, Link, etc. //===----------------------------------------------------------------------===// @@ -20,6 +26,7 @@ class BasicResource { virtual ~BasicResource() = default; virtual int getId() const = 0; virtual std::string getType() const = 0; + virtual ResourceKind getKind() const = 0; }; //===----------------------------------------------------------------------===// @@ -37,6 +44,12 @@ class Tile : public BasicResource { int getId() const override; std::string getType() const override { return "tile"; } + ResourceKind getKind() const override { return ResourceKind::Tile; } + + static bool classof(const BasicResource *res) { + return res && res->getKind() == ResourceKind::Tile; + } + int getX() const; int getY() const; @@ -64,8 +77,14 @@ class Link : public BasicResource { Link(int id); int getId() const override; + std::string getType() const override { return "link"; } + ResourceKind getKind() const override { return ResourceKind::Link; } + + static bool classof(const BasicResource *res) { + return res && res->getKind() == ResourceKind::Link; + } Tile* getSrcTile() const; Tile* getDstTile() const; @@ -83,7 +102,8 @@ struct PairHash { } }; -/// Describes the entire CGRA architecture. +// Describes the CGRA architecture template. +// TODO: Model architecture in detail (e.g., registers, ports). 
class Architecture { public: Architecture(int width, int height); @@ -98,8 +118,9 @@ class Architecture { std::vector getAllLinks() const; private: + // TODO: Model architecture in detail, e.g., ports, registers, crossbars, etc. + // https://github.com/coredac/dataflow/issues/52. std::vector> tile_storage; -// std::vector tiles; std::vector> link_storage; std::unordered_map id_to_tile; std::unordered_map, Tile*, PairHash> coord_to_tile; diff --git a/include/NeuraDialect/Mapping/MappingState.h b/include/NeuraDialect/Mapping/MappingState.h index ed10b265..5736f654 100644 --- a/include/NeuraDialect/Mapping/MappingState.h +++ b/include/NeuraDialect/Mapping/MappingState.h @@ -2,7 +2,8 @@ #define NEURA_MAPPING_STATE_H #include "mlir/IR/Operation.h" -#include "NeuraDialect/Architecture/Architecture.h" // for BasicResource +#include "NeuraDialect/Architecture/Architecture.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -19,6 +20,12 @@ struct MappingLoc { bool operator==(const MappingLoc &other) const { return resource == other.resource && time_step == other.time_step; } + + bool operator<(const MappingLoc &other) const { + if (time_step != other.time_step) + return time_step < other.time_step; + return resource->getId() < other.resource->getId(); + } }; } // namespace neura @@ -43,44 +50,65 @@ class MappingState { public: MappingState(const Architecture &arch, int II); // Binds a (tile/link, time_step) location to an operation. - void bindOp(MappingLoc loc, Operation *op); + bool bindOp(const MappingLoc &loc, Operation *op); + + // Unbinds an operation from its (tile/link, time_step) location, + // which is useful for backtracking. + void unbindOp(Operation *op); // Checks if a (tile/link, time_step) is available (unoccupied). - bool isAvailable(const MappingLoc &loc) const; + // Note that the check is performed in II granularity. 
+ // For example, if II is 4, and we want to check (tile 2, step 5), then + // it will check (tile 2, step 1), (tile 2, step 5), (tile 2, step 9), etc. + bool isAvailableAcrossTime(const MappingLoc &loc) const; // Gets the operation at a specific (tile/link, time_step) location. std::optional getOpAt(MappingLoc loc) const; + // Counts the number of operations at a specific resource across time steps. + int countOpsAtResource(BasicResource *resource) const; + // Gets all MRRG nodes. - const std::unordered_set &getAllLocs() const; + const std::set &getAllLocs() const; + + // Gets all MRRG nodes allocated to a given op. + const std::vector &getAllLocsOfOp(Operation *op) const; + + // Reserves links for an move operation. + void reserveRoute(Operation *op, ArrayRef path); + + // Releases links for an move operation. + void releaseRoute(Operation *op); // Gets neighboring tiles on next step of a given MappingLoc. - const std::vector &getNextStepTiles(MappingLoc loc) const; + std::vector getNextStepTiles(MappingLoc loc) const; - // Gets neighboring links on next step of a given MappingLoc. - const std::vector &getNextStepLinks(MappingLoc loc) const; +// // Gets neighboring links on next step of a given MappingLoc. +// const std::vector &getNextStepLinks(MappingLoc loc) const; - // Gets neighboring tiles on current step of a given MappingLoc. - const std::vector &getCurrentStepTiles(MappingLoc loc) const; +// // Gets neighboring tiles on current step of a given MappingLoc. +// const std::vector &getCurrentStepTiles(MappingLoc loc) const; // Gets neighboring links on current step of a given MappingLoc. - const std::vector &getCurrentStepLinks(MappingLoc loc) const; + std::vector getCurrentStepLinks(MappingLoc loc) const; + // Gets the target initiation interval (II) for the mapping. int getII() const { return II; } + // Embeds the mapping states onto the mapped operations. 
+ void encodeMappingState(); + + void dumpOpToLocs(llvm::raw_ostream &os = llvm::errs()) const; + private: // Initiation interval. int II; - std::unordered_set all_locs; - // current and next step tiles and links for a given MappingLoc. Note that - // the key MappingLoc is either a pair of (tile, time_step) or (link, time_step). - std::unordered_map> next_step_tiles; - std::unordered_map> next_step_links; - std::unordered_map> current_step_tiles; - std::unordered_map> current_step_links; - - std::unordered_map loc_to_op; - std::unordered_set occupied_locs; + static constexpr int kMaxSteps = 10; + + std::set all_locs; + std::set occupied_locs; + std::map loc_to_op; + std::map> op_to_locs; }; } // namespace neura diff --git a/include/NeuraDialect/Mapping/mapping_util.h b/include/NeuraDialect/Mapping/mapping_util.h index ca3a4b45..1864dd5e 100644 --- a/include/NeuraDialect/Mapping/mapping_util.h +++ b/include/NeuraDialect/Mapping/mapping_util.h @@ -2,6 +2,7 @@ #include "mlir/IR/Operation.h" #include "NeuraDialect/Architecture/Architecture.h" +#include "NeuraDialect/Mapping/MappingState.h" namespace mlir { namespace neura { @@ -18,5 +19,70 @@ SmallVector collectRecurrenceCycles(Operation *func_op); // Calculates ResMII: ceil(#ops / #tiles). int calculateResMii(Operation *func_op, const Architecture &architecture); +// Returns topologically sorted operations in func_op. +std::vector getTopologicallySortedOps(Operation *func_op); + +Operation* getMaterializedProducer(Value operand); + +// Collects the real users of an operation, excluding ctrl_mov and data_mov. +llvm::SmallVector getMaterializedUserOps(Operation *op); + +// Gets the last materialized backward user of an operation, which is expected +// to be a phi operation. +Operation *getMaterializedBackwardUser(Operation *op); + +// Attempts to map a function operation to the accelerator using heuristics. 
+bool tryHeuristicMapping(std::vector &sorted_ops, + const Architecture &architecture, + MappingState &mapping_state); + +// Attempts to route a data move operation from src_loc to dst_loc. +bool tryRouteDataMove(Operation *mov, + MappingLoc src_loc, + MappingLoc dst_loc, + bool is_backward_move, + const MappingState &mapping_state, + std::vector &path_out); + +bool tryRouteForwardMove(Operation *mov_op, + MappingLoc src_loc, + MappingLoc dst_loc, + const MappingState &state, + std::vector &path_out); + +bool tryRouteBackwardMove(Operation *mov_op, + MappingLoc src_loc, + MappingLoc dst_loc, + const MappingState &state, + std::vector &path_out); + +// Calculates the cost of mapping locations for a given op, the returned locations +// are sorted based on the cost. +std::vector calculateCost(Operation *op, const MappingState &mapping_state); + +// Gets the ctrl_mov users of an operation, empty vector is returned if no ctrl_mov users found. +llvm::SmallVector getCtrlMovUsers(Operation *op); + +// Maps a materialized operation to the accelerator, and routes the dataflow from +// the producers to the given op. 
+bool placeAndRoute(Operation *op, const MappingLoc &target_loc, MappingState &mapping_state); + +std::vector calculateAward(Operation *op, + const Architecture &architecture, + const MappingState &mapping_state); + +void updateAward(std::map &locs_with_award, + MappingLoc loc, int award); + +bool canReachLocInTime(const MappingLoc &src_loc, + const MappingLoc &dst_loc, + int deadline_step, + const MappingState &mapping_state); + +bool canReachLocInTime(const std::vector &producers, + const MappingLoc &target_loc, + int deadline_step, + const MappingState &mapping_state); + } // namespace neura } // namespace mlir diff --git a/include/NeuraDialect/NeuraPasses.h b/include/NeuraDialect/NeuraPasses.h index 8fa986f2..8b444603 100644 --- a/include/NeuraDialect/NeuraPasses.h +++ b/include/NeuraDialect/NeuraPasses.h @@ -25,6 +25,7 @@ std::unique_ptr createAssignAcceleratorPass(); std::unique_ptr createTransformCtrlToDataFlowPass(); std::unique_ptr createLeveragePredicatedValuePass(); std::unique_ptr createMapToAcceleratorPass(); +std::unique_ptr createGenerateCodePass(); #define GEN_PASS_REGISTRATION #include "NeuraDialect/NeuraPasses.h.inc" diff --git a/include/NeuraDialect/NeuraPasses.td b/include/NeuraDialect/NeuraPasses.td index 426fe6d0..823a4b55 100644 --- a/include/NeuraDialect/NeuraPasses.td +++ b/include/NeuraDialect/NeuraPasses.td @@ -57,4 +57,14 @@ def MapToAccelerator : Pass<"map-to-accelerator", "ModuleOp"> { }]; let constructor = "neura::createMapToAcceleratorPass()"; } + +def GenerateCode : Pass<"generate-code", "ModuleOp"> { + let summary = "Generate JSON-formatted instructions from mapped Neura IR"; + let description = [{ + This pass generates JSON file containing the instructions. + The instructions can be encoded into configuration signals. 
+ }]; + let constructor = "neura::createGenerateCodePass()"; +} + #endif // NEURA_PASSES_TD \ No newline at end of file diff --git a/lib/NeuraDialect/Mapping/MappingState.cpp b/lib/NeuraDialect/Mapping/MappingState.cpp index f4189429..75485ae4 100644 --- a/lib/NeuraDialect/Mapping/MappingState.cpp +++ b/lib/NeuraDialect/Mapping/MappingState.cpp @@ -1,37 +1,48 @@ #include "NeuraDialect/Mapping/MappingState.h" +#include "mlir/IR/BuiltinTypes.h" using namespace mlir; using namespace mlir::neura; MappingState::MappingState(const Architecture &arch, int II) : II(II) { + // TODO: Use number of operations to determine the max steps for constructing MRRG. for (Tile* tile : arch.getAllTiles()) { - for (int t = 0; t < II; ++t) { + for (int t = 0; t < II * kMaxSteps; ++t) { MappingLoc loc = {tile, t}; all_locs.insert(loc); - - // Create edges to neighboring tiles at t+1. - for (Tile* dst : tile->getDstTiles()) { - MappingLoc next_step_dst_tile_loc = {dst, (t + 1) % II}; // modulo II for reuse - next_step_tiles[loc].push_back(next_step_dst_tile_loc); - } - - // TODO: Not sure whether we need the link on t or t+1. - // Creates edges to neighboring links at t. 
- for (Link* dst : tile->getOutLinks()) { - MappingLoc current_step_dst_link_loc = {dst, t % II}; - next_step_tiles[loc].push_back(current_step_dst_link_loc); - } } } } -void MappingState::bindOp(MappingLoc loc, Operation *op) { +bool MappingState::bindOp(const MappingLoc &loc, Operation *op) { loc_to_op[loc] = op; occupied_locs.insert(loc); + auto it = op_to_locs.find(op); + assert (it == op_to_locs.end() && "Operation already has reserved locations"); + op_to_locs[op].push_back(loc); + return true; } -bool MappingState::isAvailable(const MappingLoc &loc) const { - return occupied_locs.find(loc) == occupied_locs.end(); +void MappingState::unbindOp(Operation *op) { + auto it = op_to_locs.find(op); + if (it == op_to_locs.end()) return; + + for (const MappingLoc &loc : it->second) { + loc_to_op.erase(loc); + occupied_locs.erase(loc); + } + + op_to_locs.erase(it); +} + +bool MappingState::isAvailableAcrossTime(const MappingLoc &loc) const { + for (int t = loc.time_step % II; t < II * kMaxSteps; t += II) { + MappingLoc checkLoc = loc; + checkLoc.time_step = t; + if (occupied_locs.find(checkLoc) != occupied_locs.end()) + return false; + } + return true; } std::optional MappingState::getOpAt(MappingLoc loc) const { @@ -40,30 +51,161 @@ std::optional MappingState::getOpAt(MappingLoc loc) const { return it->second; } -const std::unordered_set &MappingState::getAllLocs() const { +int MappingState::countOpsAtResource(BasicResource *resource) const { + int count = 0; + for (const auto &[loc, op] : loc_to_op) { + if (loc.resource == resource) { + count++; + } + } + return count; +} + +const std::set &MappingState::getAllLocs() const { return all_locs; } -const std::vector &MappingState::getNextStepTiles(MappingLoc loc) const { +const std::vector &MappingState::getAllLocsOfOp(Operation *op) const { + auto it = op_to_locs.find(op); + if (it != op_to_locs.end()) + return it->second; + static const std::vector empty; - auto it = next_step_tiles.find(loc); - return it != 
next_step_tiles.end() ? it->second : empty; + return empty; } -const std::vector &MappingState::getNextStepLinks(MappingLoc loc) const { - static const std::vector empty; - auto it = next_step_links.find(loc); - return it != next_step_links.end() ? it->second : empty; +std::vector MappingState::getNextStepTiles(MappingLoc loc) const { + std::vector next_step_tiles; + const int next_step = loc.time_step + 1; + assert(next_step < II * kMaxSteps && "Next step exceeds max steps"); + // Collects neighboring tiles at t+1 for both tile and link. + if (loc.resource->getKind() == ResourceKind::Tile) { + Tile *tile = dyn_cast(loc.resource); + for (Tile* dst : tile->getDstTiles()) { + MappingLoc next_step_dst_tile_loc = {dst, next_step}; + next_step_tiles.push_back(next_step_dst_tile_loc); + } + // Includes self for reuse. + next_step_tiles.push_back({tile, next_step}); + } else if (loc.resource->getKind() == ResourceKind::Link) { + Link *link = dyn_cast(loc.resource); + Tile* dst = link->getDstTile(); + MappingLoc next_step_dst_tile_loc = {dst, next_step}; + next_step_tiles.push_back(next_step_dst_tile_loc); + } + return next_step_tiles; } -const std::vector &MappingState::getCurrentStepTiles(MappingLoc loc) const { - static const std::vector empty; - auto it = current_step_tiles.find(loc); - return it != current_step_tiles.end() ? it->second : empty; +// const std::vector &MappingState::getNextStepLinks(MappingLoc loc) const { +// static const std::vector empty; +// auto it = next_step_links.find(loc); +// return it != next_step_links.end() ? it->second : empty; +// } + +// const std::vector &MappingState::getCurrentStepTiles(MappingLoc loc) const { +// static const std::vector empty; +// auto it = current_step_tiles.find(loc); +// return it != current_step_tiles.end() ? 
it->second : empty; +// } + +std::vector MappingState::getCurrentStepLinks(MappingLoc loc) const { + assert((loc.resource->getKind() == ResourceKind::Tile) && + "Current step links can only be queried for tiles"); + std::vector current_step_links; + const int current_step = loc.time_step; + assert(current_step < II * kMaxSteps && "Current step exceeds max steps"); + // Collects neighboring tiles at t for given tile. + Tile *tile = dyn_cast(loc.resource); + for (Link* out_link : tile->getOutLinks()) { + MappingLoc current_step_out_link_loc = {out_link, current_step}; + current_step_links.push_back(current_step_out_link_loc); + } + return current_step_links; } -const std::vector &MappingState::getCurrentStepLinks(MappingLoc loc) const { - static const std::vector empty; - auto it = current_step_links.find(loc); - return it != current_step_links.end() ? it->second : empty; +void MappingState::reserveRoute(Operation *op, ArrayRef path) { + + // Records all mapping locations. + assert(op_to_locs.find(op) == op_to_locs.end() && + "Operation already has reserved locations"); + op_to_locs[op] = std::vector(path.begin(), path.end()); + + for (const MappingLoc &loc : path) { + assert(occupied_locs.find(loc) == occupied_locs.end() && + "Mapping location already occupied"); + loc_to_op[loc] = op; + assert(occupied_locs.find(loc) == occupied_locs.end() && + "Mapping location already occupied in occupied_locs"); + occupied_locs.insert(loc); + } +} + +void MappingState::releaseRoute(Operation *op) { + auto it = op_to_locs.find(op); + if (it == op_to_locs.end()) + return; + + const std::vector &route = it->second; + + for (const MappingLoc &loc : route) { + loc_to_op.erase(loc); + occupied_locs.erase(loc); + } + + op_to_locs.erase(it); +} + +void MappingState::dumpOpToLocs(llvm::raw_ostream &os) const { + os << "=== MappingState: op_to_locs ===\n"; + + for (const auto &[op, locs] : op_to_locs) { + os << " - " << op->getName(); + if (auto name_attr = op->getAttrOfType("sym_name")) 
+ os << " @" << name_attr; + os << "\n"; + + for (const MappingLoc &loc : locs) { + auto *res = loc.resource; + os << " -> " << res->getType() << "#" << res->getId() + << " @t=" << loc.time_step << "\n"; + } + } + + os << "=== End ===\n"; +} + +void MappingState::encodeMappingState() { + for (const auto &[op, locs] : op_to_locs) { + llvm::SmallVector mapping_entries; + auto ctx = op->getContext(); + for (const MappingLoc &loc : locs) { + std::string kind_str; + if (loc.resource->getKind() == ResourceKind::Tile) { + kind_str = "tile"; + } else if (loc.resource->getKind() == ResourceKind::Link) { + kind_str = "link"; + } else { + kind_str = "unknown"; + } + auto dict = mlir::DictionaryAttr::get( + ctx, + { + mlir::NamedAttribute( + mlir::StringAttr::get(ctx, "resource"), + mlir::StringAttr::get(ctx, kind_str) + ), + mlir::NamedAttribute( + mlir::StringAttr::get(ctx, "id"), + mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), loc.resource->getId()) + ), + mlir::NamedAttribute( + mlir::StringAttr::get(ctx, "time_step"), + mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 32), loc.time_step) + ) + } + ); + mapping_entries.push_back(dict); + } + op->setAttr("mapping_locs", mlir::ArrayAttr::get(ctx, mapping_entries)); + } } \ No newline at end of file diff --git a/lib/NeuraDialect/Mapping/mapping_util.cpp b/lib/NeuraDialect/Mapping/mapping_util.cpp index 3c724a7e..e0736669 100644 --- a/lib/NeuraDialect/Mapping/mapping_util.cpp +++ b/lib/NeuraDialect/Mapping/mapping_util.cpp @@ -1,9 +1,13 @@ #include +#include #include "NeuraDialect/Mapping/mapping_util.h" #include "NeuraDialect/NeuraOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include #include "mlir/IR/Operation.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" using namespace mlir; using namespace mlir::neura; @@ -27,10 +31,18 @@ void traverseAlongPath(Operation *op, Value reserve_value, Operation *res_op = reserve_value.getDefiningOp(); if (res_op) current_path.push_front(res_op); - 
constexpr int kNumExcludedOps = 2; + int effective_length = 0; + for (Operation *op : current_path) { + // Skips the non-materialized ops when counting the cycle length. + if (!isa(op)) { + ++effective_length; + } + } collected_paths.push_back(RecurrenceCycle{ operations: SmallVector(current_path.begin(), current_path.end()), - length: static_cast(current_path.size()) - kNumExcludedOps + length: static_cast(effective_length) }); if (res_op) current_path.pop_front(); @@ -71,7 +83,6 @@ SmallVector mlir::neura::collectRecurrenceCycles(Operation * for (auto &cycle : collected_paths) { cycle.operations.push_back(ctrl_mov_op); - ++cycle.length; recurrence_cycles.push_back(std::move(cycle)); } }); @@ -89,8 +100,8 @@ int mlir::neura::calculateResMii(Operation *func_op, if (isa(op) || isa(op)) { + neura::DataMovOp, + neura::ReserveOp>(op)) { return; } ++num_ops; @@ -103,3 +114,535 @@ int mlir::neura::calculateResMii(Operation *func_op, return llvm::divideCeil(num_ops, num_tiles); } + +std::vector mlir::neura::getTopologicallySortedOps(Operation *func_op) { + std::vector sorted_ops; + llvm::DenseMap pending_deps; + std::deque ready_queue; + + // Collects recurrence cycle ops. + auto recurrence_cycles = collectRecurrenceCycles(func_op); + llvm::DenseSet recurrence_ops; + for (const auto &cycle : recurrence_cycles) + for (Operation *op : cycle.operations) + recurrence_ops.insert(op); + + // Counts unresolved dependencies for each op. + func_op->walk([&](Operation *op) { + if (op == func_op) return; + int dep_count = 0; + for (Value operand : op->getOperands()) + if (operand.getDefiningOp()) + ++dep_count; + pending_deps[op] = dep_count; + if (dep_count == 0) { + // TODO: Prioritize recurrence ops. But cause compiled II regression. + // https://github.com/coredac/dataflow/issues/59. 
+ if (recurrence_ops.contains(op)) { + // ready_queue.push_front(op); + ready_queue.push_back(op); + } else { + ready_queue.push_back(op); + } + } + }); + + // BFS-style topological sort with recurrence priority. + while (!ready_queue.empty()) { + Operation *op = ready_queue.front(); + ready_queue.pop_front(); + sorted_ops.push_back(op); + + for (Value result : op->getResults()) { + for (Operation *user : result.getUsers()) { + if (--pending_deps[user] == 0) { + // TODO: Prioritize recurrence ops. But cause compiled II regression. + // https://github.com/coredac/dataflow/issues/59. + if (recurrence_ops.contains(user)) { + // ready_queue.push_front(user); + ready_queue.push_back(user); + } else { + ready_queue.push_back(user); + } + } + } + } + } + + return sorted_ops; +} + +mlir::Operation *mlir::neura::getMaterializedBackwardUser(Operation *op) { + assert(isa(op) && "Expected a ctrl_mov operation"); + auto ctrl_mov = dyn_cast(op); + Value target = ctrl_mov.getTarget(); + + assert(isa(target.getDefiningOp()) && + "Expected the user of ctrl_mov target to be a reserve operation"); + auto reserve_op = dyn_cast(target.getDefiningOp()); + + // Skip ctrl_mov users of reserve; return the first phi user. 
+ for (Operation *user : reserve_op.getResult().getUsers()) { + if (isa(user)) continue; // skip ctrl_mov user + if (isa(user)) return user; + } + assert(false && "No materialized backward user (i.e., phi) found for ctrl_mov"); +} + +llvm::SmallVector mlir::neura::getMaterializedUserOps(Operation *op) { + llvm::SmallVector result; + llvm::DenseSet visited; + visited.insert(op); + llvm::errs() << "Starting to collect materialized users for: " << *op << "\n"; + llvm::SmallVector worklist(op->getUsers().begin(), op->getUsers().end()); + + while (!worklist.empty()) { + Operation *curr = worklist.pop_back_val(); + llvm::errs() << "Visiting operation: " << *curr << "\n"; + if (!visited.insert(curr).second) { + llvm::errs() << "Already visited, so skip: " << *curr << "\n"; + continue; + } + + if (isa(curr)) { + for (Operation *next : curr->getUsers()) { + if (visited.insert(next).second) { + // Only adds the next operation if it hasn't been visited yet. + worklist.push_back(next); + } + } + continue; + } + + // Specially handles the ctrl_mov, i.e., the second operand of ctrl_mov is + // treated as a target/destination/user in terms of dataflow. 
+ if (auto ctrl_mov = dyn_cast(curr)) { + Value target = ctrl_mov.getTarget(); + for (Operation *user : target.getUsers()) { + if (visited.insert(user).second) { + worklist.push_back(user); + } + } + continue; + } + + // Materialized op + result.push_back(curr); + } + + for (Operation *res : result) { + llvm::errs() << "Materialized user: " << *res << "\n"; + } + return result; +} + +bool mlir::neura::tryRouteForwardMove(Operation *mov_op, + MappingLoc src_loc, + MappingLoc dst_loc, + const MappingState &state, + std::vector &path_out) { + return tryRouteDataMove(mov_op, src_loc, dst_loc, false, state, path_out); +} + +bool mlir::neura::tryRouteBackwardMove(Operation *mov_op, + MappingLoc src_loc, + MappingLoc dst_loc, + const MappingState &state, + std::vector &path_out) { + llvm::errs() << "[tryRouteBackwardMove] src_loc: " << src_loc.resource->getType() + << "#" << src_loc.resource->getId() + << " @t=" << src_loc.time_step + << ", dst_loc: " << dst_loc.resource->getType() + << "#" << dst_loc.resource->getId() + << " @t=" << dst_loc.time_step << "\n"; + return tryRouteDataMove(mov_op, src_loc, dst_loc, true, state, path_out); +} + +bool mlir::neura::tryRouteDataMove(Operation *mov_op, + MappingLoc src_loc, + MappingLoc dst_loc, + bool is_backward_move, + const MappingState &state, + std::vector &path_out) { + // Specially handles the case where src and dst are the same tile. + if (src_loc.resource == dst_loc.resource) { + return true; + } + struct QueueEntry { + Tile *tile; + int time; + std::vector path; + }; + + Tile *src_tile = dyn_cast(src_loc.resource); + Tile *dst_tile = dyn_cast(dst_loc.resource); + + std::queue queue; + std::set visited; + + queue.push({src_tile, src_loc.time_step, {}}); + visited.insert(src_tile); + + // Tolerates the deadline step by II for backward moves (as the data should + // arrive at the next iteration). + const int deadline_step = dst_loc.time_step + (is_backward_move ? 
state.getII() : 0); + + // BFS-style search for a path from src_tile to dst_tile. + while (!queue.empty()) { + auto [current_tile, current_time, current_path] = queue.front(); + queue.pop(); + + if (current_tile == dst_tile) { + // Confirms path reaches the target tile no later than deadline step. + if (current_time <= deadline_step) { + // Either arrives exactly right before the dst starts computation. + // So the current_time on the target tile is the same as deadline step. + if (current_time == deadline_step) { + path_out = current_path; + return true; + } + + // The last link can be held from arrival_time to dst_time - 1. + // TODO: We actually don't need to occupy the last link if the registers + // within the tile can be explicitly represented. + // https://github.com/coredac/dataflow/issues/52. + bool all_free = true; + assert(!current_path.empty() && "Path should not be empty when checking last link"); + MappingLoc last_link = current_path.back(); + std::vector last_link_occupying; + for (int t = current_time; t < deadline_step; ++t) { + MappingLoc repeated{last_link.resource, t}; + last_link_occupying.push_back(repeated); + if (!state.isAvailableAcrossTime(repeated)) { + all_free = false; + break; + } + } + if (all_free) { + path_out = current_path; + path_out.insert(path_out.end(), last_link_occupying.begin(), last_link_occupying.end()); + return true; + } + + } else { + // Arrives too late, not schedulable. 
+ continue; + } + } + + for (MappingLoc current_step_next_link : state.getCurrentStepLinks({current_tile, current_time})) { + if (!state.isAvailableAcrossTime(current_step_next_link)) continue; + + Link *next_link = dyn_cast(current_step_next_link.resource); + Tile *next_tile = next_link->getDstTile(); + int next_time = current_time + 1; + + if (!visited.insert(next_tile).second) continue; + + std::vector extended_path = current_path; + extended_path.push_back(current_step_next_link); + queue.push({next_tile, next_time, std::move(extended_path)}); + } + } + + return false; +} + +Operation* mlir::neura::getMaterializedProducer(Value operand) { + Operation *producer = operand.getDefiningOp(); + assert(isa(producer) && "Expected operand to be defined by a DataMovOp"); + // Finds the actual producer. + auto mov_op = dyn_cast(producer); + auto materialized_producer = mov_op.getOperand().getDefiningOp(); + return materialized_producer; +} + +bool mlir::neura::tryHeuristicMapping(std::vector &sorted_ops, + const Architecture &architecture, + MappingState &mapping_state) { + DenseSet visited; + + for (Operation *op : sorted_ops) { + // TODO: Build up util func to distinguish materialized and non-materialized ops. + if (isa(op)) + continue; + + std::vector sorted_locs = calculateAward(op, architecture, mapping_state); + // auto target_loc = getLocWithMinCost(loc_with_cost); + if (sorted_locs.empty()) { + llvm::errs() << "[DEBUG] No locations found for op: " << *op << "\n"; + return false; // No locations available for this operation. 
+ } + assert(!sorted_locs.empty() && + "No locations found for the operation to map"); + MappingLoc target_loc = sorted_locs.front(); + if (placeAndRoute(op, target_loc, mapping_state)) { + llvm::errs() << "[DEBUG] Successfully scheduled op: " << *op + << " at loc: " << target_loc.resource->getType() + << "#" << target_loc.resource->getId() + << " @t=" << target_loc.time_step << "\n"; + continue; + } else { + llvm::errs() << "[DEBUG] Failed to schedule op: " << *op << "; target loc: " << target_loc.resource->getType() << "#" << target_loc.resource->getId() << " @t=" << target_loc.time_step << "\n"; + } + // TODO: Optimization -- backtrack a few times if failed to schedule the op. + // https://github.com/coredac/dataflow/issues/59 + return false; + } + + return true; +} + +bool mlir::neura::canReachLocInTime(const std::vector &producers, + const MappingLoc &target_loc, + int deadline_step, + const MappingState &mapping_state) { + + for (Operation *producer : producers) { + // Get the last location of the producer. + auto producer_locs = mapping_state.getAllLocsOfOp(producer); + assert(!producer_locs.empty() && "No locations found for producer"); + + MappingLoc producer_loc = producer_locs.back(); + if (!canReachLocInTime(producer_loc, target_loc, deadline_step, mapping_state)) { + return false; + } + } + return true; +} + +bool mlir::neura::canReachLocInTime(const MappingLoc &src_loc, + const MappingLoc &dst_loc, + int deadline_step, + const MappingState &mapping_state) { + // Checks if the destination is reachable from the source within the given time window. + if (src_loc.resource == dst_loc.resource && + dst_loc.time_step <= deadline_step) { + return true; + } + + // Checks if the destination is reachable from the source tile within given steps. 
+ assert(isa(src_loc.resource)); + assert(isa(dst_loc.resource)); + + struct QueueEntry { + MappingLoc loc; + int current_time; + }; + + std::queue queue; + llvm::DenseSet visited; + + queue.push({src_loc, src_loc.time_step}); + visited.insert(dyn_cast(src_loc.resource)); + + while (!queue.empty()) { + auto [current_loc, current_time] = queue.front(); + queue.pop(); + + // If we reach the destination tile and time step is not after dst_loc + if (current_loc.resource == dst_loc.resource && + current_time <= dst_loc.time_step && + dst_loc.time_step <= deadline_step) { + return true; + } + + if (current_time >= deadline_step) + continue; + + // Explores all next step tiles from the current location. + for (const MappingLoc &next_loc : mapping_state.getNextStepTiles(current_loc)) { + if (!mapping_state.isAvailableAcrossTime(next_loc)) + continue; + + int next_time = current_time + 1; + if (next_time > deadline_step) + continue; + + Tile *next_tile = llvm::dyn_cast(next_loc.resource); + assert(next_tile && "Next location must be a Tile"); + if (visited.contains(next_tile)) { + continue; + } + + visited.insert(next_tile); + + MappingLoc next_step_loc = next_loc; + next_step_loc.time_step = next_time; + + queue.push({next_step_loc, next_time}); + } + } + + return false; +} + +void mlir::neura::updateAward(std::map &locs_with_award, + MappingLoc loc, int award) { + // Updates the award of the top element in the priority queue. + if (locs_with_award.find(loc) != locs_with_award.end()) { + locs_with_award[loc] += award; + } else { + locs_with_award[loc] = award; + } +} + +std::vector mlir::neura::calculateAward(Operation *op, + const Architecture &architecture, + const MappingState &mapping_state) { + // A heap of locations with their associated award. Note that we use a max-heap + // to prioritize locations with higher awards. + std::map locs_with_award; + + // Assembles all the producers. 
+  std::vector<Operation *> producers;
+  for (Value operand : op->getOperands()) {
+    if (isa<neura::ReserveOp>(operand.getDefiningOp())) {
+      // Skips Reserve ops (backward ctrl move) when estimating cost.
+      continue;
+    }
+    Operation *producer = getMaterializedProducer(operand);
+    assert(producer && "Expected a materialized producer");
+    producers.push_back(producer);
+  }
+
+  llvm::errs() << "[calculateAward] Operation: " << *op
+               << "; Producers: " << producers.size() << "\n";
+  for (Tile *tile : architecture.getAllTiles()) {
+    // Earliest step `op` may start: one past the latest producer location.
+    int earliest_start_time_step = 0;
+    for (Operation *producer : producers) {
+      std::vector<MappingLoc> producer_locs = mapping_state.getAllLocsOfOp(producer);
+      assert(!producer_locs.empty() && "No locations found for producer");
+
+      MappingLoc producer_loc = producer_locs.back();
+      earliest_start_time_step = std::max(earliest_start_time_step,
+                                          producer_loc.time_step + 1);
+    }
+    // Base award favors tiles with more outgoing links (routing flexibility).
+    int award = mapping_state.getII() + tile->getDstTiles().size();
+    for (int t = earliest_start_time_step;
+         t < earliest_start_time_step + mapping_state.getII(); t += 1) {
+      MappingLoc tile_loc_candidate = {tile, t};
+      // If the tile at time `t` is available, we can consider it for mapping.
+      if (mapping_state.isAvailableAcrossTime(tile_loc_candidate)) {
+        // If no producer or the location is reachable by all producers,
+        // we can consider it for mapping and grant reward.
+        if (producers.empty() ||
+            canReachLocInTime(producers,
+                              tile_loc_candidate,
+                              t,
+                              mapping_state)) {
+          updateAward(locs_with_award, tile_loc_candidate, award);
+        }
+      }
+      // The mapping location with earlier time step is granted a higher award.
+      award -= 1;
+    }
+    assert(award >= 0 && "Award should not be negative");
+  }
+
+  // Copies map entries into a vector of pairs for sorting.
+  std::vector<std::pair<MappingLoc, int>> locs_award_vec(locs_with_award.begin(),
+                                                         locs_with_award.end());
+
+  // Sorts by award (descending).
+ std::sort(locs_award_vec.begin(), locs_award_vec.end(), + [](const std::pair &a, const std::pair &b) { + return a.second > b.second; + }); + // TODO: Needs to handle tie case and prioritize lower resource utilization, however, + // compiled II becomes worse after adding this tie-breaker: https://github.com/coredac/dataflow/issues/59. + // std::sort(locs_award_vec.begin(), locs_award_vec.end(), + // [&](const std::pair &a, const std::pair &b) { + // if (a.second != b.second) { + // return a.second > b.second; + // } + // // Tie-breaker: prioritizes lower resource utilization and earlier time step. + // if (a.first.time_step != b.first.time_step) { + // return a.first.time_step > b.first.time_step; + // } + // const bool is_resource_a_lower_utilized = + // mapping_state.countOpsAtResource(a.first.resource) > + // mapping_state.countOpsAtResource(b.first.resource); + // return is_resource_a_lower_utilized; + // }); + + // Extracts just the MappingLocs, already sorted by award. + std::vector sorted_locs; + sorted_locs.reserve(locs_award_vec.size()); + for (const auto &pair : locs_award_vec) + sorted_locs.push_back(pair.first); + + return sorted_locs; +} + +llvm::SmallVector mlir::neura::getCtrlMovUsers(Operation *op) { + llvm::SmallVector result; + for (Operation *user : op->getUsers()) { + if (isa(user)) { + result.push_back(user); + } + } + return result; +} + +bool mlir::neura::placeAndRoute(Operation *op, const MappingLoc &target_loc, MappingState &mapping_state) { + if (mapping_state.bindOp(target_loc, op)) { + // Tries to route the data move operations. + for (Value operand : op->getOperands()) { + if (isa(operand.getDefiningOp())) { + // Skips Reserve ops (backward ctrl move) when estimate cost. 
+        continue;
+      }
+      Operation *data_move = operand.getDefiningOp();
+      assert(isa<neura::DataMovOp>(data_move) &&
+             "Expected a DataMovOp as operand producer");
+      Operation *producer = getMaterializedProducer(operand);
+      MappingLoc src_loc = mapping_state.getAllLocsOfOp(producer).back();
+
+      std::vector<MappingLoc> route_path;
+      if (tryRouteForwardMove(data_move, src_loc, target_loc, mapping_state, route_path)) {
+        mapping_state.reserveRoute(data_move, route_path);
+        llvm::errs() << "[DEBUG] Successfully routed data move: " << *data_move
+                     << " from " << src_loc.resource->getType() << "#" << src_loc.resource->getId()
+                     << " @t=" << src_loc.time_step
+                     << " to " << target_loc.resource->getType() << "#" << target_loc.resource->getId()
+                     << " @t=" << target_loc.time_step << "\n";
+        continue;
+      }
+      llvm::errs() << "[DEBUG] Failed to route data move: " << *data_move
+                   << " from " << src_loc.resource->getType() << "#" << src_loc.resource->getId()
+                   << " @t=" << src_loc.time_step
+                   << " to " << target_loc.resource->getType() << "#" << target_loc.resource->getId()
+                   << " @t=" << target_loc.time_step << "\n";
+      // Backtracks: undoes the placement and any partial route.
+      mapping_state.unbindOp(op);
+      mapping_state.releaseRoute(data_move);
+      return false;
+    }
+    // Checks whether the operation's user is a ctrl_mov.
+    for (Operation *user : getCtrlMovUsers(op)) {
+      auto ctrl_mov = dyn_cast<neura::CtrlMovOp>(user);
+      // Asserts before streaming: dereferencing a null dyn_cast result is UB.
+      assert(ctrl_mov && "Expected user to be a CtrlMovOp");
+      llvm::errs() << "[DEBUG] Found ctrl_mov user: " << *ctrl_mov << "\n";
+      mlir::Operation *materialized_backward_op = getMaterializedBackwardUser(ctrl_mov);
+      assert(isa<neura::PhiOp>(materialized_backward_op) &&
+             "Expected materialized operation of ctrl_mov to be a PhiOp");
+      // Gets the last location of the materialized operation.
+      MappingLoc backward_loc = mapping_state.getAllLocsOfOp(materialized_backward_op).back();
+      // Routes the ctrl_mov to the phi location.
+ std::vector route_path; + if (tryRouteBackwardMove(ctrl_mov, target_loc, backward_loc, mapping_state, route_path)) { + mapping_state.reserveRoute(ctrl_mov, route_path); + llvm::errs() << "[DEBUG] Successfully routed ctrl_mov: " << *ctrl_mov + << " to " << backward_loc.resource->getType() << "#" << backward_loc.resource->getId() + << " @t=" << backward_loc.time_step << "\n"; + continue; + } + llvm::errs() << "[DEBUG] Failed to route ctrl_mov: " << *ctrl_mov + << " to " << backward_loc.resource->getType() << "#" << backward_loc.resource->getId() + << " @t=" << backward_loc.time_step << "\n"; + mapping_state.unbindOp(op); + mapping_state.releaseRoute(ctrl_mov); + return false; + } + return true; + } + return false; +} \ No newline at end of file diff --git a/lib/NeuraDialect/Transforms/CMakeLists.txt b/lib/NeuraDialect/Transforms/CMakeLists.txt index c1d16bdc..7d944fbb 100644 --- a/lib/NeuraDialect/Transforms/CMakeLists.txt +++ b/lib/NeuraDialect/Transforms/CMakeLists.txt @@ -9,6 +9,7 @@ add_mlir_library( TransformCtrlToDataFlowPass.cpp LeveragePredicatedValuePass.cpp MapToAcceleratorPass.cpp + GenerateCodePass.cpp DEPENDS MLIRNeuraTransformsIncGen diff --git a/lib/NeuraDialect/Transforms/GenerateCodePass.cpp b/lib/NeuraDialect/Transforms/GenerateCodePass.cpp new file mode 100644 index 00000000..6c223f83 --- /dev/null +++ b/lib/NeuraDialect/Transforms/GenerateCodePass.cpp @@ -0,0 +1,137 @@ +#include "NeuraDialect/NeuraDialect.h" +#include "NeuraDialect/NeuraOps.h" +#include "NeuraDialect/NeuraPasses.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Pass/Pass.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/raw_ostream.h" + +using namespace mlir; +using namespace mlir::neura; + +#define GEN_PASS_DEF_GenerateCode +#include "NeuraDialect/NeuraPasses.h.inc" + +namespace { + +struct GenerateCodePass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(GenerateCodePass) + + StringRef getArgument() const override { return 
"generate-code"; }
+  StringRef getDescription() const override {
+    return "Generates JSON code from mapped Neura IR.";
+  }
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    // NOTE(review): registered dialect name was stripped by extraction;
+    // restoring the Neura dialect -- confirm against upstream.
+    registry.insert<neura::NeuraDialect>();
+  }
+
+  void runOnOperation() override {
+    ModuleOp module = getOperation();
+
+    llvm::json::Array functions_array;
+
+    for (auto func : module.getOps<func::FuncOp>()) {
+      // Only functions assigned to the "neura" accelerator are emitted.
+      auto accel_attr = func->getAttrOfType<StringAttr>("accelerator");
+      if (!accel_attr || accel_attr.getValue() != "neura")
+        continue;
+
+      llvm::json::Object func_obj;
+      func_obj["name"] = func.getName().str();
+
+      if (auto ii_attr = func->getAttrOfType<IntegerAttr>("CompiledII"))
+        func_obj["CompiledII"] = ii_attr.getInt();
+      if (auto recMII_attr = func->getAttrOfType<IntegerAttr>("RecMII"))
+        func_obj["RecMII"] = recMII_attr.getInt();
+      if (auto resMII_attr = func->getAttrOfType<IntegerAttr>("ResMII"))
+        func_obj["ResMII"] = resMII_attr.getInt();
+
+      llvm::json::Array op_array;
+
+      func.walk([&](Operation *op) {
+        // NOTE(review): the skipped op type was stripped by extraction;
+        // skipping the enclosing func op itself -- confirm against upstream.
+        if (isa<func::FuncOp>(op))
+          return;
+
+        llvm::json::Object op_obj;
+        op_obj["name"] = op->getName().getStringRef().str();
+
+        // Result types.
+        llvm::json::Array result_types;
+        for (auto result : op->getResults()) {
+          std::string type_str;
+          llvm::raw_string_ostream os(type_str);
+          result.getType().print(os);
+          result_types.push_back(os.str());
+        }
+        op_obj["result_types"] = std::move(result_types);
+
+        // Operands (recorded by producing op name, or "block_arg").
+        llvm::json::Array operand_indices;
+        for (Value operand : op->getOperands()) {
+          if (auto defining_op = operand.getDefiningOp())
+            operand_indices.push_back(defining_op->getName().getStringRef().str());
+          else
+            operand_indices.push_back("block_arg");
+        }
+        op_obj["operands"] = std::move(operand_indices);
+
+        // Constants.
+ if (auto const_op = mlir::dyn_cast(op)) { + auto val_attr = const_op.getValue(); + if (val_attr) { + if (auto int_attr = mlir::dyn_cast(val_attr)) { + op_obj["constant_value"] = std::to_string(int_attr.getInt()); + } else if (auto float_attr = mlir::dyn_cast(val_attr)) { + op_obj["constant_value"] = std::to_string(float_attr.getValueAsDouble()); + } + } + } + + // Mapping locs. + llvm::json::Array loc_array; + if (auto attr_array = op->getAttrOfType("mapping_locs")) { + for (Attribute attr : attr_array) { + if (auto loc = mlir::dyn_cast(attr)) { + llvm::json::Object loc_obj; + if (auto idAttr = mlir::dyn_cast(loc.get("id"))) + loc_obj["id"] = idAttr.getInt(); + if (auto resource_attr = mlir::dyn_cast(loc.get("resource"))) + loc_obj["resource"] = resource_attr.getValue().str(); + if (auto timestep_attr = mlir::dyn_cast(loc.get("time_step"))) + loc_obj["time_step"] = timestep_attr.getInt(); + loc_array.push_back(std::move(loc_obj)); + } + } + } + op_obj["mapping_locs"] = std::move(loc_array); + + op_array.push_back(std::move(op_obj)); + }); + + func_obj["operations"] = std::move(op_array); + functions_array.push_back(std::move(func_obj)); + } + + // Final JSON object. 
+ llvm::json::Object root; + root["functions"] = std::move(functions_array); + + // llvm::outs() << llvm::formatv("{0:2}", llvm::json::Value(std::move(root))) << "\n"; + std::error_code ec; + llvm::raw_fd_ostream json_out("generated-instructions.json", ec); + if (ec) { + getOperation()->emitError("Failed to open 'generated-instructions.json' for writing: " + ec.message()); + return signalPassFailure(); + } + json_out << llvm::formatv("{0:2}", llvm::json::Value(std::move(root))) << "\n"; + } +}; + +} // namespace + +namespace mlir::neura { +std::unique_ptr createGenerateCodePass() { + return std::make_unique(); +} +} // namespace mlir::neura diff --git a/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp b/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp index ea6d1ef8..a684c92f 100644 --- a/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp +++ b/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp @@ -23,32 +23,65 @@ struct InsertDataMovForNeuraOps : public RewritePattern { return failure(); } - // Skips ops that already being inserted mov on the operands. - bool allInputsAreMov = llvm::all_of(op->getOperands(), [](Value v) { - return isa_and_nonnull(v.getDefiningOp()); + bool all_inputs_are_mov_except_reserve = llvm::all_of(op->getOperands(), [](Value v) { + Operation *def_op = v.getDefiningOp(); + return isa_and_nonnull(def_op) || + isa_and_nonnull(def_op); }); - if (allInputsAreMov) { - return failure(); - } + + if (all_inputs_are_mov_except_reserve) + return failure(); // All operands are already handled + + // // Skips ops that already being inserted mov on the operands. + // bool all_inputs_are_mov = llvm::all_of(op->getOperands(), [](Value v) { + // return isa_and_nonnull(v.getDefiningOp()); + // }); + // if (all_inputs_are_mov) { + // return failure(); + // } + + // // Special case: skips rewriting phi if any operand is from reserve. 
+    // if (isa<neura::PhiOp>(op)) {
+    //   bool has_reserved_input = llvm::any_of(op->getOperands(), [](Value v) {
+    //     return isa_and_nonnull<neura::ReserveOp>(v.getDefiningOp());
+    //   });
+
+    //   if (has_reserved_input)
+    //     return failure(); // Skip entire phi if any operand is reserved.
+    // }
 
     // Makes sure none of the operand has being processed.
-    bool hasAnyMovInput = llvm::any_of(op->getOperands(), [](Value v) {
+    bool has_any_mov_input = llvm::any_of(op->getOperands(), [](Value v) {
       return isa_and_nonnull<neura::DataMovOp>(v.getDefiningOp());
     });
-    assert(!hasAnyMovInput && "Unexpected: operand already wrapped in neura.mov");
+    if (has_any_mov_input)
+      llvm::errs() << "Warning: Operand already wrapped in neura.data_mov: " << *op << "\n";
+    assert(!has_any_mov_input && "Unexpected: operand already wrapped in neura.mov");
 
     Location loc = op->getLoc();
 
+    // Skips adding mov if the consumer is ctrl_mov.
+    if (isa<neura::CtrlMovOp>(op)) {
+      return failure(); // do not rewrite
+    }
+
     // Wraps operands in mov.
-    SmallVector<Value> newOperands;
+    SmallVector<Value> new_operands;
     for (Value operand : op->getOperands()) {
+      Operation *producer = operand.getDefiningOp();
+      // Skips adding mov for neura.reserve -> neura.phi.
+      if (isa<neura::PhiOp>(op) && producer && isa<neura::ReserveOp>(producer)) {
+        new_operands.push_back(operand);
+        continue;
+      }
+
       auto mov = rewriter.create<neura::DataMovOp>(loc, operand.getType(), operand);
-      newOperands.push_back(mov);
+      new_operands.push_back(mov);
     }
 
     // Clones op with new operands.
OperationState state(loc, op->getName()); - state.addOperands(newOperands); + state.addOperands(new_operands); state.addTypes(op->getResultTypes()); state.addAttributes(op->getAttrs()); @@ -59,8 +92,8 @@ struct InsertDataMovForNeuraOps : public RewritePattern { } } - Operation *newOp = rewriter.create(state); - rewriter.replaceOp(op, newOp->getResults()); + Operation *new_op = rewriter.create(state); + rewriter.replaceOp(op, new_op->getResults()); return success(); } }; diff --git a/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp b/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp index 7831b50a..699d22cc 100644 --- a/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp +++ b/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp @@ -64,16 +64,33 @@ struct MapToAcceleratorPass } // AcceleratorConfig config{/*numTiles=*/8}; // Example - Architecture architecture(2, 2); + Architecture architecture(4, 4); int res_mii = calculateResMii(func, architecture); IntegerAttr res_mii_attr = IntegerAttr::get( IntegerType::get(func.getContext(), 32), res_mii); func->setAttr("ResMII", res_mii_attr); const int minII = std::min(rec_mii, res_mii); - constexpr int maxII = 5; + constexpr int maxII = 10; + std::vector sorted_ops = getTopologicallySortedOps(func); + for (Operation *op : sorted_ops) { + llvm::errs() << "[MapToAcceleratorPass] sorted op: " + << *op << "\n"; + } for (int ii = minII; ii <= maxII; ++ii) { - MappingState state(architecture, ii); + MappingState mapping_state(architecture, ii); + if (tryHeuristicMapping(sorted_ops, architecture, mapping_state)) { + // success + llvm::errs() << "[MapToAcceleratorPass] Successfully mapped function '" + << func.getName() << "' with II = " << ii << "\n"; + mapping_state.dumpOpToLocs(); // logs to stderr + mapping_state.encodeMappingState(); + func->setAttr("CompiledII", IntegerAttr::get( + IntegerType::get(func.getContext(), 32), ii)); + break; + } + llvm::errs() << "[DEBUG] mapping failed for II = " << ii << "\n"; + 
mapping_state.dumpOpToLocs(); // logs to stderr } }); } diff --git a/test/neura/ctrl/branch_for.mlir b/test/neura/ctrl/branch_for.mlir index e656bbef..e5a57b8b 100644 --- a/test/neura/ctrl/branch_for.mlir +++ b/test/neura/ctrl/branch_for.mlir @@ -16,8 +16,28 @@ // RUN: --lower-llvm-to-neura \ // RUN: --leverage-predicated-value \ // RUN: --transform-ctrl-to-data-flow \ +// RUN: --insert-data-mov \ +// RUN: | FileCheck %s -check-prefix=MOV + +// RUN: mlir-neura-opt %s \ +// RUN: --assign-accelerator \ +// RUN: --lower-llvm-to-neura \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: --insert-data-mov \ +// RUN: --map-to-accelerator \ +// RUN: | FileCheck %s -check-prefix=MAPPING + +// RUN: mlir-neura-opt %s \ +// RUN: --assign-accelerator \ +// RUN: --lower-llvm-to-neura \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: --insert-data-mov \ // RUN: --map-to-accelerator \ -// RUN: | FileCheck %s -check-prefix=MII +// RUN: --generate-code + +// RUN: FileCheck %s --input-file=generated-instructions.json -check-prefix=INST func.func @loop_test() -> f32 { %n = llvm.mlir.constant(10 : i64) : i64 @@ -81,4 +101,107 @@ func.func @loop_test() -> f32 { // CTRL2DATA-NEXT: "neura.return"(%18) : (!neura.data) -> () // CTRL2DATA-NEXT: } -// MII: func.func @loop_test() -> f32 attributes {RecMII = 4 : i32, ResMII = 4 : i32, accelerator = "neura"} \ No newline at end of file +// MOV: func.func @loop_test() -> f32 attributes {accelerator = "neura"} { +// MOV-NEXT: %0 = "neura.constant"() <{predicate = true, value = 10 : i64}> : () -> !neura.data +// MOV-NEXT: %1 = "neura.data_mov"(%0) : (!neura.data) -> !neura.data +// MOV-NEXT: %2 = "neura.grant_always"(%1) : (!neura.data) -> !neura.data +// MOV-NEXT: %3 = "neura.constant"() <{predicate = true, value = 0 : i64}> : () -> !neura.data +// MOV-NEXT: %4 = "neura.data_mov"(%3) : (!neura.data) -> !neura.data +// MOV-NEXT: %5 = "neura.grant_once"(%4) : 
(!neura.data) -> !neura.data +// MOV-NEXT: %6 = "neura.constant"() <{predicate = true, value = 1 : i64}> : () -> !neura.data +// MOV-NEXT: %7 = "neura.data_mov"(%6) : (!neura.data) -> !neura.data +// MOV-NEXT: %8 = "neura.grant_always"(%7) : (!neura.data) -> !neura.data +// MOV-NEXT: %9 = "neura.constant"() <{predicate = true, value = 3.000000e+00 : f32}> : () -> !neura.data +// MOV-NEXT: %10 = "neura.data_mov"(%9) : (!neura.data) -> !neura.data +// MOV-NEXT: %11 = "neura.grant_always"(%10) : (!neura.data) -> !neura.data +// MOV-NEXT: %12 = "neura.constant"() <{predicate = true, value = 0.000000e+00 : f32}> : () -> !neura.data +// MOV-NEXT: %13 = "neura.data_mov"(%12) : (!neura.data) -> !neura.data +// MOV-NEXT: %14 = "neura.grant_once"(%13) : (!neura.data) -> !neura.data +// MOV-NEXT: %15 = neura.reserve : !neura.data +// MOV-NEXT: %16 = "neura.data_mov"(%5) : (!neura.data) -> !neura.data +// MOV-NEXT: %17 = "neura.phi"(%16, %15) : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %18 = neura.reserve : !neura.data +// MOV-NEXT: %19 = "neura.data_mov"(%14) : (!neura.data) -> !neura.data +// MOV-NEXT: %20 = "neura.phi"(%19, %18) : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %21 = "neura.data_mov"(%20) : (!neura.data) -> !neura.data +// MOV-NEXT: %22 = "neura.data_mov"(%11) : (!neura.data) -> !neura.data +// MOV-NEXT: %23 = "neura.fadd"(%21, %22) : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %24 = "neura.data_mov"(%17) : (!neura.data) -> !neura.data +// MOV-NEXT: %25 = "neura.data_mov"(%8) : (!neura.data) -> !neura.data +// MOV-NEXT: %26 = "neura.add"(%24, %25) : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %27 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data +// MOV-NEXT: %28 = "neura.data_mov"(%2) : (!neura.data) -> !neura.data +// MOV-NEXT: %29 = "neura.icmp"(%27, %28) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// MOV-NEXT: %30 = "neura.data_mov"(%29) : (!neura.data) -> !neura.data +// MOV-NEXT: 
%31 = "neura.not"(%30) : (!neura.data) -> !neura.data +// MOV-NEXT: %32 = "neura.data_mov"(%23) : (!neura.data) -> !neura.data +// MOV-NEXT: %33 = "neura.data_mov"(%31) : (!neura.data) -> !neura.data +// MOV-NEXT: %34 = neura.grant_predicate %32, %33 : !neura.data, !neura.data -> !neura.data +// MOV-NEXT: %35 = "neura.data_mov"(%23) : (!neura.data) -> !neura.data +// MOV-NEXT: %36 = "neura.data_mov"(%29) : (!neura.data) -> !neura.data +// MOV-NEXT: %37 = neura.grant_predicate %35, %36 : !neura.data, !neura.data -> !neura.data +// MOV-NEXT: neura.ctrl_mov %37 -> %18 : !neura.data !neura.data +// MOV-NEXT: %38 = "neura.data_mov"(%26) : (!neura.data) -> !neura.data +// MOV-NEXT: %39 = "neura.data_mov"(%29) : (!neura.data) -> !neura.data +// MOV-NEXT: %40 = neura.grant_predicate %38, %39 : !neura.data, !neura.data -> !neura.data +// MOV-NEXT: neura.ctrl_mov %40 -> %15 : !neura.data !neura.data +// MOV-NEXT: %41 = "neura.data_mov"(%34) : (!neura.data) -> !neura.data +// MOV-NEXT: "neura.return"(%41) : (!neura.data) -> () +// MOV-NEXT: } + +// MAPPING: func.func @loop_test() -> f32 attributes {CompiledII = 6 : i32, RecMII = 4 : i32, ResMII = 1 : i32, accelerator = "neura"} { +// MAPPING-NEXT: %0 = "neura.constant"() <{predicate = true, value = 10 : i64}> {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 0 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %1 = "neura.data_mov"(%0) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %2 = "neura.grant_always"(%1) {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %3 = "neura.constant"() <{predicate = true, value = 0 : i64}> {mapping_locs = [{id = 6 : i32, resource = "tile", time_step = 0 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %4 = "neura.data_mov"(%3) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %5 = "neura.grant_once"(%4) {mapping_locs = [{id = 6 : i32, resource = "tile", time_step = 1 : i32}]} 
: (!neura.data) -> !neura.data +// MAPPING-NEXT: %6 = "neura.constant"() <{predicate = true, value = 1 : i64}> {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 0 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %7 = "neura.data_mov"(%6) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %8 = "neura.grant_always"(%7) {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %9 = "neura.constant"() <{predicate = true, value = 3.000000e+00 : f32}> {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 0 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %10 = "neura.data_mov"(%9) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %11 = "neura.grant_always"(%10) {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %12 = "neura.constant"() <{predicate = true, value = 0.000000e+00 : f32}> {mapping_locs = [{id = 14 : i32, resource = "tile", time_step = 0 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %13 = "neura.data_mov"(%12) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %14 = "neura.grant_once"(%13) {mapping_locs = [{id = 14 : i32, resource = "tile", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %15 = neura.reserve : !neura.data +// MAPPING-NEXT: %16 = "neura.data_mov"(%5) {mapping_locs = [{id = 19 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %17 = "neura.phi"(%16, %15) {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 2 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %18 = neura.reserve : !neura.data +// MAPPING-NEXT: %19 = "neura.data_mov"(%14) {mapping_locs = [{id = 43 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %20 = "neura.phi"(%19, %18) {mapping_locs = [{id = 10 : i32, resource = "tile", 
time_step = 2 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %21 = "neura.data_mov"(%20) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %22 = "neura.data_mov"(%11) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %23 = "neura.fadd"(%21, %22) {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 3 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %24 = "neura.data_mov"(%17) {mapping_locs = [{id = 14 : i32, resource = "link", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %25 = "neura.data_mov"(%8) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %26 = "neura.add"(%24, %25) {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 3 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %27 = "neura.data_mov"(%26) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %28 = "neura.data_mov"(%2) {mapping_locs = [{id = 15 : i32, resource = "link", time_step = 1 : i32}, {id = 11 : i32, resource = "link", time_step = 2 : i32}, {id = 26 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %29 = "neura.icmp"(%27, %28) <{cmpType = "slt"}> {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 4 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %30 = "neura.data_mov"(%29) {mapping_locs = [{id = 27 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %31 = "neura.not"(%30) {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %32 = "neura.data_mov"(%23) {mapping_locs = [{id = 31 : i32, resource = "link", time_step = 3 : i32}, {id = 17 : i32, resource = "link", time_step = 4 : i32}, {id = 6 : i32, resource = "link", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %33 = "neura.data_mov"(%31) 
{mapping_locs = [{id = 13 : i32, resource = "link", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %34 = neura.grant_predicate %32, %33 {mapping_locs = [{id = 1 : i32, resource = "tile", time_step = 6 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %35 = "neura.data_mov"(%23) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %36 = "neura.data_mov"(%29) {mapping_locs = [{id = 30 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %37 = neura.grant_predicate %35, %36 {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 5 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %37 -> %18 {mapping_locs = []} : !neura.data !neura.data +// MAPPING-NEXT: %38 = "neura.data_mov"(%26) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %39 = "neura.data_mov"(%29) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %40 = neura.grant_predicate %38, %39 {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 5 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %40 -> %15 {mapping_locs = [{id = 27 : i32, resource = "link", time_step = 5 : i32}, {id = 27 : i32, resource = "link", time_step = 6 : i32}, {id = 27 : i32, resource = "link", time_step = 7 : i32}]} : !neura.data !neura.data +// MAPPING-NEXT: %41 = "neura.data_mov"(%34) {mapping_locs = []} : (!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.return"(%41) {mapping_locs = [{id = 1 : i32, resource = "tile", time_step = 7 : i32}]} : (!neura.data) -> () +// MAPPING-NEXT: } + +// INST: "name": "neura.fadd", +// INST-NEXT: "operands": [ +// INST-NEXT: "neura.data_mov", +// INST-NEXT: "neura.data_mov" +// INST-NEXT: ], +// INST-NEXT: "result_types": [ +// INST-NEXT: "!neura.data" +// INST-NEXT: ] \ No newline at end of file