diff --git a/include/NeuraDialect/Mapping/mapping_util.h b/include/NeuraDialect/Mapping/mapping_util.h index 21af298a..8c301aa1 100644 --- a/include/NeuraDialect/Mapping/mapping_util.h +++ b/include/NeuraDialect/Mapping/mapping_util.h @@ -12,6 +12,10 @@ OperationKind getOperationKindFromMlirOp(Operation *op); // Returns true if the operation does not need CGRA tile placement. bool is_non_materialized(Operation *op); +// Returns true if the operation is a materialized reserve user, i.e., +// phi, invariant, carry. +bool isMaterializedReserveUser(Operation *op); + // Represents a recurrence cycle rooted at a reserve operation and closed by // ctrl_mov. struct RecurrenceCycle { diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td index ac6946af..06616450 100644 --- a/include/NeuraDialect/NeuraOps.td +++ b/include/NeuraDialect/NeuraOps.td @@ -489,4 +489,82 @@ def Neura_LoopControlOp : Op{ // let assemblyFormat = // " `(``parent_valid` `=` $parentValid `,` `start` `=` $start `,` `end` `=` $end `,` `step` `=` $step`)` attr-dict `:` type($parentValid) `,` type($start) `,` type($end) `,` type($step) `->` type($nextindex) `,` type($valid)"; +} + +// ---------------------------------------------------- +// Defines operations for steering-control based DFG execution. +// ---------------------------------------------------- + +// Defines the true_steer operation. +def Neura_TrueSteerOp : Op{ + let summary = "Conditionally pass a value when condition is true."; + let description = [{When the condition is true, the input value is passed to the output; otherwise, the output is empty. + Example: + %out = neura.true_steer %in, %cond : i32, i1 -> f32 + }]; + + let arguments = (ins AnyType:$input, AnyType:$condition); + let results = (outs AnyType:$output); + + let assemblyFormat = "$input `,` $condition attr-dict `:` type($input) `,` type($condition) `->` type($output)"; +} + +// Defines the false_steer operation. 
+def Neura_FalseSteerOp : Op{ + let summary = "Conditionally pass a value when condition is false."; + let description = [{When the condition is false, the input value is passed to the output; otherwise, the output is empty. + Example: + %out = neura.false_steer %in, %cond : i32, i1 -> f32 + }]; + + let arguments = (ins AnyType:$input, AnyType:$condition); + let results = (outs AnyType:$output); + + let assemblyFormat = "$input `,` $condition attr-dict `:` type($input) `,` type($condition) `->` type($output)"; +} + +// Defines the carry operation. +def Neura_CarryOp : Op{ + let summary = "Carry state across iterations."; + let description = [{ + Three inputs for carry operation: + - initial value: used in the first execution. + - condition: determines whether to use the carried value. + - carried value: used when condition is true. + The output is the initial value when it is executed for the first time, otherwise it is the carried value when the condition is true. + Example: + %out = neura.carry %init, %cond, %carry_val : i32, i1, i32 -> i32 + }]; + + let arguments = (ins AnyType:$initial, AnyType:$condition, AnyType:$carried); + let results = (outs AnyType:$result); + let assemblyFormat = "$initial `,` $condition `,` $carried attr-dict `:` type($initial) `,` type($condition) `,` type($carried) `->` type($result)"; +} + +// Defines the merge operation. +def Neura_MergeOp : Op{ + let summary = "Merge multiple inputs into one output."; + let description = [{ + Merges multiple input values into a single output value based on the condition. + Example: + %out = neura.merge %cond, %in1, %in2 : i1, i32, i32 -> i32 + }]; + + let arguments = (ins AnyType:$condition, AnyType:$true_value, AnyType:$false_value); + let results = (outs AnyType:$result); + + let assemblyFormat = "$condition `,` $true_value `,` $false_value attr-dict `:` type($condition) `,` type($true_value) `,` type($false_value) `->` type($result)"; +} + +// Defines the invariant operation. 
+def Neura_InvariantOp : Op{ + let summary = "Invariant value across DFG execution."; + let description = [{ + Invariant operation is a subset of carry operation where the output is always the initial value. + Example: + %out = neura.invariant %init %cond : i32, i1 -> i32 + }]; + let arguments = (ins AnyType:$initial, AnyType:$condition); + let results = (outs AnyType:$result); + let assemblyFormat = "$initial `,` $condition attr-dict `:` type($initial) `,` type($condition) `->` type($result)"; } \ No newline at end of file diff --git a/include/NeuraDialect/NeuraPasses.h b/include/NeuraDialect/NeuraPasses.h index 5a220d12..1f2cf046 100644 --- a/include/NeuraDialect/NeuraPasses.h +++ b/include/NeuraDialect/NeuraPasses.h @@ -27,6 +27,8 @@ std::unique_ptr createMapToAcceleratorPass(); std::unique_ptr createGenerateCodePass(); std::unique_ptr createCanonicalizeLiveInPass(); std::unique_ptr createPromoteFuncArgToConstPass(); +std::unique_ptr createTransformToSteerControlPass(); +std::unique_ptr createRemovePredicatedTypePass(); // ==================================== // Optimization Passes diff --git a/include/NeuraDialect/NeuraPasses.td b/include/NeuraDialect/NeuraPasses.td index c96e0db1..2623b4be 100644 --- a/include/NeuraDialect/NeuraPasses.td +++ b/include/NeuraDialect/NeuraPasses.td @@ -116,4 +116,23 @@ def FoldConstant : Pass<"fold-constant", "ModuleOp"> { let constructor = "neura::createFoldConstantPass()"; } +def TransformToSteerControl : Pass<"transform-to-steer-control", "func::FuncOp"> { + let summary = "Transform control flow into data flow using steer control"; + let description = [{ + This pass transforms Neura control flow graphs (CDFG) into pure dataflow graphs (DFG) + using steer control operations like true_steer, false_steer, carry, and merge. + Unlike predication-based approaches, steer control explicitly directs data through + different paths based on conditions. 
+ }]; + let constructor = "neura::createTransformToSteerControlPass()"; +} + +def RemovePredicatedType : Pass<"remove-predicated-type", "ModuleOp"> { + let summary = "Removes predicated types from Neura dialect operations"; + let description = [{ + This pass removes predicated types from Neura dialect operations, + converting them back to regular types. + }]; + let constructor = "neura::createRemovePredicatedTypePass()"; +} #endif // NEURA_PASSES_TD \ No newline at end of file diff --git a/lib/NeuraDialect/Mapping/mapping_util.cpp b/lib/NeuraDialect/Mapping/mapping_util.cpp index 2f3d9f93..a7d2749a 100644 --- a/lib/NeuraDialect/Mapping/mapping_util.cpp +++ b/lib/NeuraDialect/Mapping/mapping_util.cpp @@ -316,12 +316,9 @@ mlir::Operation *mlir::neura::getMaterializedBackwardUser(Operation *op) { "Expected the user of ctrl_mov target to be a reserve operation"); auto reserve_op = dyn_cast(target.getDefiningOp()); - // Skip ctrl_mov users of reserve; return the first phi user. + // Skip ctrl_mov users of reserve; return the first materialized user. for (Operation *user : reserve_op.getResult().getUsers()) { - if (isa(user)) { - continue; // skip ctrl_mov user - } - if (isa(user)) { + if (isMaterializedReserveUser(user)) { return user; } } @@ -702,6 +699,19 @@ bool mlir::neura::canReachLocInTime(const MappingLoc &src_loc, return false; } +bool mlir::neura::isMaterializedReserveUser(Operation *user) { + if (isa(user)) { + return true; + } + if (isa(user)) { + return true; + } + if (isa(user)) { + return true; + } + return false; +} + void mlir::neura::updateAward(std::map &locs_with_award, MappingLoc loc, int award) { // Updates the award of the top element in the priority queue. 
@@ -752,8 +762,9 @@ mlir::neura::calculateAward(Operation *op, std::set &critical_ops, assert(ctrl_mov && "Expected user to be a CtrlMovOp"); mlir::Operation *materialized_backward_op = getMaterializedBackwardUser(ctrl_mov); - assert(isa(materialized_backward_op) && - "Expected materialized operation of ctrl_mov to be a PhiOp"); + assert(isMaterializedReserveUser(materialized_backward_op) && + "Expected materialized operation of ctrl_mov to be a " + "PhiOp/InvariantOp/CarryOp."); backward_users.push_back(materialized_backward_op); } @@ -794,10 +805,7 @@ mlir::neura::calculateAward(Operation *op, std::set &critical_ops, award += op->getOperands().size() - getPhysicalHops(producers, tile, mapping_state); } - // llvm::errs() << "[DEBUG] checking range: " - // << earliest_start_time_step << " to " - // << latest_end_time_step << " for tile: " - // << tile->getType() << "#" << tile->getId() << "\n"; + for (int t = earliest_start_time_step; t < latest_end_time_step; t += 1) { MappingLoc tile_loc_candidate = {tile, t}; // If the tile at time `t` is available, we can consider it for mapping. @@ -942,8 +950,9 @@ bool mlir::neura::placeAndRoute(Operation *op, const MappingLoc &target_loc, assert(ctrl_mov && "Expected user to be a CtrlMovOp"); mlir::Operation *materialized_backward_op = getMaterializedBackwardUser(ctrl_mov); - assert(isa(materialized_backward_op) && - "Expected materialized operation of ctrl_mov to be a PhiOp"); + assert(isMaterializedReserveUser(materialized_backward_op) && + "Expected materialized operation of ctrl_mov to be a " + "PhiOp/InvariantOp/CarryOp"); // Gets the last location of the materialized operation. 
MappingLoc backward_loc = mapping_state.getAllLocsOfOp(materialized_backward_op).back(); diff --git a/lib/NeuraDialect/Transforms/CMakeLists.txt b/lib/NeuraDialect/Transforms/CMakeLists.txt index 4913cc8a..62d93f23 100644 --- a/lib/NeuraDialect/Transforms/CMakeLists.txt +++ b/lib/NeuraDialect/Transforms/CMakeLists.txt @@ -13,6 +13,8 @@ add_mlir_library( CanonicalizeLiveInPass.cpp CanonicalizeCastPass.cpp PromoteFuncArgToConstPass.cpp + TransformToSteerControlPass.cpp + RemovePredicatedTypePass.cpp DEPENDS MLIRNeuraTransformsIncGen diff --git a/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp b/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp index e9fe627f..1a5cd810 100644 --- a/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp +++ b/lib/NeuraDialect/Transforms/InsertDataMovPass.cpp @@ -77,9 +77,9 @@ struct InsertDataMovForNeuraOps : public RewritePattern { SmallVector new_operands; for (Value operand : op->getOperands()) { Operation *producer = operand.getDefiningOp(); - // Skips adding mov for neura.reserve -> neura.phi. - if (isa(op) && producer && - isa(producer)) { + + // Skips adding mov for any operand that comes from a reserve op. 
+ if (producer && isa(producer)) { new_operands.push_back(operand); continue; } diff --git a/lib/NeuraDialect/Transforms/LeveragePredicatedValuePass.cpp b/lib/NeuraDialect/Transforms/LeveragePredicatedValuePass.cpp index e78dfd6c..62f946ac 100644 --- a/lib/NeuraDialect/Transforms/LeveragePredicatedValuePass.cpp +++ b/lib/NeuraDialect/Transforms/LeveragePredicatedValuePass.cpp @@ -45,15 +45,15 @@ struct LeveragePredicatedValuePass } for (BlockArgument arg : block->getArguments()) { - Type origType = arg.getType(); + Type orig_type = arg.getType(); // Avoid double-wrapping if already predicated - if (llvm::isa(origType)) { + if (llvm::isa(orig_type)) { continue; } auto predicated_type = neura::PredicatedValue::get( - func.getContext(), origType, + func.getContext(), orig_type, IntegerType::get(func.getContext(), 1)); arg.setType(predicated_type); } diff --git a/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp b/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp index 906f5ac1..7e11d696 100644 --- a/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp +++ b/lib/NeuraDialect/Transforms/MapToAcceleratorPass.cpp @@ -63,41 +63,41 @@ struct MapToAcceleratorPass void runOnOperation() override { ModuleOp module = getOperation(); std::unique_ptr mapping_strategy; - StringRef mappingStrategy_stringRef(mappingStrategy.getValue()); - StringRef backtrackConfig_stringRef(backtrackConfig.getValue()); - StringRef mappingMode_stringRef(mappingMode.getValue()); - bool is_spatial_only = (mappingMode_stringRef == "spatial-only"); - if (is_spatial_only || mappingMode_stringRef == "spatial-temporal" || - mappingMode_stringRef.empty()) { - if (mappingMode_stringRef.empty()) { - mappingMode_stringRef = "spatial-temporal"; + StringRef mapping_strategy_stringRef(mappingStrategy.getValue()); + StringRef backtrack_config_stringRef(backtrackConfig.getValue()); + StringRef mapping_mode_stringRef(mappingMode.getValue()); + bool is_spatial_only = (mapping_mode_stringRef == "spatial-only"); + 
if (is_spatial_only || mapping_mode_stringRef == "spatial-temporal" || + mapping_mode_stringRef.empty()) { + if (mapping_mode_stringRef.empty()) { + mapping_mode_stringRef = "spatial-temporal"; } llvm::errs() << "[MapToAcceleratorPass] Using Mapping Mode: " - << mappingMode_stringRef << "\n"; + << mapping_mode_stringRef << "\n"; } else { llvm::errs() << "[MapToAcceleratorPass] Unsupported mapping mode: " - << mappingMode_stringRef << "\n"; + << mapping_mode_stringRef << "\n"; return; } - if (mappingStrategy_stringRef == "heuristic" || - mappingStrategy_stringRef.empty()) { - mappingStrategy_stringRef = "heuristic"; + if (mapping_strategy_stringRef == "heuristic" || + mapping_strategy_stringRef.empty()) { + mapping_strategy_stringRef = "heuristic"; - if (backtrackConfig_stringRef == "simple") { + if (backtrack_config_stringRef == "simple") { mapping_strategy = std::make_unique(1, 1); - } else if (backtrackConfig_stringRef == "greedy") { + } else if (backtrack_config_stringRef == "greedy") { mapping_strategy = std::make_unique(INT_MAX, 1); - } else if (backtrackConfig_stringRef == "exhaustive") { + } else if (backtrack_config_stringRef == "exhaustive") { mapping_strategy = std::make_unique(INT_MAX, INT_MAX); - } else if (backtrackConfig_stringRef == "customized") { + } else if (backtrack_config_stringRef == "customized") { mapping_strategy = std::make_unique(5, 3); - } else if (backtrackConfig_stringRef.starts_with("customized=")) { + } else if (backtrack_config_stringRef.starts_with("customized=")) { // Used for custom backtrack parameters. // Example: "customized=5,3" means max_loc=5, max_depth=3 // Extracts the parameters after "customized=". 
StringRef paramsRef = - backtrackConfig_stringRef.substr(strlen("customized=")); + backtrack_config_stringRef.substr(strlen("customized=")); size_t comma_pos = paramsRef.find(','); if (comma_pos != StringRef::npos) { @@ -116,19 +116,19 @@ struct MapToAcceleratorPass } else { llvm::errs() << "[MapToAcceleratorPass] Illegal customized " "parameters format: " - << backtrackConfig_stringRef << "\n"; + << backtrack_config_stringRef << "\n"; return; } } else { llvm::errs() << "[MapToAcceleratorPass] Illegal customized parameters format: " - << backtrackConfig_stringRef << "\n"; + << backtrack_config_stringRef << "\n"; return; } } } else { llvm::errs() << "[MapToAcceleratorPass] Unsupported mapping strategy: " - << mappingStrategy_stringRef << "\n"; + << mapping_strategy_stringRef << "\n"; return; } @@ -139,6 +139,26 @@ struct MapToAcceleratorPass return; } + // Checks the dataflow IR mode. + auto dataflow_mode_attr = + func->getAttrOfType("dataflow_mode"); + bool is_steering_mode = + (dataflow_mode_attr && dataflow_mode_attr.getValue() == "steering"); + + // If steering mode, enforce spatial-only mapping. + if (is_steering_mode) { + if (mapping_mode_stringRef != "spatial-only") { + func.emitError() << "Steering IR mode requires spatial-only mapping, " + << "but got mapping mode: " + << mapping_mode_stringRef; + signalPassFailure(); + return; + } + llvm::errs() << "[MapToAcceleratorPass] Using spatial-only mapping for " + "steering mode function: " + << func.getName() << "\n"; + } + // Collects and reports recurrence cycles found in the function. 
auto recurrence_cycles = collectRecurrenceCycles(func); std::set critical_ops; @@ -228,9 +248,9 @@ struct MapToAcceleratorPass IntegerAttr::get(IntegerType::get(ctx, 32), architecture.getHeight())), NamedAttribute(StringAttr::get(ctx, "mapping_strategy"), - StringAttr::get(ctx, mappingStrategy_stringRef)), + StringAttr::get(ctx, mapping_strategy_stringRef)), NamedAttribute(StringAttr::get(ctx, "mapping_mode"), - StringAttr::get(ctx, mappingMode_stringRef)), + StringAttr::get(ctx, mapping_mode_stringRef)), NamedAttribute(StringAttr::get(ctx, "compiled_ii"), IntegerAttr::get(IntegerType::get(ctx, 32), ii)), NamedAttribute( diff --git a/lib/NeuraDialect/Transforms/RemovePredicatedTypePass.cpp b/lib/NeuraDialect/Transforms/RemovePredicatedTypePass.cpp new file mode 100644 index 00000000..68647bfd --- /dev/null +++ b/lib/NeuraDialect/Transforms/RemovePredicatedTypePass.cpp @@ -0,0 +1,156 @@ +#include "NeuraDialect/NeuraDialect.h" +#include "NeuraDialect/NeuraOps.h" +#include "NeuraDialect/NeuraPasses.h" +#include "NeuraDialect/NeuraTypes.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/Support/raw_ostream.h" + +using namespace mlir; + +#define GEN_PASS_DEF_REMOVEPREDICATEDTYPE +#include "NeuraDialect/NeuraPasses.h.inc" + +namespace { + +struct RemovePredicatedTypePass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(RemovePredicatedTypePass) + + StringRef getArgument() const override { return "remove-predicated-type"; } + StringRef getDescription() const override { + return "Remove predicated types from Neura dialect operations, reverting " + "to basic types."; + } + + void runOnOperation() override { + ModuleOp module = getOperation(); + + // Processes each function. 
+ module.walk([&](FunctionOpInterface func) { + auto accel_attr = func->getAttrOfType("accelerator"); + if (!accel_attr || accel_attr.getValue() != "neura") { + return; + } + + // Converts block arguments. + func.walk([&](Block *block) { + // Processes block arguments. + for (BlockArgument arg : block->getArguments()) { + Type orig_type = arg.getType(); + if (auto predicated_type = + llvm::dyn_cast(orig_type)) { + arg.setType(predicated_type.getValueType()); + } + } + }); + + // Gets operations in topological order. + SmallVector ordered_ops; + getOperationsInTopologicalOrder(func, ordered_ops); + + // Processes each operation in topological order. + for (Operation *op : ordered_ops) { + if (failed(removePredicatedType(op))) { + llvm::errs() << "Failed to convert op from predicated form: " << *op + << "\n"; + signalPassFailure(); + return; + } + } + }); + } + +private: + // Gets operations in topological order. + void getOperationsInTopologicalOrder(FunctionOpInterface func, + SmallVector &ordered_ops) { + DenseSet visited_ops; + func.walk([&](Operation *op) { + if (visited_ops.contains(op)) { + return; + } + + // Visits operands first. + for (Value operand : op->getOperands()) { + if (Operation *def_op = operand.getDefiningOp()) { + if (!visited_ops.contains(def_op)) { + visited_ops.insert(def_op); + ordered_ops.push_back(def_op); + } + } + } + + if (!visited_ops.contains(op)) { + visited_ops.insert(op); + ordered_ops.push_back(op); + } + }); + } + + // Converts a single operation from predicated to normal types. + LogicalResult removePredicatedType(Operation *op) { + // Skips if not a Neura op. + if (op->getDialect()->getNamespace() != "neura") { + return success(); + } + + // Skips if no results or no predicated types. + if (op->getNumResults() == 0 || + !llvm::any_of(op->getResultTypes(), [](Type t) { + return mlir::isa(t); + })) { + return success(); + } + + // Converts result types to non-predicated form. 
+ OpBuilder builder(op); + SmallVector new_results; + for (Type t : op->getResultTypes()) { + if (auto predicated_type = llvm::dyn_cast(t)) { + new_results.push_back(predicated_type.getValueType()); + } else { + new_results.push_back(t); + } + } + + // Creates new operation with updated result types. + OperationState state(op->getLoc(), op->getName()); + state.addOperands(op->getOperands()); + state.addTypes(new_results); + state.addAttributes(op->getAttrs()); + + // Copies regions if needed. + for (unsigned i = 0; i < op->getNumRegions(); ++i) { + state.addRegion(); + } + + Operation *new_op = builder.create(state); + + // Moves regions if any. + for (unsigned i = 0; i < op->getNumRegions(); ++i) { + Region &old_region = op->getRegion(i); + Region &new_region = new_op->getRegion(i); + new_region.takeBody(old_region); + } + + // Replaces old op. + op->replaceAllUsesWith(new_op); + op->erase(); + return success(); + } +}; + +} // namespace + +namespace mlir { +namespace neura { + +std::unique_ptr createRemovePredicatedTypePass() { + return std::make_unique(); +} + +} // namespace neura +} // namespace mlir \ No newline at end of file diff --git a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp index 2b06740e..7875c7a5 100644 --- a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp +++ b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp @@ -545,6 +545,42 @@ void transformControlFlowToDataFlow(Region ®ion, ControlFlowInfo &ctrl_info, "block after flattening.\n"; assert(false && "No ReturnOp found in the entry block after flattening."); } + + // Sets the "dataflow_mode" attribute to "predicate" for the parent function. 
+ if (auto func = dyn_cast(region.getParentOp())) { + if (!func->hasAttr("dataflow_mode")) { + func->setAttr("dataflow_mode", + StringAttr::get(func.getContext(), "predicate")); + llvm::errs() + << "[ctrl2data] Set dataflow mode to predicate for function: " + << func.getName() << "\n"; + } else { + llvm::errs() + << "[ctrl2data] Function " << func.getName() + << " already has dataflow_mode set to " + << func->getAttrOfType("dataflow_mode").getValue() + << "\n"; + func->setAttr("dataflow_mode", + StringAttr::get(func.getContext(), "predicate")); + } + } else if (auto llvm_func = + dyn_cast(region.getParentOp())) { + if (!llvm_func->hasAttr("dataflow_mode")) { + llvm_func->setAttr("dataflow_mode", + StringAttr::get(llvm_func.getContext(), "predicate")); + llvm::errs() + << "[ctrl2data] Set dataflow mode to predicate for LLVM function: " + << llvm_func.getName() << "\n"; + } else { + llvm::errs() + << "[ctrl2data] LLVM function " << llvm_func.getName() + << " already has dataflow_mode set to " + << llvm_func->getAttrOfType("dataflow_mode").getValue() + << "\n"; + llvm_func->setAttr("dataflow_mode", + StringAttr::get(llvm_func.getContext(), "predicate")); + } + } } namespace { diff --git a/lib/NeuraDialect/Transforms/TransformToSteerControlPass.cpp b/lib/NeuraDialect/Transforms/TransformToSteerControlPass.cpp new file mode 100644 index 00000000..f11df157 --- /dev/null +++ b/lib/NeuraDialect/Transforms/TransformToSteerControlPass.cpp @@ -0,0 +1,755 @@ +#include "NeuraDialect/NeuraDialect.h" +#include "NeuraDialect/NeuraOps.h" +#include "NeuraDialect/NeuraPasses.h" +#include "NeuraDialect/NeuraTypes.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Value.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/DenseMap.h" 
+#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/LogicalResult.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace mlir; + +#define GEN_PASS_DEF_TRANSFORMTOSTEERCONTROL +#include "NeuraDialect/NeuraPasses.h.inc" + +namespace { +class OperationsToErase { +public: + void markForErasure(Operation *op) { + if (op) { + ops_to_erase.insert(op); + } + } + + void eraseMarkedOperations() { + for (auto it = this->ops_to_erase.rbegin(); it != this->ops_to_erase.rend(); + ++it) { + if (!(*it)->use_empty()) { + continue; + } + (*it)->erase(); + } + ops_to_erase.clear(); + } + +private: + llvm::SetVector ops_to_erase; +}; + +class LoopAnalyzer { +public: + struct LoopRecurrenceInfo { + // The reserve operation that starts the loop. + neura::ReserveOp reserve_op; + // The phi operation that merges values from different iterations. + neura::PhiOp phi_op; + // The initial value before the loop starts. + Value initial_value; + // The condition that controls the loop continuation. + Value condition; + // The value that is passed back to the next iteration. + Value backward_value; + // Whether the loop is invariant (i.e., does not depend on the loop body). + bool is_invariant; + }; + + LoopAnalyzer(func::FuncOp func) { + // Map from values to their corresponding reserve operations. + llvm::DenseMap value_to_reserve_map; + + func.walk([&](neura::ReserveOp op) { + value_to_reserve_map[op.getResult()] = op; + }); + + llvm::DenseMap>> + target_to_source_ctrl_mov_map; + + func.walk([&](neura::CtrlMovOp op) { + target_to_source_ctrl_mov_map[op.getTarget()].push_back( + {op.getValue(), op}); + }); + + // Analyzes phi operations and the backward edges. 
+ func.walk([&](neura::PhiOp phi_op) { + for (Value input : phi_op->getOperands()) { + auto reserve_it = value_to_reserve_map.find(input); + if (reserve_it == value_to_reserve_map.end()) { + continue; + } + + neura::ReserveOp reserve_op = reserve_it->second; + auto ctrl_mov_it = + target_to_source_ctrl_mov_map.find(reserve_op.getResult()); + assert(ctrl_mov_it != target_to_source_ctrl_mov_map.end() && + "Reserve output must be a target of a ctrl_mov operation"); + + for (auto &[source_value, ctrl_mov_op] : ctrl_mov_it->second) { + Value initial_value = nullptr; + for (Value phi_input : phi_op->getOperands()) { + if (phi_input != reserve_op.getResult()) { + initial_value = phi_input; + break; + } + } + assert(initial_value != nullptr && "Phi must have an initial value"); + + Value condition = nullptr; + neura::GrantPredicateOp grant_op = nullptr; + for (auto phi_user : phi_op->getUsers()) { + if (isa(phi_user)) { + auto def_op = llvm::dyn_cast(phi_user); + if (def_op.getValue() == phi_op.getResult() && + !isa(def_op.getPredicate().getDefiningOp())) { + llvm::errs() << "[ctrl2steer] Found loop condition: " + << def_op.getPredicate() << "\n"; + condition = def_op.getPredicate(); + grant_op = def_op; + break; + } + } + } + + assert(condition && grant_op && + "Phi must have a corresponding grant_predicate operation"); + + // Checks if the source_value is a loop invariant. + bool is_invariant = false; + if (source_value == grant_op.getResult()) { + is_invariant = true; + } + + // Records the loop information. + this->loop_recurrences.push_back({reserve_op, phi_op, initial_value, + condition, source_value, + is_invariant}); + + // Maps the phi operation to its loop recurrence index. + this->phi_to_loop_recurrences[phi_op.getResult()] = + loop_recurrences.size() - 1; + + // Records the reserve operations that are part of loops. 
+ this->loop_reserves.insert(reserve_op.getResult()); + } + } + }); + } + + const llvm::SmallVector &getLoopRecurrences() const { + return loop_recurrences; + } + + bool isLoopReserve(Value value) const { + return loop_reserves.contains(value); + } + + bool isLoopPhi(Value value) const { + return phi_to_loop_recurrences.contains(value); + } + + const LoopRecurrenceInfo *getLoopRecurrenceInfo(Value phi_value) const { + auto it = phi_to_loop_recurrences.find(phi_value); + if (it != phi_to_loop_recurrences.end()) { + return &loop_recurrences[it->second]; + } + return nullptr; + } + +private: + llvm::SmallVector loop_recurrences; + llvm::DenseMap phi_to_loop_recurrences; + llvm::DenseSet loop_reserves; +}; + +class BackwardValueHandler { +public: + BackwardValueHandler(PatternRewriter &rewriter) : rewriter(rewriter) {} + + Value createReserveForBackwardValue(Value backward_value, + Operation *insertion_point) { + auto it = backward_value_reserve_map.find(backward_value); + if (it != backward_value_reserve_map.end()) { + return it->second; + } + + llvm::errs() << "[ctrl2steer] Creating reserve for backward value: " + << backward_value << "\n"; + + // Creates the reserve operation for the backward value. + this->rewriter.setInsertionPointToStart(insertion_point->getBlock()); + auto reserve_op = this->rewriter.create( + backward_value.getLoc(), backward_value.getType()); + this->backward_value_reserve_map[backward_value] = reserve_op.getResult(); + + llvm::errs() << "[ctrl2steer] Creating ctrl_mov for backward value: " + << backward_value << "\n"; + + // Creates a ctrl_mov operation to move the backward value into the reserve. + this->rewriter.setInsertionPointAfter(backward_value.getDefiningOp()); + this->rewriter.create( + backward_value.getLoc(), backward_value, reserve_op.getResult()); + + return reserve_op.getResult(); + } + +private: + PatternRewriter &rewriter; + // Map from backward values to their corresponding reserve values. 
+ llvm::DenseMap backward_value_reserve_map; +}; + +class PhiToCarryPattern : public OpRewritePattern { +public: + PhiToCarryPattern(MLIRContext *context, const LoopAnalyzer &loop_analyzer, + BackwardValueHandler &backward_value_handler, + OperationsToErase &ops_to_erase) + : OpRewritePattern(context), loop_analyzer(loop_analyzer), + backward_value_handler(backward_value_handler), + ops_to_erase(ops_to_erase) {} + + LogicalResult matchAndRewrite(neura::PhiOp phi_op, + PatternRewriter &rewriter) const override { + // If the phi operation is not part of a loop, we do not handle it here. + if (!loop_analyzer.isLoopPhi(phi_op.getResult())) { + return failure(); + } + + const auto *loop_recurrence_info = + loop_analyzer.getLoopRecurrenceInfo(phi_op.getResult()); + assert(loop_recurrence_info && "Loop recurrence info must be available"); + + // Creates a reserve operation for the loop condition. + Value condition = loop_recurrence_info->condition; + assert(condition && "Loop condition must be available"); + Value condition_reserve = + this->backward_value_handler.createReserveForBackwardValue(condition, + phi_op); + + // Creates a carry or a invariant operation based on whether the loop + // recurrence is invariant. 
+ rewriter.setInsertionPoint(phi_op); + Value result; + if (loop_recurrence_info->is_invariant) { + auto invariant_op = rewriter.create( + phi_op.getLoc(), phi_op.getType(), + loop_recurrence_info->initial_value, condition_reserve); + result = invariant_op.getResult(); + // rewriter.replaceOp(phi_op, invariant_op.getResult()); + } else { + Value backward_reserve = + this->backward_value_handler.createReserveForBackwardValue( + loop_recurrence_info->backward_value, phi_op); + auto carry_op = + rewriter.create(phi_op.getLoc(), phi_op.getType(), + loop_recurrence_info->initial_value, + condition_reserve, backward_reserve); + result = carry_op.getResult(); + // rewriter.replaceOp(phi_op, carry_op.getResult()); + } + + llvm::SmallVector related_grant_ops; + for (auto *user : phi_op->getUsers()) { + if (auto grant_op = dyn_cast(user)) { + if (grant_op.getValue() == phi_op.getResult() && + grant_op.getPredicate() == condition) { + related_grant_ops.push_back(grant_op); + } + } + } + + // Marks the related operations for erasure. + for (auto grant_op : related_grant_ops) { + rewriter.replaceAllOpUsesWith(grant_op, result); + ops_to_erase.markForErasure(grant_op); + } + + rewriter.replaceOp(phi_op, result); + + this->ops_to_erase.markForErasure(loop_recurrence_info->reserve_op); + for (auto *user : loop_recurrence_info->reserve_op->getUsers()) { + if (auto ctrl_mov_op = dyn_cast(user)) { + ops_to_erase.markForErasure(ctrl_mov_op); + } + } + return success(); + } + +private: + const LoopAnalyzer &loop_analyzer; + BackwardValueHandler &backward_value_handler; + OperationsToErase &ops_to_erase; +}; + +class MergePatternFinder { +public: + struct MergeCandidate { + Value condition; + Value true_value; + Value false_value; + neura::GrantPredicateOp true_grant; + neura::GrantPredicateOp false_grant; + neura::PhiOp phi_op; + neura::NotOp not_op; + }; + + MergePatternFinder(func::FuncOp func) { + // Collects all the not operations. 
+ llvm::DenseMap not_value_to_op; + llvm::DenseMap negated_to_condition; + + func.walk([&](neura::NotOp not_op) { + not_value_to_op[not_op.getResult()] = not_op; + negated_to_condition[not_op.getResult()] = not_op.getInput(); + }); + + // Collects all the grant_predicate operations based on their conditions. + llvm::DenseMap> + condition_to_grants; + func.walk([&](neura::GrantPredicateOp grant_op) { + condition_to_grants[grant_op.getPredicate()].push_back(grant_op); + }); + + func.walk([&](neura::PhiOp phi_op) { + // Phi operation must have two operands. + if (phi_op.getNumOperands() != 2) { + return; + } + + Value input0 = phi_op.getOperand(0); + Value input1 = phi_op.getOperand(1); + + // Each operand must be produced by a grant_predicate operation. + auto grant0 = input0.getDefiningOp(); + auto grant1 = input1.getDefiningOp(); + if (!grant0 || !grant1) { + return; + } + + // Checks if the conditions of the two grant_predicate operations are + // complementary. + Value cond0 = grant0.getPredicate(); + Value cond1 = grant1.getPredicate(); + + // Checks if one condition is the negation of the other. + neura::NotOp not_op = nullptr; + Value original_cond; + bool cond0_is_original = false; + + // case 1: cond0 is the original condition, cond1 is its negation. + if (auto it = not_value_to_op.find(cond1); + it != not_value_to_op.end() && it->second.getInput() == cond0) { + not_op = it->second; + original_cond = cond0; + cond0_is_original = true; + } + // case 2: cond1 is the original condition, cond0 is its negation. + else if (auto it = not_value_to_op.find(cond0); + it != not_value_to_op.end() && it->second.getInput() == cond1) { + not_op = it->second; + original_cond = cond1; + cond0_is_original = false; + } + // Conditions are not complementary, not the pattern we are looking for. + else { + return; + } + + // Determines true branch and false branch + neura::GrantPredicateOp true_grant = cond0_is_original ? 
grant0 : grant1; + neura::GrantPredicateOp false_grant = cond0_is_original ? grant1 : grant0; + Value true_value = true_grant.getValue(); + Value false_value = false_grant.getValue(); + + // Records the found merge pattern + merge_candidates.push_back({original_cond, true_value, false_value, + true_grant, false_grant, phi_op, not_op}); + + // Records the operations involved in the merge + phi_in_merge[phi_op] = merge_candidates.size() - 1; + grants_in_merge.insert(true_grant); + grants_in_merge.insert(false_grant); + if (not_op) { + nots_in_merge.insert(not_op); + } + }); + } + + llvm::SmallVector getMergeCandidates() const { + return this->merge_candidates; + } + + bool isPhiInMerge(neura::PhiOp phi_op) const { + return phi_in_merge.contains(phi_op); + } + + bool isGrantInMerge(neura::GrantPredicateOp grant_op) const { + return grants_in_merge.contains(grant_op); + } + + bool isNotInMerge(neura::NotOp not_op) const { + return nots_in_merge.contains(not_op); + } + + const MergeCandidate *getMergeCandidateForPhi(neura::PhiOp phi_op) const { + auto it = phi_in_merge.find(phi_op); + if (it != phi_in_merge.end()) + return &merge_candidates[it->second]; + return nullptr; + } + +private: + llvm::SmallVector merge_candidates; + llvm::DenseMap phi_in_merge; + llvm::DenseSet grants_in_merge; + llvm::DenseSet nots_in_merge; +}; + +class PhiToMergePattern : public OpRewritePattern { +public: + PhiToMergePattern(MLIRContext *context, + const MergePatternFinder &merge_pattern_finder, + OperationsToErase &ops_to_erase) + : OpRewritePattern(context), + merge_pattern_finder(merge_pattern_finder), ops_to_erase(ops_to_erase) { + } + + LogicalResult matchAndRewrite(neura::PhiOp phi_op, + PatternRewriter &rewriter) const override { + // Checks if the phi operation is part of a merge pattern. 
+ if (!merge_pattern_finder.isPhiInMerge(phi_op)) { + return failure(); + } + + const auto *merge_candidate = + merge_pattern_finder.getMergeCandidateForPhi(phi_op); + if (!merge_candidate) { + return failure(); + } + + rewriter.setInsertionPoint(phi_op); + auto merge_op = rewriter.create( + phi_op.getLoc(), phi_op.getType(), merge_candidate->condition, + merge_candidate->true_value, merge_candidate->false_value); + + rewriter.replaceOp(phi_op, merge_op.getResult()); + + // Marks the related operations for erasure. + ops_to_erase.markForErasure(merge_candidate->true_grant); + ops_to_erase.markForErasure(merge_candidate->false_grant); + ops_to_erase.markForErasure(merge_candidate->not_op); + + return success(); + } + +private: + const MergePatternFinder &merge_pattern_finder; + OperationsToErase &ops_to_erase; +}; + +class SteerPhiToMergePattern : public OpRewritePattern { +public: + SteerPhiToMergePattern(MLIRContext *context, OperationsToErase &ops_to_erase, + MergePatternFinder &merge_pattern_finder, + LoopAnalyzer &loop_analyzer) + : OpRewritePattern(context), ops_to_erase(ops_to_erase), + merge_pattern_finder(merge_pattern_finder), + loop_analyzer(loop_analyzer) {} + + LogicalResult matchAndRewrite(neura::PhiOp phi_op, + PatternRewriter &rewriter) const override { + // Checks if the phi operation has two operands. + if (phi_op.getNumOperands() != 2) { + return failure(); + } + + Value input0 = phi_op.getOperand(0); + Value input1 = phi_op.getOperand(1); + + // Checks if the phi operation is already part of a merge pattern or a loop + // recurrence. 
+ if (merge_pattern_finder.isPhiInMerge(phi_op) || + loop_analyzer.isLoopPhi(phi_op.getResult())) { + return failure(); + } + + Value condition = nullptr; + Value true_value = nullptr; + Value false_value = nullptr; + + // Case 1: Direct pattern: true_steer + false_steer + if (tryMatchDirectSteerPattern(input0, input1, condition, true_value, + false_value) || + tryMatchDirectSteerPattern(input1, input0, condition, true_value, + false_value)) { + createMergeOp(phi_op, rewriter, condition, true_value, false_value); + return success(); + } + + // Case 2: One input is based on a true_steer, the other is a false_steer. + if (tryMatchIndirectSteerPattern(input0, input1, condition, true_value, + false_value) || + tryMatchIndirectSteerPattern(input1, input0, condition, true_value, + false_value)) { + createMergeOp(phi_op, rewriter, condition, true_value, false_value); + return success(); + } + + return failure(); + } + +private: + OperationsToErase &ops_to_erase; + MergePatternFinder &merge_pattern_finder; + LoopAnalyzer &loop_analyzer; + + // Checks for direct true_steer + false_steer pattern. + bool tryMatchDirectSteerPattern(Value input1, Value input2, Value &condition, + Value &true_value, Value &false_value) const { + auto true_steer = input1.getDefiningOp(); + if (!true_steer) { + return false; + } + + auto false_steer = input2.getDefiningOp(); + if (!false_steer) { + return false; + } + + // Checks if both steer operations share the same condition. + if (true_steer.getCondition() != false_steer.getCondition()) { + return false; + } + + condition = true_steer.getCondition(); + true_value = input1; + false_value = input2; + return true; + } + + // Checks if one input is based on a steer operation. + bool tryMatchIndirectSteerPattern(Value input1, Value input2, + Value &condition, Value &true_value, + Value &false_value) const { + // Checks if input2 is a false_steer. 
+ auto false_steer = input2.getDefiningOp(); + if (!false_steer) { + return false; + } + + condition = false_steer.getCondition(); + + // Checks the defining operation of input1. + Operation *def_op = input1.getDefiningOp(); + if (!def_op) { + return false; + } + + // Checks if the defining operation's inputs use true_steer. + bool found_true_steer = false; + for (Value operand : def_op->getOperands()) { + if (auto true_steer = operand.getDefiningOp()) { + if (true_steer.getCondition() == condition) { + found_true_steer = true; + break; + } + } + } + + if (!found_true_steer) { + return false; + } + + true_value = input1; + false_value = input2; + return true; + } + + // Creates a merge operation to replace the phi operation. + void createMergeOp(neura::PhiOp phi_op, PatternRewriter &rewriter, + Value condition, Value true_value, + Value false_value) const { + rewriter.setInsertionPoint(phi_op); + auto merge_op = rewriter.create( + phi_op.getLoc(), phi_op.getType(), condition, true_value, false_value); + rewriter.replaceOp(phi_op, merge_op.getResult()); + } +}; + +class GrantPredicateToSteerPattern + : public OpRewritePattern { +public: + GrantPredicateToSteerPattern(MLIRContext *context, + OperationsToErase &ops_to_erase) + : OpRewritePattern(context), + ops_to_erase(ops_to_erase) {} + + LogicalResult matchAndRewrite(neura::GrantPredicateOp grant_op, + PatternRewriter &rewriter) const override { + Value value = grant_op.getValue(); + Value condition = grant_op.getPredicate(); + + if (auto not_op = condition.getDefiningOp()) { + // If the condition is a Not operation, we can transform it to the + // false_steer. + rewriter.setInsertionPoint(grant_op); + auto false_steer = rewriter.create( + grant_op.getLoc(), value.getType(), value, not_op.getInput()); + rewriter.replaceOp(grant_op, false_steer.getResult()); + ops_to_erase.markForErasure(not_op); + } else { + // Otherwise, we transform it to the true_steer. 
+ rewriter.setInsertionPoint(grant_op); + auto true_steer = rewriter.create( + grant_op.getLoc(), value.getType(), value, condition); + rewriter.replaceOp(grant_op, true_steer.getResult()); + } + + return success(); + } + +private: + OperationsToErase &ops_to_erase; +}; + +class GrantOnceRemovalPattern : public OpRewritePattern { +public: + GrantOnceRemovalPattern(MLIRContext *context) + : OpRewritePattern(context) {} + + LogicalResult matchAndRewrite(neura::GrantOnceOp grant_once_op, + PatternRewriter &rewriter) const override { + Value input = grant_once_op.getValue(); + + rewriter.replaceOp(grant_once_op, input); + return success(); + } +}; + +struct TransformToSteerControlPass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TransformToSteerControlPass) + + StringRef getArgument() const override { + return "transform-to-steer-control"; + } + + StringRef getDescription() const override { + return "Transform control flow into data flow using steer control " + "operations."; + } + + void runOnOperation() override { + func::FuncOp func = getOperation(); + MLIRContext &context = getContext(); + PatternRewriter rewriter(&context); + OperationsToErase ops_to_erase; + + RewritePatternSet grant_once_patterns(&context); + grant_once_patterns.add(&context); + if (failed(applyPatternsGreedily(func, std::move(grant_once_patterns)))) { + signalPassFailure(); + } + MergePatternFinder merge_pattern_finder(func); + + RewritePatternSet merge_patterns(&context); + merge_patterns.add(&context, merge_pattern_finder, + ops_to_erase); + if (failed(applyPatternsGreedily(func, std::move(merge_patterns)))) { + signalPassFailure(); + } + // Erases the marked operations after processing all merge patterns. 
+ ops_to_erase.eraseMarkedOperations(); + + LoopAnalyzer loop_analyzer(func); + BackwardValueHandler backward_value_handler(rewriter); + + RewritePatternSet phi_patterns(&context); + phi_patterns.add(&context, loop_analyzer, + backward_value_handler, ops_to_erase); + if (failed(applyPatternsGreedily(func, std::move(phi_patterns)))) { + signalPassFailure(); + } + // Erases the marked operations after processing all phi operations. + ops_to_erase.eraseMarkedOperations(); + + RewritePatternSet steer_patterns(&context); + steer_patterns.add(&context, ops_to_erase); + + if (failed(applyPatternsGreedily(func, std::move(steer_patterns)))) { + signalPassFailure(); + } + // Erases the marked operations after processing all grant_predicate + // operations. + ops_to_erase.eraseMarkedOperations(); + + RewritePatternSet steer_phi_patterns(&context); + steer_phi_patterns.add( + &context, ops_to_erase, merge_pattern_finder, loop_analyzer); + if (failed(applyPatternsGreedily(func, std::move(steer_phi_patterns)))) { + signalPassFailure(); + } + // Erases the marked operations after processing all steer-phi patterns. + ops_to_erase.eraseMarkedOperations(); + + // Cleans up any remaining unused reserve, ctrl_mov, and not operations. + llvm::SmallVector to_erase; + func.walk([&](Operation *op) { + if (isa(op) && op->use_empty()) { + to_erase.push_back(op); + } else if (isa(op) && op->use_empty()) { + to_erase.push_back(op); + } else if (auto ctrl_mov_op = dyn_cast(op)) { + // Cleans up ctrl_mov operations whose target reserve is unused. + if (auto target_reserve = + ctrl_mov_op.getTarget().getDefiningOp()) { + if (target_reserve->use_empty()) { + // If the target reserve is going to be deleted, this ctrl_mov can + // also be deleted. + to_erase.push_back(ctrl_mov_op); + } + } + } + }); + + // Erases in reverse order (back to front) to avoid dependency issues. + for (auto it = to_erase.rbegin(); it != to_erase.rend(); ++it) { + (*it)->erase(); + } + + // Checks if the function is now in predicate mode. 
+ auto dataflow_mode_attr = func->getAttrOfType("dataflow_mode"); + if (!dataflow_mode_attr || dataflow_mode_attr.getValue() != "predicate") { + func.emitError("transform-to-steer-control requires function to be in " + "predicate mode"); + signalPassFailure(); + return; + } + // Changes the dataflow_mode attribute to "steering". + func->setAttr("dataflow_mode", StringAttr::get(&context, "steering")); + llvm::errs() + << "[ctrl2steer] Changed dataflow mode from predicate to steering " + "for function: " + << func.getName() << "\n"; + } +}; +} // namespace + +namespace mlir { +namespace neura { + +std::unique_ptr createTransformToSteerControlPass() { + return std::make_unique(); +} + +} // namespace neura +} // namespace mlir \ No newline at end of file diff --git a/test/affine2neura/bert/bert_node1/bert_node1.mlir b/test/affine2neura/bert/bert_node1/bert_node1.mlir index 8e36c502..6157dc76 100644 --- a/test/affine2neura/bert/bert_node1/bert_node1.mlir +++ b/test/affine2neura/bert/bert_node1/bert_node1.mlir @@ -61,7 +61,7 @@ module attributes {} { // CHECK-NEXT: "neura.return"() : () -> () // CHECK-NEXT: } -// CTRL2DATA: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura"} { +// CTRL2DATA: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate"} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data, i1> diff --git a/test/affine2neura/bert/bert_node28/bert_node28.mlir b/test/affine2neura/bert/bert_node28/bert_node28.mlir index a12273c8..902bc82c 100644 --- a/test/affine2neura/bert/bert_node28/bert_node28.mlir +++ b/test/affine2neura/bert/bert_node28/bert_node28.mlir @@ -81,7 +81,7 @@ module attributes {} 
{ // CHECK-NEXT: ^bb9: // pred: ^bb1 // CHECK-NEXT: "neura.return"() : () -> () -// CTRL2DATA: func.func @_Z11bert_node28PA128_A768_KfPA768_S0_PA128_A768_f(%arg0: memref, %arg1: memref, %arg2: memref) attributes {accelerator = "neura"} { +// CTRL2DATA: func.func @_Z11bert_node28PA128_A768_KfPA768_S0_PA128_A768_f(%arg0: memref, %arg1: memref, %arg2: memref) attributes {accelerator = "neura", dataflow_mode = "predicate"} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data, i1> diff --git a/test/c2llvm2mlir/nested_loop/test.mlir b/test/c2llvm2mlir/nested_loop/test.mlir index bac316db..1c4b7b32 100644 --- a/test/c2llvm2mlir/nested_loop/test.mlir +++ b/test/c2llvm2mlir/nested_loop/test.mlir @@ -24,4 +24,4 @@ // CHECK-LLVM2NEURA: %175 = neura.sext %174 : !neura.data -> !neura.data // CHECK-LLVM2NEURA: %194 = "neura.mul"(%192, %193) : (!neura.data, !neura.data) -> !neura.data -// CHECK-LLVM2NEURA-MAP: func.func @_Z6kernelPiS_S_(%arg0: !llvm.ptr {llvm.noundef}, %arg1: !llvm.ptr {llvm.noundef}, %arg2: !llvm.ptr {llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", frame_pointer = #llvm.framePointerKind, linkage = #llvm.linkage, mapping_info = {compiled_ii = 17 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 9 : i32, res_mii = 6 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}, no_inline, no_unwind, optimize_none, passthrough = ["mustprogress", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 0 : i64, visibility_ = 0 : i64} { \ No newline at end of file 
+// CHECK-LLVM2NEURA-MAP: func.func @_Z6kernelPiS_S_(%arg0: !llvm.ptr {llvm.noundef}, %arg1: !llvm.ptr {llvm.noundef}, %arg2: !llvm.ptr {llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", dataflow_mode = "predicate", frame_pointer = #llvm.framePointerKind, linkage = #llvm.linkage, mapping_info = {compiled_ii = 17 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 9 : i32, res_mii = 6 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}, no_inline, no_unwind, optimize_none, passthrough = ["mustprogress", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 0 : i64, visibility_ = 0 : i64} { \ No newline at end of file diff --git a/test/code_gen/test_code_generate.mlir b/test/code_gen/test_code_generate.mlir index 2c155802..e88516e2 100644 --- a/test/code_gen/test_code_generate.mlir +++ b/test/code_gen/test_code_generate.mlir @@ -7,7 +7,7 @@ // RUN: --insert-data-mov \ // RUN: --map-to-accelerator="mapping-strategy=heuristic" \ // RUN: --generate-code -o %t-mapping.mlir -// RU: FileCheck %s --input-file=%t-mapping.mlir -check-prefix=MAPPING +// RUN: FileCheck %s --input-file=%t-mapping.mlir -check-prefix=MAPPING // RUN: FileCheck %s --input-file=tmp-generated-instructions.yaml --check-prefix=YAML // RUN: FileCheck %s --input-file=tmp-generated-instructions.asm --check-prefix=ASM @@ -31,7 +31,7 @@ func.func @loop_test() -> f32 { return %result : f32 } -// MAPPING: func.func @loop_test() -> f32 attributes {accelerator = "neura", mapping_info = {compiled_ii = 6 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 4 : i32, res_mii = 2 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { +// MAPPING: func.func @loop_test() -> f32 
attributes {accelerator = "neura", dataflow_mode = "predicate", mapping_info = {compiled_ii = 6 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 4 : i32, res_mii = 2 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { // Each core represents a processing element in the CGRA array // Example: column: 1, row: 1 represents the core at position (1,1) in the 4x4 grid diff --git a/test/controflow_fuse/complex_nested/complex_nested.mlir b/test/controflow_fuse/complex_nested/complex_nested.mlir index 25fef2fc..a3c0c1a5 100644 --- a/test/controflow_fuse/complex_nested/complex_nested.mlir +++ b/test/controflow_fuse/complex_nested/complex_nested.mlir @@ -175,7 +175,7 @@ module attributes {} { // CHECK-NEXT: "neura.return"() : () -> () // CHECK-NEXT: } -// CTRL2DATA: func.func @_Z14complex_nestedPA32_A32_iPS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CTRL2DATA: func.func @_Z14complex_nestedPA32_A32_iPS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data, i1> diff --git a/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir b/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir index 435c9536..418002dd 100644 --- a/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir +++ b/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir @@ -137,7 +137,7 @@ module attributes {} { // CHECK-NEXT: "neura.return"() : () -> () // CHECK-NEXT: } -// CTRL2DATA: func.func @_Z29non_perfect_extra_computationPA128_iS0_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CTRL2DATA: func.func 
@_Z29non_perfect_extra_computationPA128_iS0_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data, i1> diff --git a/test/controflow_fuse/perfect_nested/perfect_nested.mlir b/test/controflow_fuse/perfect_nested/perfect_nested.mlir index 66f71fe7..3f66227e 100644 --- a/test/controflow_fuse/perfect_nested/perfect_nested.mlir +++ b/test/controflow_fuse/perfect_nested/perfect_nested.mlir @@ -112,7 +112,7 @@ module attributes {} { // CAST-NEXT: "neura.return"() : () -> () // CAST-NEXT: } -// CTRL2DATA: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CTRL2DATA: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data, i1> @@ -192,4 +192,4 @@ module attributes {} { // CTRL2DATA-NEXT: } -// MAPPING: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage, mapping_info = {compiled_ii = 10 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 8 : i32, res_mii = 3 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { \ No newline at end of file +// MAPPING: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) 
attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage, mapping_info = {compiled_ii = 10 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 8 : i32, res_mii = 3 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { \ No newline at end of file diff --git a/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir b/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir index 978a3f27..cbc554bc 100644 --- a/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir +++ b/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir @@ -103,7 +103,7 @@ module attributes {} { // CAST-NEXT: "neura.return"(%5) : (i32) -> () // CAST-NEXT: } -// CTRL2DATA: func.func @_Z27perfect_nested_reduction_2dPA128_i(%arg0: memref) -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CTRL2DATA: func.func @_Z27perfect_nested_reduction_2dPA128_i(%arg0: memref) -> i32 attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = 1 : i64}> : () -> !neura.data diff --git a/test/controflow_fuse/simple_loop/simple_loop.mlir b/test/controflow_fuse/simple_loop/simple_loop.mlir index 2ec33f73..979954f6 100644 --- a/test/controflow_fuse/simple_loop/simple_loop.mlir +++ b/test/controflow_fuse/simple_loop/simple_loop.mlir @@ -126,7 +126,7 @@ module attributes {} { // CANONICALIZE-NEXT: "neura.return"() : () -> () // CANONICALIZE-NEXT: } -// CTRL2DATA: func.func @_Z11simple_loopPiS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CTRL2DATA: func.func @_Z11simple_loopPiS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = 
#llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = "%arg1"}> : () -> !neura.data, i1> @@ -179,7 +179,7 @@ module attributes {} { // CTRL2DATA-NEXT: } -// FUSE: func.func @_Z11simple_loopPiS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// FUSE: func.func @_Z11simple_loopPiS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { // FUSE-NEXT: %0 = "neura.grant_once"() <{constant_value = "%arg0"}> : () -> !neura.data, i1> // FUSE-NEXT: %1 = "neura.grant_once"() <{constant_value = "%arg1"}> : () -> !neura.data, i1> // FUSE-NEXT: %2 = neura.reserve : !neura.data, i1> @@ -199,4 +199,4 @@ module attributes {} { // FUSE-NEXT: "neura.return"() : () -> () // FUSE-NEXT: } -// FUSE-MAPPING: func.func @_Z11simple_loopPiS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage, mapping_info = {compiled_ii = 2 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 2 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { \ No newline at end of file +// FUSE-MAPPING: func.func @_Z11simple_loopPiS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage, mapping_info = {compiled_ii = 2 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 2 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { \ No newline at end of file diff --git a/test/controflow_fuse/simple_loop_reduction/simple_loop_reduction.mlir b/test/controflow_fuse/simple_loop_reduction/simple_loop_reduction.mlir index 84043c73..8d4b4e5c 100644 --- a/test/controflow_fuse/simple_loop_reduction/simple_loop_reduction.mlir 
+++ b/test/controflow_fuse/simple_loop_reduction/simple_loop_reduction.mlir @@ -116,7 +116,7 @@ module attributes {} { // CANONICALIZE-NEXT: "neura.return"(%16) : (i32) -> () // CANONICALIZE-NEXT: } -// CTRL2DATA: func.func @_Z10simpleloopv() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CTRL2DATA: func.func @_Z10simpleloopv() -> i32 attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = 1 : i64}> : () -> !neura.data // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data) -> !neura.data // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = 128 : i64}> : () -> !neura.data @@ -151,7 +151,7 @@ module attributes {} { // CTRL2DATA-NEXT: } -// FUSE: func.func @_Z10simpleloopv() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// FUSE: func.func @_Z10simpleloopv() -> i32 attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { // FUSE-NEXT: %0 = "neura.grant_once"() <{constant_value = 0 : i32}> : () -> !neura.data // FUSE-NEXT: %1 = neura.reserve : !neura.data // FUSE-NEXT: %2 = "neura.phi"(%1, %0) : (!neura.data, !neura.data) -> !neura.data @@ -167,4 +167,4 @@ module attributes {} { // FUSE-NEXT: } -// FUSE-MAPPING: func.func @_Z10simpleloopv() -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage, mapping_info = {compiled_ii = 4 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 3 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { \ No newline at end of file +// FUSE-MAPPING: func.func @_Z10simpleloopv() -> i32 attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage, mapping_info = {compiled_ii = 4 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 3 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { \ No newline at end of file 
diff --git a/test/mapping_quality/branch_for.mlir b/test/mapping_quality/branch_for.mlir index f0c89cb3..d9cb1c9f 100644 --- a/test/mapping_quality/branch_for.mlir +++ b/test/mapping_quality/branch_for.mlir @@ -115,7 +115,7 @@ func.func @loop_test() -> f32 { // CANONICALIZE-NEXT: "neura.return"(%7) : (f32) -> () // CANONICALIZE-NEXT: } -// CTRL2DATA: func.func @loop_test() -> f32 attributes {accelerator = "neura"} { +// CTRL2DATA: func.func @loop_test() -> f32 attributes {accelerator = "neura", dataflow_mode = "predicate"} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = 0 : i64}> : () -> !neura.data // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data) -> !neura.data // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = 0.000000e+00 : f32}> : () -> !neura.data @@ -136,7 +136,7 @@ func.func @loop_test() -> f32 { // CTRL2DATA-NEXT: "neura.return"(%14) : (!neura.data) -> () // CTRL2DATA-NEXT: } -// FUSE: func.func @loop_test() -> f32 attributes {accelerator = "neura"} { +// FUSE: func.func @loop_test() -> f32 attributes {accelerator = "neura", dataflow_mode = "predicate"} { // FUSE-NEXT: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data // FUSE-NEXT: %1 = "neura.grant_once"() <{constant_value = 0.000000e+00 : f32}> : () -> !neura.data // FUSE-NEXT: %2 = neura.reserve : !neura.data @@ -155,7 +155,7 @@ func.func @loop_test() -> f32 { // FUSE-NEXT: "neura.return"(%12) : (!neura.data) -> () // FUSE-NEXT: } -// MOV: func.func @loop_test() -> f32 attributes {accelerator = "neura"} { +// MOV: func.func @loop_test() -> f32 attributes {accelerator = "neura", dataflow_mode = "predicate"} { // MOV-NEXT: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data // MOV-NEXT: %1 = "neura.grant_once"() <{constant_value = 0.000000e+00 : f32}> : () -> !neura.data // MOV-NEXT: %2 = neura.reserve : !neura.data @@ -188,7 +188,7 @@ func.func @loop_test() -> f32 { // MOV-NEXT: } // MAPPING: module { -// MAPPING-NEXT: func.func 
@loop_test() -> f32 attributes {accelerator = "neura", mapping_info = {compiled_ii = 4 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 4 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { +// MAPPING-NEXT: func.func @loop_test() -> f32 attributes {accelerator = "neura", dataflow_mode = "predicate", mapping_info = {compiled_ii = 4 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 4 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { // YAML: array_config: diff --git a/test/mapping_quality/tiny_loop.mlir b/test/mapping_quality/tiny_loop.mlir index 2f8b813a..2be8c77e 100644 --- a/test/mapping_quality/tiny_loop.mlir +++ b/test/mapping_quality/tiny_loop.mlir @@ -77,6 +77,6 @@ module { // CHECK-NEXT: "neura.return"(%15) : (i64) -> () // CHECK-NEXT: } -// SPATIAL: func.func @simple_add_loop() -> i64 attributes {accelerator = "neura", mapping_info = {compiled_ii = 4 : i32, mapping_mode = "spatial-only", mapping_strategy = "heuristic", rec_mii = 3 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { +// SPATIAL: func.func @simple_add_loop() -> i64 attributes {accelerator = "neura", dataflow_mode = "predicate", mapping_info = {compiled_ii = 4 : i32, mapping_mode = "spatial-only", mapping_strategy = "heuristic", rec_mii = 3 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { -// SPATIAL-TEMPORAL: func.func @simple_add_loop() -> i64 attributes {accelerator = "neura", mapping_info = {compiled_ii = 3 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 3 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { \ No newline at end of file +// SPATIAL-TEMPORAL: func.func @simple_add_loop() -> i64 attributes {accelerator = "neura", dataflow_mode = "predicate", mapping_info = {compiled_ii = 3 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 3 : i32, res_mii = 1 : i32, x_tiles = 4 
: i32, y_tiles = 4 : i32}} { \ No newline at end of file diff --git a/test/neura/ctrl/branch.mlir b/test/neura/ctrl/branch.mlir index 696991f8..33e0889d 100644 --- a/test/neura/ctrl/branch.mlir +++ b/test/neura/ctrl/branch.mlir @@ -54,7 +54,7 @@ func.func @test(%in: i64) -> f32 { // CHECK-NEXT: "neura.return"(%13) : (!neura.data) -> () // CHECK-NEXT: } -// CTRL2DATA: func.func @test(%arg0: i64) -> f32 attributes {accelerator = "neura"} { +// CTRL2DATA: func.func @test(%arg0: i64) -> f32 attributes {accelerator = "neura", dataflow_mode = "predicate"} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data // CTRL2DATA-NEXT: %1 = "neura.constant"() <{value = 0 : i64}> : () -> !neura.data // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = 1.000000e+00 : f32}> : () -> !neura.data diff --git a/test/neura/ctrl/branch_for.mlir b/test/neura/ctrl/branch_for.mlir index 200aaa1f..9af41c51 100644 --- a/test/neura/ctrl/branch_for.mlir +++ b/test/neura/ctrl/branch_for.mlir @@ -115,7 +115,7 @@ func.func @loop_test() -> f32 { // CANONICALIZE-NEXT: "neura.return"(%7) : (f32) -> () // CANONICALIZE-NEXT: } -// CTRL2DATA: func.func @loop_test() -> f32 attributes {accelerator = "neura"} { +// CTRL2DATA: func.func @loop_test() -> f32 attributes {accelerator = "neura", dataflow_mode = "predicate"} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = 0 : i64}> : () -> !neura.data // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data) -> !neura.data // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = 0.000000e+00 : f32}> : () -> !neura.data @@ -136,7 +136,7 @@ func.func @loop_test() -> f32 { // CTRL2DATA-NEXT: "neura.return"(%14) : (!neura.data) -> () // CTRL2DATA-NEXT: } -// FUSE: func.func @loop_test() -> f32 attributes {accelerator = "neura"} { +// FUSE: func.func @loop_test() -> f32 attributes {accelerator = "neura", dataflow_mode = "predicate"} { // FUSE-NEXT: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data // 
FUSE-NEXT: %1 = "neura.grant_once"() <{constant_value = 0.000000e+00 : f32}> : () -> !neura.data // FUSE-NEXT: %2 = neura.reserve : !neura.data @@ -155,7 +155,7 @@ func.func @loop_test() -> f32 { // FUSE-NEXT: "neura.return"(%12) : (!neura.data) -> () // FUSE-NEXT: } -// MOV: func.func @loop_test() -> f32 attributes {accelerator = "neura"} { +// MOV: func.func @loop_test() -> f32 attributes {accelerator = "neura", dataflow_mode = "predicate"} { // MOV-NEXT: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> : () -> !neura.data // MOV-NEXT: %1 = "neura.grant_once"() <{constant_value = 0.000000e+00 : f32}> : () -> !neura.data // MOV-NEXT: %2 = neura.reserve : !neura.data @@ -187,7 +187,7 @@ func.func @loop_test() -> f32 { // MOV-NEXT: "neura.return"(%25) : (!neura.data) -> () // MOV-NEXT: } -// MAPPING: func.func @loop_test() -> f32 attributes {accelerator = "neura", mapping_info = {compiled_ii = 4 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 4 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { +// MAPPING: func.func @loop_test() -> f32 attributes {accelerator = "neura", dataflow_mode = "predicate", mapping_info = {compiled_ii = 4 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 4 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { // YAML: array_config: // YAML-NEXT: columns: 4 diff --git a/test/neura/ctrl/branch_with_and_without_arg.mlir b/test/neura/ctrl/branch_with_and_without_arg.mlir index 76b67e00..39c94c1d 100644 --- a/test/neura/ctrl/branch_with_and_without_arg.mlir +++ b/test/neura/ctrl/branch_with_and_without_arg.mlir @@ -51,7 +51,7 @@ func.func @test(%in: i64) -> f32 { // CHECK-NEXT: "neura.return"(%12) : (!neura.data) -> () // CHECK-NEXT: } -// CTRL2DATA: func.func @test(%arg0: i64) -> f32 attributes {accelerator = "neura"} { +// CTRL2DATA: func.func @test(%arg0: i64) -> f32 attributes {accelerator = "neura", dataflow_mode = "predicate"} { // 
CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data // CTRL2DATA-NEXT: %1 = "neura.constant"() <{value = 0 : i64}> : () -> !neura.data // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = 1.000000e+00 : f32}> : () -> !neura.data diff --git a/test/neura/ctrl/branch_without_arg.mlir b/test/neura/ctrl/branch_without_arg.mlir index 64478ab2..c4854d4c 100644 --- a/test/neura/ctrl/branch_without_arg.mlir +++ b/test/neura/ctrl/branch_without_arg.mlir @@ -53,7 +53,7 @@ func.func @test(%in: i64) -> f32 { // CHECK-NEXT: "neura.return"(%13) : (!neura.data) -> () // CHECK-NEXT: } -// CTRL2DATA: func.func @test(%arg0: i64) -> f32 attributes {accelerator = "neura"} { +// CTRL2DATA: func.func @test(%arg0: i64) -> f32 attributes {accelerator = "neura", dataflow_mode = "predicate"} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data // CTRL2DATA-NEXT: %1 = "neura.constant"() <{value = 0 : i64}> : () -> !neura.data // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = 1.000000e+00 : f32}> : () -> !neura.data diff --git a/test/neura/ctrl/for_with_if.mlir b/test/neura/ctrl/for_with_if.mlir index 99e0ed22..4fc5c2a4 100644 --- a/test/neura/ctrl/for_with_if.mlir +++ b/test/neura/ctrl/for_with_if.mlir @@ -87,7 +87,7 @@ module attributes {} { // CHECK-NEXT: "neura.return"(%10) : (i32) -> () // CHECK-NEXT: } -// CTRL2DATA: func.func @_Z11for_with_ifPi(%arg0: memref) -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CTRL2DATA: func.func @_Z11for_with_ifPi(%arg0: memref) -> i32 attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = 0 : i32}> : () -> !neura.data diff --git a/test/neura/ctrl/nested_branch.mlir 
b/test/neura/ctrl/nested_branch.mlir index 5a145a99..9c1cfcae 100644 --- a/test/neura/ctrl/nested_branch.mlir +++ b/test/neura/ctrl/nested_branch.mlir @@ -60,7 +60,7 @@ func.func @complex_test(%in: i64) -> f32 { // CHECK-NEXT: "neura.return"(%18) : (!neura.data) -> () // CHECK-NEXT: } -// CTRL2DATA: func.func @complex_test(%arg0: i64) -> f32 attributes {accelerator = "neura"} { +// CTRL2DATA: func.func @complex_test(%arg0: i64) -> f32 attributes {accelerator = "neura", dataflow_mode = "predicate"} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data // CTRL2DATA-NEXT: %1 = "neura.constant"() <{value = 0 : i64}> : () -> !neura.data // CTRL2DATA-NEXT: %2 = "neura.constant"() <{value = 1.000000e+00 : f32}> : () -> !neura.data diff --git a/test/neura/interpreter/loop_convert_controlflow_to_dataflow.mlir b/test/neura/interpreter/loop_convert_controlflow_to_dataflow.mlir index 62df8609..33be0e3c 100644 --- a/test/neura/interpreter/loop_convert_controlflow_to_dataflow.mlir +++ b/test/neura/interpreter/loop_convert_controlflow_to_dataflow.mlir @@ -37,7 +37,7 @@ func.func @loop_sum() -> f32 { "neura.return"(%ret_sum) : (f32) -> () } -// DATAFLOW_IR: func.func @loop_sum() -> f32 attributes {accelerator = "neura"} { +// DATAFLOW_IR: func.func @loop_sum() -> f32 attributes {accelerator = "neura", dataflow_mode = "predicate"} { // DATAFLOW_IR-NEXT: %0 = "neura.grant_once"() <{constant_value = 0.000000e+00 : f32}> : () -> !neura.data // DATAFLOW_IR-NEXT: %1 = "neura.grant_once"() <{constant_value = 1.000000e+00 : f32}> : () -> !neura.data // DATAFLOW_IR-NEXT: %2 = "neura.grant_once"() <{constant_value = 3.000000e+00 : f32}> : () -> !neura.data diff --git a/test/neura/steer_ctrl/for_with_if.mlir b/test/neura/steer_ctrl/for_with_if.mlir new file mode 100644 index 00000000..7085e1aa --- /dev/null +++ b/test/neura/steer_ctrl/for_with_if.mlir @@ -0,0 +1,104 @@ +// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir 
+// RUN: mlir-neura-opt %t-llvm.mlir \ +// RUN: --assign-accelerator \ +// RUN: --lower-arith-to-neura \ +// RUN: --lower-memref-to-neura \ +// RUN: --lower-builtin-to-neura \ +// RUN: --lower-llvm-to-neura \ +// RUN: --canonicalize-cast \ +// RUN: --canonicalize-live-in \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: --transform-to-steer-control \ +// RUN: --remove-predicated-type \ +// RUN: | FileCheck %s + +module attributes {} { + func.func @_Z11for_with_ifPi(%arg0: memref) -> i32 attributes {llvm.linkage = #llvm.linkage} { + %c128 = arith.constant 128 : index + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c-5_i32 = arith.constant -5 : i32 + %c1_i32 = arith.constant 1 : i32 + %c2_i32 = arith.constant 2 : i32 + %c1000_i32 = arith.constant 1000 : i32 + %c0_i32 = arith.constant 0 : i32 + %0 = scf.for %arg1 = %c0 to %c128 step %c1 iter_args(%arg2 = %c0_i32) -> (i32) { + %1 = arith.cmpi sge, %arg2, %c1000_i32 : i32 + %2 = scf.if %1 -> (i32) { + %7 = arith.addi %arg2, %c-5_i32 : i32 + scf.yield %7 : i32 + } else { + scf.yield %arg2 : i32 + } + %3 = memref.load %arg0[%arg1] : memref + %4 = arith.muli %3, %c2_i32 : i32 + %5 = arith.addi %4, %c1_i32 : i32 + %6 = arith.addi %2, %5 : i32 + scf.yield %6 : i32 + } + return %0 : i32 + } +} + +// CHECK: func.func @_Z11for_with_ifPi(%arg0: memref) -> i32 attributes {accelerator = "neura", dataflow_mode = "steering", llvm.linkage = #llvm.linkage} { +// CHECK-NEXT: %0 = neura.reserve : i64 +// CHECK-NEXT: %1 = neura.reserve : i32 +// CHECK-NEXT: %2 = neura.reserve : i32 +// CHECK-NEXT: %3 = neura.reserve : memref +// CHECK-NEXT: %4 = neura.reserve : i32 +// CHECK-NEXT: %5 = neura.reserve : i32 +// CHECK-NEXT: %6 = neura.reserve : i64 +// CHECK-NEXT: %7 = neura.reserve : i32 +// CHECK-NEXT: %8 = neura.reserve : i64 +// CHECK-NEXT: %9 = neura.reserve : i1 +// CHECK-NEXT: %10 = "neura.constant"() <{value = "%arg0"}> : () -> memref +// CHECK-NEXT: %11 = 
"neura.constant"() <{value = 0 : i32}> : () -> i32 +// CHECK-NEXT: %12 = "neura.constant"() <{value = 1000 : i32}> : () -> i32 +// CHECK-NEXT: %13 = "neura.constant"() <{value = 2 : i32}> : () -> i32 +// CHECK-NEXT: %14 = "neura.constant"() <{value = 1 : i32}> : () -> i32 +// CHECK-NEXT: %15 = "neura.constant"() <{value = -5 : i32}> : () -> i32 +// CHECK-NEXT: %16 = "neura.constant"() <{value = 1 : i64}> : () -> i64 +// CHECK-NEXT: %17 = "neura.constant"() <{value = 128 : i64}> : () -> i64 +// CHECK-NEXT: %18 = "neura.constant"() <{value = 0 : i64}> : () -> i64 +// CHECK-NEXT: %19 = neura.carry %16, %9, %0 : i64, i1, i64 -> i64 +// CHECK-NEXT: %20 = neura.carry %14, %9, %1 : i32, i1, i32 -> i32 +// CHECK-NEXT: %21 = neura.carry %13, %9, %2 : i32, i1, i32 -> i32 +// CHECK-NEXT: %22 = neura.carry %10, %9, %3 : memref, i1, memref -> memref +// CHECK-NEXT: %23 = neura.carry %15, %9, %4 : i32, i1, i32 -> i32 +// CHECK-NEXT: %24 = neura.carry %12, %9, %5 : i32, i1, i32 -> i32 +// CHECK-NEXT: %25 = neura.carry %17, %9, %6 : i64, i1, i64 -> i64 +// CHECK-NEXT: %26 = neura.carry %11, %9, %7 : i32, i1, i32 -> i32 +// CHECK-NEXT: %27 = neura.carry %18, %9, %8 : i64, i1, i64 -> i64 +// CHECK-NEXT: %28 = "neura.icmp"(%27, %25) <{cmpType = "slt"}> : (i64, i64) -> i1 +// CHECK-NEXT: neura.ctrl_mov %28 -> %9 : i1 i1 +// CHECK-NEXT: %29 = neura.false_steer %26, %28 : i32, i1 -> i32 +// CHECK-NEXT: %30 = "neura.icmp"(%26, %24) <{cmpType = "sge"}> : (i32, i32) -> i1 +// CHECK-NEXT: %31 = neura.true_steer %26, %30 : i32, i1 -> i32 +// CHECK-NEXT: %32 = neura.true_steer %23, %30 : i32, i1 -> i32 +// CHECK-NEXT: %33 = neura.false_steer %26, %30 : i32, i1 -> i32 +// CHECK-NEXT: %34 = "neura.add"(%31, %32) : (i32, i32) -> i32 +// CHECK-NEXT: %35 = neura.merge %30, %23, %23 : i1, i32, i32 -> i32 +// CHECK-NEXT: neura.ctrl_mov %35 -> %4 : i32 i32 +// CHECK-NEXT: %36 = neura.merge %30, %24, %24 : i1, i32, i32 -> i32 +// CHECK-NEXT: neura.ctrl_mov %36 -> %5 : i32 i32 +// CHECK-NEXT: %37 = 
neura.merge %30, %25, %25 : i1, i64, i64 -> i64 +// CHECK-NEXT: neura.ctrl_mov %37 -> %6 : i64 i64 +// CHECK-NEXT: %38 = neura.merge %30, %19, %19 : i1, i64, i64 -> i64 +// CHECK-NEXT: neura.ctrl_mov %38 -> %0 : i64 i64 +// CHECK-NEXT: %39 = neura.merge %30, %20, %20 : i1, i32, i32 -> i32 +// CHECK-NEXT: neura.ctrl_mov %39 -> %1 : i32 i32 +// CHECK-NEXT: %40 = neura.merge %30, %21, %21 : i1, i32, i32 -> i32 +// CHECK-NEXT: neura.ctrl_mov %40 -> %2 : i32 i32 +// CHECK-NEXT: %41 = neura.merge %30, %27, %27 : i1, i64, i64 -> i64 +// CHECK-NEXT: %42 = neura.merge %30, %22, %22 : i1, memref, memref -> memref +// CHECK-NEXT: neura.ctrl_mov %42 -> %3 : memref memref +// CHECK-NEXT: %43 = neura.merge %30, %34, %33 : i1, i32, i32 -> i32 +// CHECK-NEXT: %44 = neura.load_indexed %42[%41 : i64] memref : i32 +// CHECK-NEXT: %45 = "neura.mul"(%44, %40) : (i32, i32) -> i32 +// CHECK-NEXT: %46 = "neura.add"(%45, %39) : (i32, i32) -> i32 +// CHECK-NEXT: %47 = "neura.add"(%43, %46) : (i32, i32) -> i32 +// CHECK-NEXT: neura.ctrl_mov %47 -> %7 : i32 i32 +// CHECK-NEXT: %48 = "neura.add"(%41, %38) : (i64, i64) -> i64 +// CHECK-NEXT: neura.ctrl_mov %48 -> %8 : i64 i64 +// CHECK-NEXT: "neura.return"(%29) : (i32) -> () diff --git a/test/neura/steer_ctrl/loop_with_return_value.mlir b/test/neura/steer_ctrl/loop_with_return_value.mlir new file mode 100644 index 00000000..f191a43b --- /dev/null +++ b/test/neura/steer_ctrl/loop_with_return_value.mlir @@ -0,0 +1,101 @@ +// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir +// RUN: mlir-neura-opt %t-llvm.mlir \ +// RUN: --assign-accelerator \ +// RUN: --lower-arith-to-neura \ +// RUN: --lower-memref-to-neura \ +// RUN: --lower-builtin-to-neura \ +// RUN: --lower-llvm-to-neura \ +// RUN: --canonicalize-cast \ +// RUN: --canonicalize-live-in \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: --transform-to-steer-control \ +// RUN: --remove-predicated-type \ +// RUN: | 
FileCheck %s + +// RUN: mlir-neura-opt %t-llvm.mlir \ +// RUN: --assign-accelerator \ +// RUN: --lower-arith-to-neura \ +// RUN: --lower-memref-to-neura \ +// RUN: --lower-builtin-to-neura \ +// RUN: --lower-llvm-to-neura \ +// RUN: --canonicalize-cast \ +// RUN: --canonicalize-live-in \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: --transform-to-steer-control \ +// RUN: --remove-predicated-type \ +// RUN: --insert-data-mov +// RU: --map-to-accelerator="mapping-strategy=heuristic mapping-mode=spatial-only backtrack-config=customized" +// RU: | FileCheck %s -check-prefix=MAPPING + +module { + func.func @simple_add_loop() -> i64 { + %c0 = arith.constant 0 : index + %c10 = arith.constant 1 : i64 + %c1 = arith.constant 1 : index + %c16 = arith.constant 16 : index + + %result = scf.for %i = %c0 to %c16 step %c1 iter_args(%acc = %c10) -> (i64) { + %sum = arith.addi %acc, %acc : i64 + scf.yield %sum : i64 + } + return %result : i64 + } +} + +// CHECK: func.func @simple_add_loop() -> i64 attributes {accelerator = "neura", dataflow_mode = "steering"} { +// CHECK-NEXT: %0 = neura.reserve : i64 +// CHECK-NEXT: %1 = neura.reserve : i64 +// CHECK-NEXT: %2 = neura.reserve : i1 +// CHECK-NEXT: %3 = "neura.constant"() <{value = 16 : i64}> : () -> i64 +// CHECK-NEXT: %4 = "neura.constant"() <{value = 1 : i64}> : () -> i64 +// CHECK-NEXT: %5 = "neura.constant"() <{value = 1 : i64}> : () -> i64 +// CHECK-NEXT: %6 = "neura.constant"() <{value = 0 : i64}> : () -> i64 +// CHECK-NEXT: %7 = neura.invariant %4, %2 : i64, i1 -> i64 +// CHECK-NEXT: %8 = neura.invariant %3, %2 : i64, i1 -> i64 +// CHECK-NEXT: %9 = neura.carry %5, %2, %0 : i64, i1, i64 -> i64 +// CHECK-NEXT: %10 = neura.carry %6, %2, %1 : i64, i1, i64 -> i64 +// CHECK-NEXT: %11 = "neura.icmp"(%10, %8) <{cmpType = "slt"}> : (i64, i64) -> i1 +// CHECK-NEXT: neura.ctrl_mov %11 -> %2 : i1 i1 +// CHECK-NEXT: %12 = neura.false_steer %9, %11 : i64, i1 -> i64 +// CHECK-NEXT: %13 = 
"neura.add"(%9, %9) : (i64, i64) -> i64 +// CHECK-NEXT: neura.ctrl_mov %13 -> %0 : i64 i64 +// CHECK-NEXT: %14 = "neura.add"(%10, %7) : (i64, i64) -> i64 +// CHECK-NEXT: neura.ctrl_mov %14 -> %1 : i64 i64 +// CHECK-NEXT: "neura.return"(%12) : (i64) -> () +// CHECK-NEXT: } + +// MAPPING: func.func @simple_add_loop() -> i64 attributes {accelerator = "neura", dataflow_mode = "steering", mapping_info = {compiled_ii = 4 : i32, mapping_mode = "spatial-only", mapping_strategy = "heuristic", rec_mii = 2 : i32, res_mii = 1 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { +// MAPPING-NEXT: %0 = neura.reserve : i64 +// MAPPING-NEXT: %1 = neura.reserve : i64 +// MAPPING-NEXT: %2 = neura.reserve : i1 +// MAPPING-NEXT: %3 = "neura.constant"() <{value = 16 : i64}> {mapping_locs = [{id = 0 : i32, resource = "tile", time_step = 0 : i32, x = 0 : i32, y = 0 : i32}]} : () -> i64 +// MAPPING-NEXT: %4 = "neura.constant"() <{value = 1 : i64}> {mapping_locs = [{id = 11 : i32, resource = "tile", time_step = 1 : i32, x = 3 : i32, y = 2 : i32}]} : () -> i64 +// MAPPING-NEXT: %5 = "neura.constant"() <{value = 1 : i64}> {mapping_locs = [{id = 5 : i32, resource = "tile", time_step = 1 : i32, x = 1 : i32, y = 1 : i32}]} : () -> i64 +// MAPPING-NEXT: %6 = "neura.constant"() <{value = 0 : i64}> {mapping_locs = [{id = 13 : i32, resource = "tile", time_step = 0 : i32, x = 1 : i32, y = 3 : i32}]} : () -> i64 +// MAPPING-NEXT: %7 = "neura.data_mov"(%4) {mapping_locs = [{id = 35 : i32, resource = "link", time_step = 1 : i32}]} : (i64) -> i64 +// MAPPING-NEXT: %8 = neura.invariant %7, %2 {mapping_locs = [{id = 10 : i32, resource = "tile", time_step = 2 : i32, x = 2 : i32, y = 2 : i32}]} : i64, i1 -> i64 +// MAPPING-NEXT: %9 = "neura.data_mov"(%3) {mapping_locs = [{id = 0 : i32, resource = "link", time_step = 0 : i32}]} : (i64) -> i64 +// MAPPING-NEXT: %10 = neura.invariant %9, %2 {mapping_locs = [{id = 1 : i32, resource = "tile", time_step = 1 : i32, x = 1 : i32, y = 0 : i32}]} : i64, i1 -> i64 +// 
MAPPING-NEXT: %11 = "neura.data_mov"(%5) {mapping_locs = [{id = 14 : i32, resource = "link", time_step = 1 : i32}]} : (i64) -> i64 +// MAPPING-NEXT: %12 = neura.carry %11, %2, %0 {mapping_locs = [{id = 6 : i32, resource = "tile", time_step = 2 : i32, x = 2 : i32, y = 1 : i32}]} : i64, i1, i64 -> i64 +// MAPPING-NEXT: %13 = "neura.data_mov"(%6) {mapping_locs = [{id = 42 : i32, resource = "link", time_step = 0 : i32}]} : (i64) -> i64 +// MAPPING-NEXT: %14 = neura.carry %13, %2, %1 {mapping_locs = [{id = 9 : i32, resource = "tile", time_step = 1 : i32, x = 1 : i32, y = 2 : i32}]} : i64, i1, i64 -> i64 +// MAPPING-NEXT: %15 = "neura.data_mov"(%14) {mapping_locs = [{id = 27 : i32, resource = "link", time_step = 1 : i32}, {id = 25 : i32, resource = "link", time_step = 2 : i32}]} : (i64) -> i64 +// MAPPING-NEXT: %16 = "neura.data_mov"(%10) {mapping_locs = [{id = 2 : i32, resource = "link", time_step = 1 : i32}, {id = 1 : i32, resource = "link", time_step = 2 : i32}]} : (i64) -> i64 +// MAPPING-NEXT: %17 = "neura.icmp"(%15, %16) <{cmpType = "slt"}> {mapping_locs = [{id = 4 : i32, resource = "tile", time_step = 3 : i32, x = 0 : i32, y = 1 : i32}]} : (i64, i64) -> i1 +// MAPPING-NEXT: neura.ctrl_mov %17 -> %2 {mapping_locs = [{id = 10 : i32, resource = "link", time_step = 3 : i32}, {id = 16 : i32, resource = "link", time_step = 4 : i32}, {id = 28 : i32, resource = "link", time_step = 5 : i32}]} : i1 i1 +// MAPPING-NEXT: %18 = "neura.data_mov"(%17) {mapping_locs = [{id = 12 : i32, resource = "link", time_step = 3 : i32}]} : (i1) -> i1 +// MAPPING-NEXT: %19 = "neura.not"(%18) {mapping_locs = [{id = 8 : i32, resource = "tile", time_step = 4 : i32, x = 0 : i32, y = 2 : i32}]} : (i1) -> i1 +// MAPPING-NEXT: %20 = "neura.data_mov"(%12) {mapping_locs = [{id = 19 : i32, resource = "link", time_step = 2 : i32}, {id = 64 : i32, resource = "register", time_step = 3 : i32}, {id = 64 : i32, resource = "register", time_step = 4 : i32}]} : (i64) -> i64 +// MAPPING-NEXT: %21 = 
"neura.data_mov"(%12) {mapping_locs = [{id = 17 : i32, resource = "link", time_step = 2 : i32}, {id = 15 : i32, resource = "link", time_step = 3 : i32}, {id = 3 : i32, resource = "link", time_step = 4 : i32}]} : (i64) -> i64 +// MAPPING-NEXT: %22 = "neura.add"(%20, %21) {mapping_locs = [{id = 2 : i32, resource = "tile", time_step = 5 : i32, x = 2 : i32, y = 0 : i32}]} : (i64, i64) -> i64 +// MAPPING-NEXT: neura.ctrl_mov %22 -> %0 {mapping_locs = [{id = 7 : i32, resource = "link", time_step = 5 : i32}]} : i64 i64 +// MAPPING-NEXT: %23 = "neura.data_mov"(%14) {mapping_locs = [{id = 30 : i32, resource = "link", time_step = 1 : i32}, {id = 41 : i32, resource = "link", time_step = 2 : i32}]} : (i64) -> i64 +// MAPPING-NEXT: %24 = "neura.data_mov"(%8) {mapping_locs = [{id = 34 : i32, resource = "link", time_step = 2 : i32}]} : (i64) -> i64 +// MAPPING-NEXT: %25 = "neura.add"(%23, %24) {mapping_locs = [{id = 14 : i32, resource = "tile", time_step = 3 : i32, x = 2 : i32, y = 3 : i32}]} : (i64, i64) -> i64 +// MAPPING-NEXT: neura.ctrl_mov %25 -> %1 {mapping_locs = [{id = 45 : i32, resource = "link", time_step = 3 : i32}, {id = 31 : i32, resource = "link", time_step = 4 : i32}]} : i64 i64 +// MAPPING-NEXT: %26 = "neura.data_mov"(%12) {mapping_locs = [{id = 18 : i32, resource = "link", time_step = 2 : i32}]} : (i64) -> i64 +// MAPPING-NEXT: "neura.return"(%26) {mapping_locs = [{id = 7 : i32, resource = "tile", time_step = 3 : i32, x = 3 : i32, y = 1 : i32}]} : (i64) -> () +// MAPPING-NEXT: } \ No newline at end of file diff --git a/test/neura/steer_ctrl/loop_without_return_value.mlir b/test/neura/steer_ctrl/loop_without_return_value.mlir new file mode 100644 index 00000000..d21a18d7 --- /dev/null +++ b/test/neura/steer_ctrl/loop_without_return_value.mlir @@ -0,0 +1,55 @@ +// RUN: mlir-opt %s --lower-affine --convert-scf-to-cf --convert-cf-to-llvm -o %t-llvm.mlir +// RUN: mlir-neura-opt %t-llvm.mlir \ +// RUN: --assign-accelerator \ +// RUN: --lower-arith-to-neura \ +// RUN: 
--lower-memref-to-neura \ +// RUN: --lower-builtin-to-neura \ +// RUN: --lower-llvm-to-neura \ +// RUN: --canonicalize-cast \ +// RUN: --canonicalize-live-in \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: --transform-to-steer-control \ +// RUN: --remove-predicated-type \ +// RUN: | FileCheck %s + +module attributes {} { + func.func @_Z11simple_loopPiS_(%arg0: memref, %arg1: memref) attributes {llvm.linkage = #llvm.linkage} { + %c1_i32 = arith.constant 1 : i32 + %c2_i32 = arith.constant 2 : i32 + affine.for %arg2 = 0 to 128 { + %0 = affine.load %arg0[%arg2] : memref + %1 = arith.muli %0, %c2_i32 : i32 + %2 = arith.addi %1, %c1_i32 : i32 + affine.store %2, %arg1[%arg2] : memref + } + return + } +} + +// CHECK: func.func @_Z11simple_loopPiS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "steering", llvm.linkage = #llvm.linkage} { +// CHECK-NEXT: %0 = neura.reserve : i64 +// CHECK-NEXT: %1 = neura.reserve : i1 +// CHECK-NEXT: %2 = "neura.constant"() <{value = "%arg0"}> : () -> memref +// CHECK-NEXT: %3 = "neura.constant"() <{value = "%arg1"}> : () -> memref +// CHECK-NEXT: %4 = "neura.constant"() <{value = 1 : i64}> : () -> i64 +// CHECK-NEXT: %5 = "neura.constant"() <{value = 128 : i64}> : () -> i64 +// CHECK-NEXT: %6 = "neura.constant"() <{value = 1 : i32}> : () -> i32 +// CHECK-NEXT: %7 = "neura.constant"() <{value = 2 : i32}> : () -> i32 +// CHECK-NEXT: %8 = "neura.constant"() <{value = 0 : i64}> : () -> i64 +// CHECK-NEXT: %9 = neura.invariant %4, %1 : i64, i1 -> i64 +// CHECK-NEXT: %10 = neura.invariant %3, %1 : memref, i1 -> memref +// CHECK-NEXT: %11 = neura.invariant %6, %1 : i32, i1 -> i32 +// CHECK-NEXT: %12 = neura.invariant %7, %1 : i32, i1 -> i32 +// CHECK-NEXT: %13 = neura.invariant %2, %1 : memref, i1 -> memref +// CHECK-NEXT: %14 = neura.invariant %5, %1 : i64, i1 -> i64 +// CHECK-NEXT: %15 = neura.carry %8, %1, %0 : i64, i1, i64 -> i64 +// CHECK-NEXT: %16 = 
"neura.icmp"(%15, %14) <{cmpType = "slt"}> : (i64, i64) -> i1 +// CHECK-NEXT: neura.ctrl_mov %16 -> %1 : i1 i1 +// CHECK-NEXT: %17 = neura.load_indexed %13[%15 : i64] memref : i32 +// CHECK-NEXT: %18 = "neura.mul"(%17, %12) : (i32, i32) -> i32 +// CHECK-NEXT: %19 = "neura.add"(%18, %11) : (i32, i32) -> i32 +// CHECK-NEXT: neura.store_indexed %19 to %10[%15 : i64] memref : i32 +// CHECK-NEXT: %20 = "neura.add"(%15, %9) : (i64, i64) -> i64 +// CHECK-NEXT: neura.ctrl_mov %20 -> %0 : i64 i64 +// CHECK-NEXT: "neura.return"() : () -> () diff --git a/test/visualize/test2.mlir b/test/visualize/test2.mlir index a74ce5da..1c6b3db6 100644 --- a/test/visualize/test2.mlir +++ b/test/visualize/test2.mlir @@ -19,7 +19,7 @@ func.func @test_print_op_graph(%a: f32, %b: f32) -> f32 { // CHECK-GRAPH: label = "neura.constant : (!neura.data) // CHECK-GRAPH: label = "neura.fadd : (!neura.data)\n" // CHECK-GRAPH: digraph G -// CHECK-GRAPH: label = "func.func : ()\n\naccelerator: \"neura\"\nfunction_type: (f32, f32) -> f32\nsym_name: \"test_print_op_graph..."; +// CHECK-GRAPH: label = "func.func : ()\n\naccelerator: \"neura\"\ndataflow_mode: \"predicate\"\nfunction_type: (f32, f32) -> f32\nsym_name: \"test_print_op_graph..."; // CHECK-GRAPH: label = "neura.constant : (!neura.data) // CHECK-GRAPH: label = "neura.data_mov : (!neura.data) // CHECK-GRAPH: label = "neura.fadd : (!neura.data)\n"