From d209cce731c38d3198154dc13c1aed865d141eff Mon Sep 17 00:00:00 2001 From: tangyz <739245980@qq.com> Date: Fri, 24 Oct 2025 01:03:18 +0800 Subject: [PATCH 01/12] Add LLVM to NEURA conversion for select and and operations --- include/NeuraDialect/NeuraOps.td | 8 +++++ .../LlvmToNeura/LlvmToNeuraPass.cpp | 30 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td index eeb2677a..ff0729ca 100644 --- a/include/NeuraDialect/NeuraOps.td +++ b/include/NeuraDialect/NeuraOps.td @@ -147,6 +147,14 @@ def Neura_FMinOp : Op { } // Defines a bitwise OR operation. +def Neura_AndOp : Op { + let summary = "Bitwise AND operation"; + let arguments = (ins AnyType:$lhs, AnyType:$rhs); + let results = (outs AnyType:$result); + // let assemblyFormat = "$lhs `,` $rhs `,` attr-dict `:` type($result)"; + let traits = [SameOperandsAndResultElementType]; +} + def Neura_OrOp : Op { let summary = "Bitwise OR operation"; let arguments = (ins AnyType:$lhs, AnyType:$rhs); diff --git a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp index 1195565d..cce6b861 100644 --- a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp +++ b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp @@ -76,6 +76,17 @@ struct LlvmFSubToNeuraFSub : public OpRewritePattern { } }; +struct LlvmAndToNeuraAnd : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(mlir::LLVM::AndOp op, + PatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, op.getType(), op.getLhs(), + op.getRhs()); + return success(); + } +}; + struct LlvmOrToNeuraOr : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -243,6 +254,23 @@ struct LlvmFPToSIToNeuraCast : public OpRewritePattern { } }; +struct LlvmSelectToNeuraSel : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(LLVM::SelectOp op, 
+ PatternRewriter &rewriter) const override { + Value cond = op.getCondition(); + Value true_value = op.getTrueValue(); + Value false_value = op.getFalseValue(); + Type result_type = op.getType(); + + // Note: neura.sel has different argument order: (ifTrue, ifFalse, cond) + rewriter.replaceOpWithNewOp(op, result_type, + true_value, false_value, cond); + return success(); + } +}; + struct LlvmFMulAddToNeuraFMulFAdd : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -723,6 +751,7 @@ struct LowerLlvmToNeuraPass patterns.insert(&getContext()); // Scalar operations patterns.add(&getContext()); + patterns.add(&getContext()); patterns.add(&getContext()); patterns.add(&getContext()); patterns.add(&getContext()); @@ -752,6 +781,7 @@ struct LowerLlvmToNeuraPass patterns.add(&getContext()); patterns.add(&getContext()); patterns.add(&getContext()); + patterns.add(&getContext()); FrozenRewritePatternSet frozen(std::move(patterns)); From 788eff88e8af54c8581a09a9d786927cfb0a4c69 Mon Sep 17 00:00:00 2001 From: tangyz <739245980@qq.com> Date: Fri, 24 Oct 2025 16:27:05 +0800 Subject: [PATCH 02/12] Refactor FoldConstantPass with generic framework and update test --- .../HwAgnosticOpt/FoldConstantPass.cpp | 919 ++++++++++-------- .../constant_folding/simple_loop.mlir | 4 +- 2 files changed, 505 insertions(+), 418 deletions(-) diff --git a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp index 62787bac..6137cf5e 100644 --- a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp +++ b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp @@ -2,6 +2,7 @@ #include "NeuraDialect/NeuraTypes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/Value.h" #include "mlir/IR/ValueRange.h" @@ -12,6 +13,7 @@ #include 
"llvm/Support/LogicalResult.h" #include "llvm/Support/raw_ostream.h" #include +#include using namespace mlir; @@ -19,57 +21,14 @@ using namespace mlir; #include "NeuraDialect/NeuraPasses.h.inc" namespace { -// ========================================= -// FuseConstantAndGrantPattern -// Valid only after transform-ctrl-to-data-flow pass. -// ========================================= -struct FuseConstantAndGrantPattern - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(neura::ConstantOp constant_op, - PatternRewriter &rewriter) const override { - bool made_change = false; - - // Checks if the constant operation is used by a grant_once or grant_always - // operation. - for (auto user : constant_op->getUsers()) { - if (isa(user) || isa(user)) { - if (neura::GrantOnceOp grant_once_op = - dyn_cast(user)) { - auto new_grant_once_op = rewriter.create( - grant_once_op.getLoc(), grant_once_op.getResult().getType(), - /*value=*/nullptr, constant_op->getAttr("value")); - // Replaces the original constant operation with the new one. - rewriter.replaceOp(grant_once_op, new_grant_once_op); - made_change = true; - } else if (neura::GrantAlwaysOp grant_always_op = - dyn_cast(user)) { - auto new_grant_always_op = rewriter.create( - grant_always_op.getLoc(), grant_always_op.getResult().getType(), - /*value=*/nullptr, constant_op->getAttr("value")); - // Replaces the original constant operation with the new one. - rewriter.replaceOp(grant_always_op, new_grant_always_op); - made_change = true; - } - } - } - - if (constant_op->use_empty()) { - // If the constant operation has no users, it can be removed. - rewriter.eraseOp(constant_op); - made_change = true; - } - - return success(made_change); - } -}; // ========================================= -// FoldConstantPass -// Valid before transform-ctrl-to-data-flow pass. 
+// Helper Functions // ========================================= bool isOriginConstantOp(Value value) { + if (!value) { + return false; + } Operation *def_op = value.getDefiningOp(); if (!def_op || !isa(def_op)) { return false; @@ -95,423 +54,546 @@ void addConstantAttribute(Operation *op, StringRef attr_name, op->setAttr(attr_name, const_value); } -// A template pattern to fuse binary operations with a constant on the -// right-hand side operand. -template -struct FuseRhsConstantPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - // By default, we assume the operation is not commutative. - // If the operation is commutative, we can extend this pattern to support - // constant folding on the left-hand side operand as well. - virtual bool isCommutative() const { return false; } +// ========================================= +// Generic Constant Folding Framework +// ========================================= - virtual Operation * - createOpWithFusedRhsConstant(OpType op, Value non_const_operand, - Attribute rhs_value, - PatternRewriter &rewriter) const = 0; +// Structure to hold information about which operands to fold +struct OperandFoldingInfo { + SmallVector const_operand_indices; // Indices of constant operands to fold + SmallVector const_values; // Corresponding constant values + SmallVector all_operands; // All operands (nullptr for folded ones) + SmallVector const_ops_to_clean; // Constant ops to potentially clean up +}; - LogicalResult matchAndRewrite(OpType op, - PatternRewriter &rewriter) const override { - if (op->hasAttr("rhs_value")) { - // Already fused with a constant on the right-hand side. 
- return failure(); +// Analyzes operands from right to left and determines which to fold +OperandFoldingInfo analyzeOperandsForFolding(Operation *op) { + OperandFoldingInfo info; + + size_t num_operands = op->getNumOperands(); + if (num_operands == 0) { + return info; + } + + // First pass: identify which operands are constants + SmallVector is_const(num_operands, false); + bool has_non_const = false; + + for (size_t i = 0; i < num_operands; ++i) { + if (isOriginConstantOp(op->getOperand(i))) { + is_const[i] = true; + } else { + has_non_const = true; } - - Value lhs = op.getLhs(); - Value rhs = op.getRhs(); - - bool lhs_is_const = isOriginConstantOp(lhs); - bool rhs_is_const = rhs && isOriginConstantOp(rhs); - - if (rhs_is_const) { - auto constant_op = dyn_cast(rhs.getDefiningOp()); - - Attribute rhs_value = getOriginConstantValue(rhs); - Operation *fused_op = - createOpWithFusedRhsConstant(op, lhs, rhs_value, rewriter); - - rewriter.replaceOp(op, fused_op->getResults()); - if (constant_op->use_empty()) { - rewriter.eraseOp(constant_op); + } + + // Second pass: decide which constants to fold + // Build all_operands array (nullptr for folded operands) + info.all_operands.resize(num_operands); + + for (size_t i = 0; i < num_operands; ++i) { + Value operand = op->getOperand(i); + + if (is_const[i]) { + // If this is operand 0 and there are no other non-const operands, + // we must keep it (MLIR operations need at least one operand) + if (i == 0 && !has_non_const) { + info.all_operands[i] = operand; + } else { + // This operand will be folded - mark as nullptr + info.all_operands[i] = nullptr; + info.const_operand_indices.push_back(i); + info.const_values.push_back(getOriginConstantValue(operand)); + info.const_ops_to_clean.push_back(operand.getDefiningOp()); } - return success(); + } else { + // This operand is not a constant, keep it + info.all_operands[i] = operand; } + } + + return info; +} - if (lhs_is_const && !rhs_is_const && isCommutative()) { - auto 
constant_op = dyn_cast(lhs.getDefiningOp()); - - Attribute lhs_value = getOriginConstantValue(lhs); - Operation *fused_op = - createOpWithFusedRhsConstant(op, rhs, lhs_value, rewriter); - - rewriter.replaceOp(op, fused_op->getResults()); - if (constant_op->use_empty()) { - rewriter.eraseOp(constant_op); - } - return success(); +// Gets the attribute name for a given operand index +// For binary operations, uses "lhs_value" and "rhs_value" +// For other operations, uses "operand_N_value" +std::string getAttributeNameForOperandIndex(size_t index, size_t total_operands) { + if (total_operands == 2) { + // Binary operation: use lhs_value/rhs_value + if (index == 0) { + return "lhs_value"; + } else { + return "rhs_value"; } - - return failure(); + } else { + // Multi-operand operation: use operand_N_value + return "operand_" + std::to_string(index) + "_value"; } -}; - -struct FuseAddRhsConstantPattern : public FuseRhsConstantPattern { - using FuseRhsConstantPattern::FuseRhsConstantPattern; - - bool isCommutative() const override { return true; } +} - Operation * - createOpWithFusedRhsConstant(neura::AddOp op, Value non_const_operand, - Attribute rhs_value, - PatternRewriter &rewriter) const override { - auto fused_op = rewriter.create( - op.getLoc(), op.getResult().getType(), non_const_operand, - /*rhs=*/nullptr); - addConstantAttribute(fused_op, "rhs_value", rhs_value); - return fused_op; - } -}; +// ========================================= +// Generic Constant Folding Pattern +// ========================================= +template +struct GenericFuseConstantPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; -struct FuseSubRhsConstantPattern : public FuseRhsConstantPattern { - using FuseRhsConstantPattern::FuseRhsConstantPattern; - - Operation * - createOpWithFusedRhsConstant(neura::SubOp op, Value non_const_operand, - Attribute rhs_value, - PatternRewriter &rewriter) const override { - auto fused_op = rewriter.create( - op.getLoc(), 
op.getResult().getType(), non_const_operand, - /*rhs=*/nullptr); - addConstantAttribute(fused_op, "rhs_value", rhs_value); - return fused_op; + // Virtual function to get attribute name for a given operand index + // Default implementation uses binary naming (lhs/rhs) or operand_N naming + // Derived classes can override this for custom naming + virtual std::string getAttributeName(size_t operand_idx, size_t total_operands) const { + return getAttributeNameForOperandIndex(operand_idx, total_operands); } -}; - -struct FuseMulRhsConstantPattern : public FuseRhsConstantPattern { - using FuseRhsConstantPattern::FuseRhsConstantPattern; - bool isCommutative() const override { return true; } - - Operation * - createOpWithFusedRhsConstant(neura::MulOp op, Value non_const_operand, - Attribute rhs_value, - PatternRewriter &rewriter) const override { - auto fused_op = rewriter.create( - op.getLoc(), op.getResult().getType(), non_const_operand, - /*rhs=*/nullptr); - addConstantAttribute(fused_op, "rhs_value", rhs_value); - return fused_op; + LogicalResult matchAndRewrite(OpType op, + PatternRewriter &rewriter) const override { + // Get the original number of operands before folding + size_t num_operands = op->getNumOperands(); + + // Analyze operands to determine which can be folded + OperandFoldingInfo fold_info = analyzeOperandsForFolding(op); + + // If no constant operands found, nothing to do + if (fold_info.const_operand_indices.empty()) { + return failure(); + } + + // Check if any operands have already been folded + // Look for any attribute ending with "_value" which indicates constant folding + for (auto attr : op->getAttrs()) { + StringRef attr_name = attr.getName().getValue(); + if (attr_name.ends_with("_value")) { + return failure(); + } + } + + // Create the new operation with all operands (nullptr for folded ones) + Operation *new_op = createOpWithFoldedConstants( + op, fold_info.all_operands, rewriter); + + if (!new_op) { + return failure(); + } + + // Add 
constant attributes for each folded operand + for (size_t i = 0; i < fold_info.const_operand_indices.size(); ++i) { + size_t operand_idx = fold_info.const_operand_indices[i]; + Attribute const_value = fold_info.const_values[i]; + + std::string attr_name = getAttributeName(operand_idx, num_operands); + addConstantAttribute(new_op, attr_name, const_value); + } + + // Replace the old operation + rewriter.replaceOp(op, new_op->getResults()); + + // Clean up unused constant operations + for (Operation *const_op : fold_info.const_ops_to_clean) { + if (const_op->use_empty()) { + rewriter.eraseOp(const_op); + } + } + + return success(); } + + // Virtual function to create the operation with folded constants + // Must be implemented by derived classes + virtual Operation * + createOpWithFoldedConstants(OpType op, ArrayRef non_const_operands, + PatternRewriter &rewriter) const = 0; }; -struct FuseICmpRhsConstantPattern - : public FuseRhsConstantPattern { - using FuseRhsConstantPattern::FuseRhsConstantPattern; - - Operation * - createOpWithFusedRhsConstant(neura::ICmpOp op, Value non_const_operand, - Attribute rhs_value, - PatternRewriter &rewriter) const override { - auto fused_op = rewriter.create( - op.getLoc(), op.getResult().getType(), non_const_operand, - /*rhs=*/nullptr, op.getCmpType()); - addConstantAttribute(fused_op, "rhs_value", rhs_value); - return fused_op; - } -}; +// ========================================= +// Specialized Patterns for Specific Operations +// ========================================= -struct FuseFAddRhsConstantPattern - : public FuseRhsConstantPattern { - using FuseRhsConstantPattern::FuseRhsConstantPattern; - - bool isCommutative() const override { return true; } - - Operation * - createOpWithFusedRhsConstant(neura::FAddOp op, Value non_const_operand, - Attribute rhs_value, - PatternRewriter &rewriter) const override { - auto fused_op = rewriter.create( - op.getLoc(), op.getResult().getType(), non_const_operand, - /*rhs=*/nullptr); - 
addConstantAttribute(fused_op, "rhs_value", rhs_value); - return fused_op; +// Helper macro to define a pattern for a binary operation +#define DEFINE_BINARY_OP_PATTERN(OP_NAME, OP_TYPE) \ + struct Fuse##OP_NAME##ConstantPattern \ + : public GenericFuseConstantPattern { \ + using GenericFuseConstantPattern::GenericFuseConstantPattern; \ + Operation *createOpWithFoldedConstants( \ + neura::OP_TYPE op, ArrayRef all_operands, \ + PatternRewriter &rewriter) const override { \ + /* Extract only non-null operands */ \ + SmallVector operands; \ + for (Value v : all_operands) { \ + if (v) operands.push_back(v); \ + } \ + /* Use generic Operation create and copy attributes */ \ + OperationState state(op.getLoc(), op.getOperationName()); \ + state.addOperands(operands); \ + state.addTypes(op->getResultTypes()); \ + /* Copy attributes except operandSegmentSizes (will be auto-generated) */ \ + for (auto attr : op->getAttrs()) { \ + if (attr.getName() != "operandSegmentSizes") { \ + state.addAttribute(attr.getName(), attr.getValue()); \ + } \ + } \ + return rewriter.create(state); \ + } \ + }; + +// Define patterns for all binary arithmetic operations +DEFINE_BINARY_OP_PATTERN(Add, AddOp) +DEFINE_BINARY_OP_PATTERN(Sub, SubOp) +DEFINE_BINARY_OP_PATTERN(Mul, MulOp) +DEFINE_BINARY_OP_PATTERN(Div, DivOp) +DEFINE_BINARY_OP_PATTERN(Rem, RemOp) +DEFINE_BINARY_OP_PATTERN(FAdd, FAddOp) +DEFINE_BINARY_OP_PATTERN(FSub, FSubOp) +DEFINE_BINARY_OP_PATTERN(FMul, FMulOp) + +// Special case for ICmp with cmp_type attribute +struct FuseICmpConstantPattern + : public GenericFuseConstantPattern { + using GenericFuseConstantPattern::GenericFuseConstantPattern; + + Operation *createOpWithFoldedConstants( + neura::ICmpOp op, ArrayRef all_operands, + PatternRewriter &rewriter) const override { + // Extract only non-null operands + SmallVector operands; + for (Value v : all_operands) { + if (v) operands.push_back(v); + } + + // Use generic Operation create and copy attributes + OperationState 
state(op.getLoc(), op.getOperationName()); + state.addOperands(operands); + state.addTypes(op->getResultTypes()); + // Copy attributes except operandSegmentSizes (will be auto-generated) + for (auto attr : op->getAttrs()) { + if (attr.getName() != "operandSegmentSizes") { + state.addAttribute(attr.getName(), attr.getValue()); + } + } + return rewriter.create(state); } }; -struct FuseFSubRhsConstantPattern - : public FuseRhsConstantPattern { - using FuseRhsConstantPattern::FuseRhsConstantPattern; - - Operation * - createOpWithFusedRhsConstant(neura::FSubOp op, Value non_const_operand, - Attribute rhs_value, - PatternRewriter &rewriter) const override { - auto fused_op = rewriter.create( - op.getLoc(), op.getResult().getType(), non_const_operand, - /*rhs=*/nullptr); - addConstantAttribute(fused_op, "rhs_value", rhs_value); - return fused_op; +// Special case for FMax with nan_semantic attribute +struct FuseFMaxConstantPattern + : public GenericFuseConstantPattern { + using GenericFuseConstantPattern::GenericFuseConstantPattern; + + Operation *createOpWithFoldedConstants( + neura::FMaxOp op, ArrayRef all_operands, + PatternRewriter &rewriter) const override { + // Extract only non-null operands + SmallVector operands; + for (Value v : all_operands) { + if (v) operands.push_back(v); + } + + // Use generic Operation create and copy attributes + OperationState state(op.getLoc(), op.getOperationName()); + state.addOperands(operands); + state.addTypes(op->getResultTypes()); + // Copy attributes except operandSegmentSizes (will be auto-generated) + for (auto attr : op->getAttrs()) { + if (attr.getName() != "operandSegmentSizes") { + state.addAttribute(attr.getName(), attr.getValue()); + } + } + return rewriter.create(state); } }; -struct FuseFMulRhsConstantPattern - : public FuseRhsConstantPattern { - using FuseRhsConstantPattern::FuseRhsConstantPattern; - - bool isCommutative() const override { return true; } - - Operation * - createOpWithFusedRhsConstant(neura::FMulOp op, 
Value non_const_operand, - Attribute rhs_value, - PatternRewriter &rewriter) const override { - auto fused_op = rewriter.create( - op.getLoc(), op.getResult().getType(), non_const_operand, - /*rhs=*/nullptr); - addConstantAttribute(fused_op, "rhs_value", rhs_value); - return fused_op; +// Special case for FMin with nan_semantic attribute +struct FuseFMinConstantPattern + : public GenericFuseConstantPattern { + using GenericFuseConstantPattern::GenericFuseConstantPattern; + + Operation *createOpWithFoldedConstants( + neura::FMinOp op, ArrayRef all_operands, + PatternRewriter &rewriter) const override { + // Extract only non-null operands + SmallVector operands; + for (Value v : all_operands) { + if (v) operands.push_back(v); + } + + // Use generic Operation create and copy attributes + OperationState state(op.getLoc(), op.getOperationName()); + state.addOperands(operands); + state.addTypes(op->getResultTypes()); + // Copy attributes except operandSegmentSizes (will be auto-generated) + for (auto attr : op->getAttrs()) { + if (attr.getName() != "operandSegmentSizes") { + state.addAttribute(attr.getName(), attr.getValue()); + } + } + return rewriter.create(state); } }; -struct FuseFMaxRhsConstantPattern - : public FuseRhsConstantPattern { - using FuseRhsConstantPattern::FuseRhsConstantPattern; - - bool isCommutative() const override { return true; } - - Operation * - createOpWithFusedRhsConstant(neura::FMaxOp op, Value non_const_operand, - Attribute rhs_value, - PatternRewriter &rewriter) const override { - auto fused_op = rewriter.create( - op.getLoc(), op.getResult().getType(), non_const_operand, - /*rhs=*/nullptr, op.getNanSemantic()); - addConstantAttribute(fused_op, "rhs_value", rhs_value); - return fused_op; +// Pattern for GEP operation (base + indices) +struct FuseGEPConstantPattern : public GenericFuseConstantPattern { + using GenericFuseConstantPattern::GenericFuseConstantPattern; + + // GEP always uses lhs_value for base (operand 0) + std::string 
getAttributeName(size_t operand_idx, size_t total_operands) const override { + if (operand_idx == 0) { + return "lhs_value"; + } else { + return "operand_" + std::to_string(operand_idx) + "_value"; + } } -}; - -struct FuseFMinRhsConstantPattern - : public FuseRhsConstantPattern { - using FuseRhsConstantPattern::FuseRhsConstantPattern; - - bool isCommutative() const override { return true; } - - Operation * - createOpWithFusedRhsConstant(neura::FMinOp op, Value non_const_operand, - Attribute rhs_value, - PatternRewriter &rewriter) const override { - auto fused_op = rewriter.create( - op.getLoc(), op.getResult().getType(), non_const_operand, - /*rhs=*/nullptr, op.getNanSemantic()); - addConstantAttribute(fused_op, "rhs_value", rhs_value); - return fused_op; + + Operation *createOpWithFoldedConstants( + neura::GEP op, ArrayRef all_operands, + PatternRewriter &rewriter) const override { + // GEP: operand 0 is base, rest are indices + Value base = all_operands[0]; + SmallVector indices; + for (size_t i = 1; i < all_operands.size(); ++i) { + if (all_operands[i]) { + indices.push_back(all_operands[i]); + } + } + + // Build operand list and calculate segment sizes + SmallVector operands; + int32_t num_base = 0; + if (base) { + operands.push_back(base); + num_base = 1; + } + for (Value idx : indices) { + operands.push_back(idx); + } + int32_t num_indices = indices.size(); + + // Create operation with proper operandSegmentSizes + OperationState state(op.getLoc(), op.getOperationName()); + state.addOperands(operands); + state.addTypes(op->getResultTypes()); + + // Copy attributes except operandSegmentSizes + for (auto attr : op->getAttrs()) { + if (attr.getName() != "operandSegmentSizes") { + state.addAttribute(attr.getName(), attr.getValue()); + } + } + + // Set the correct operandSegmentSizes + state.addAttribute("operandSegmentSizes", + rewriter.getDenseI32ArrayAttr({num_base, num_indices})); + + return rewriter.create(state); } }; -struct FuseDivRhsConstantPattern : 
public FuseRhsConstantPattern { - using FuseRhsConstantPattern::FuseRhsConstantPattern; - - Operation * - createOpWithFusedRhsConstant(neura::DivOp op, Value non_const_operand, - Attribute rhs_value, - PatternRewriter &rewriter) const override { - auto fused_op = rewriter.create( - op.getLoc(), op.getResult().getType(), non_const_operand, - /*rhs=*/nullptr); - addConstantAttribute(fused_op, "rhs_value", rhs_value); - return fused_op; +// Pattern for Store operation (value, addr) +struct FuseStoreConstantPattern + : public GenericFuseConstantPattern { + using GenericFuseConstantPattern::GenericFuseConstantPattern; + + // Store uses lhs_value for value (operand 0) and rhs_value for addr (operand 1) + std::string getAttributeName(size_t operand_idx, size_t total_operands) const override { + if (operand_idx == 0) { + return "lhs_value"; + } else if (operand_idx == 1) { + return "rhs_value"; + } else { + return "operand_" + std::to_string(operand_idx) + "_value"; + } } -}; - -struct FuseRemRhsConstantPattern : public FuseRhsConstantPattern { - using FuseRhsConstantPattern::FuseRhsConstantPattern; - - Operation * - createOpWithFusedRhsConstant(neura::RemOp op, Value non_const_operand, - Attribute rhs_value, - PatternRewriter &rewriter) const override { - auto fused_op = rewriter.create( - op.getLoc(), op.getResult().getType(), non_const_operand, - /*rhs=*/nullptr); - addConstantAttribute(fused_op, "rhs_value", rhs_value); - return fused_op; + + Operation *createOpWithFoldedConstants( + neura::StoreOp op, ArrayRef all_operands, + PatternRewriter &rewriter) const override { + // Store has two operands: value (operand 0) and addr (operand 1) + // Build operand list with only non-null values + SmallVector operands; + for (Value v : all_operands) { + if (v) operands.push_back(v); + } + + // Use generic Operation create and copy attributes + OperationState state(op.getLoc(), op.getOperationName()); + state.addOperands(operands); + state.addTypes(op->getResultTypes()); + // 
Copy attributes except operandSegmentSizes (will be auto-generated) + for (auto attr : op->getAttrs()) { + if (attr.getName() != "operandSegmentSizes") { + state.addAttribute(attr.getName(), attr.getValue()); + } + } + return rewriter.create(state); } }; -// ========================================= -// FuseGepBaseConstantPattern -// Folds constant base pointer for GEP operation. -// ========================================= -struct FuseGepBaseConstantPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(neura::GEP gep_op, - PatternRewriter &rewriter) const override { - Value base = gep_op.getBase(); - - // Checks if base exists and is a constant. - if (!base || !isOriginConstantOp(base)) { - return failure(); +// Pattern for LoadIndexed operation (base + indices) +struct FuseLoadIndexedConstantPattern + : public GenericFuseConstantPattern { + using GenericFuseConstantPattern::GenericFuseConstantPattern; + + // LoadIndexed uses lhs_value for base (operand 0) + std::string getAttributeName(size_t operand_idx, size_t total_operands) const override { + if (operand_idx == 0) { + return "lhs_value"; + } else { + return "operand_" + std::to_string(operand_idx) + "_value"; } - - auto constant_op = dyn_cast(base.getDefiningOp()); - Attribute base_value = getOriginConstantValue(base); - - // Gets all indices (everything after base). + } + + Operation *createOpWithFoldedConstants( + neura::LoadIndexedOp op, ArrayRef all_operands, + PatternRewriter &rewriter) const override { + // LoadIndexed: operand 0 is base, rest are indices + Value base = all_operands[0]; SmallVector indices; - for (Value operand : gep_op.getIndices()) { - indices.push_back(operand); + for (size_t i = 1; i < all_operands.size(); ++i) { + if (all_operands[i]) { + indices.push_back(all_operands[i]); + } } - - // Creates new GEP with no base but with lhs_value attribute. 
- auto fused_gep = rewriter.create( - gep_op.getLoc(), - gep_op.getResult().getType(), - /*base=*/nullptr, - indices); - // TODO: Gather all the attribute -- https://github.com/coredac/dataflow/issues/145 - addConstantAttribute(fused_gep, "lhs_value", base_value); - - // Replaces the original GEP. - rewriter.replaceOp(gep_op, fused_gep); - // Cleans up constant if no longer used. - if (constant_op->use_empty()) { - rewriter.eraseOp(constant_op); + // Build operand list and calculate segment sizes + SmallVector operands; + int32_t num_base = 0; + if (base) { + operands.push_back(base); + num_base = 1; + } + for (Value idx : indices) { + operands.push_back(idx); } + int32_t num_indices = indices.size(); - return success(); - } -}; - -// ========================================= -// FuseStoreAddrConstantPattern -// Folds constant destination pointer for Store operation. -// ========================================= -struct FuseStoreAddrConstantPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(neura::StoreOp store_op, - PatternRewriter &rewriter) const override { - Value addr = store_op.getAddr(); + // Create operation with proper operandSegmentSizes + OperationState state(op.getLoc(), op.getOperationName()); + state.addOperands(operands); + state.addTypes(op->getResultTypes()); - // Checks if address exists and is a constant. - if (!addr || !isOriginConstantOp(addr)) { - return failure(); + // Copy attributes except operandSegmentSizes + for (auto attr : op->getAttrs()) { + if (attr.getName() != "operandSegmentSizes") { + state.addAttribute(attr.getName(), attr.getValue()); + } } - - auto constant_op = dyn_cast(addr.getDefiningOp()); - Attribute addr_value = getOriginConstantValue(addr); - - // Creates new Store with no addr but with rhs_value attribute. - auto fused_store = rewriter.create( - store_op.getLoc(), - store_op.getValue(), // Keeps the value operand. 
- /*addr=*/nullptr); // Removes addr operand. - addConstantAttribute(fused_store, "rhs_value", addr_value); - - // Replaces the original Store. - rewriter.replaceOp(store_op, fused_store); - // Cleans up constant if no longer used. - if (constant_op->use_empty()) { - rewriter.eraseOp(constant_op); - } + // Set the correct operandSegmentSizes + state.addAttribute("operandSegmentSizes", + rewriter.getDenseI32ArrayAttr({num_base, num_indices})); - return success(); + return rewriter.create(state); } }; -// ========================================= -// FuseLoadIndexedBaseConstantPattern -// Folds constant base pointer for LoadIndexed operation. -// ========================================= -struct FuseLoadIndexedBaseConstantPattern - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(neura::LoadIndexedOp load_indexed_op, - PatternRewriter &rewriter) const override { - Value base = load_indexed_op.getBase(); - - // Checks if base exists and is a constant. - if (!base || !isOriginConstantOp(base)) { - return failure(); +// Pattern for StoreIndexed operation (value, base, indices...) +struct FuseStoreIndexedConstantPattern + : public GenericFuseConstantPattern { + using GenericFuseConstantPattern::GenericFuseConstantPattern; + + // StoreIndexed uses lhs_value for value (operand 0) and rhs_value for base (operand 1) + std::string getAttributeName(size_t operand_idx, size_t total_operands) const override { + if (operand_idx == 0) { + return "lhs_value"; + } else if (operand_idx == 1) { + return "rhs_value"; + } else { + return "operand_" + std::to_string(operand_idx) + "_value"; } - - auto constant_op = dyn_cast(base.getDefiningOp()); - Attribute base_const_value = getOriginConstantValue(base); - - // Gets all indices. 
+ } + + Operation *createOpWithFoldedConstants( + neura::StoreIndexedOp op, ArrayRef all_operands, + PatternRewriter &rewriter) const override { + // StoreIndexed: operand 0 is value, operand 1 is base, rest are indices + Value value = all_operands[0]; + Value base = all_operands.size() > 1 ? all_operands[1] : Value(); SmallVector indices; - for (Value idx : load_indexed_op.getIndices()) { - indices.push_back(idx); + for (size_t i = 2; i < all_operands.size(); ++i) { + if (all_operands[i]) { + indices.push_back(all_operands[i]); + } } - - // Creates new LoadIndexed with no base but with lhs_value attribute. - auto fused_load_indexed = rewriter.create( - load_indexed_op.getLoc(), - load_indexed_op.getResult().getType(), - /*base=*/nullptr, - indices); - addConstantAttribute(fused_load_indexed, "lhs_value", base_const_value); - - // Replaces the original LoadIndexed. - rewriter.replaceOp(load_indexed_op, fused_load_indexed); - // Cleans up constant if no longer used. - if (constant_op->use_empty()) { - rewriter.eraseOp(constant_op); + // Build operand list and calculate segment sizes + SmallVector operands; + int32_t num_value = 0; + if (value) { + operands.push_back(value); + num_value = 1; + } + int32_t num_base = 0; + if (base) { + operands.push_back(base); + num_base = 1; } + for (Value idx : indices) { + operands.push_back(idx); + } + int32_t num_indices = indices.size(); - return success(); + // Create operation with proper operandSegmentSizes + OperationState state(op.getLoc(), op.getOperationName()); + state.addOperands(operands); + state.addTypes(op->getResultTypes()); + + // Copy attributes except operandSegmentSizes + for (auto attr : op->getAttrs()) { + if (attr.getName() != "operandSegmentSizes") { + state.addAttribute(attr.getName(), attr.getValue()); + } + } + + // Set the correct operandSegmentSizes + state.addAttribute("operandSegmentSizes", + rewriter.getDenseI32ArrayAttr({num_value, num_base, num_indices})); + + return rewriter.create(state); } }; 
// ========================================= -// FuseStoreIndexedBaseConstantPattern -// Folds constant base pointer for StoreIndexed operation. +// FuseConstantAndGrantPattern +// Valid only after transform-ctrl-to-data-flow pass. // ========================================= -struct FuseStoreIndexedBaseConstantPattern - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +struct FuseConstantAndGrantPattern + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(neura::StoreIndexedOp store_indexed_op, + LogicalResult matchAndRewrite(neura::ConstantOp constant_op, PatternRewriter &rewriter) const override { - Value base = store_indexed_op.getBase(); - - // Checks if base exists and is a constant. - if (!base || !isOriginConstantOp(base)) { - return failure(); - } - - auto constant_op = dyn_cast(base.getDefiningOp()); - Attribute base_const_value = getOriginConstantValue(base); + bool made_change = false; - // Gets all indices. - SmallVector indices; - for (Value idx : store_indexed_op.getIndices()) { - indices.push_back(idx); + // Checks if the constant operation is used by a grant_once or grant_always + // operation. + for (auto user : constant_op->getUsers()) { + if (isa(user) || isa(user)) { + if (neura::GrantOnceOp grant_once_op = + dyn_cast(user)) { + auto new_grant_once_op = rewriter.create( + grant_once_op.getLoc(), grant_once_op.getResult().getType(), + /*value=*/nullptr, constant_op->getAttr("value")); + // Replaces the original constant operation with the new one. + rewriter.replaceOp(grant_once_op, new_grant_once_op); + made_change = true; + } else if (neura::GrantAlwaysOp grant_always_op = + dyn_cast(user)) { + auto new_grant_always_op = rewriter.create( + grant_always_op.getLoc(), grant_always_op.getResult().getType(), + /*value=*/nullptr, constant_op->getAttr("value")); + // Replaces the original constant operation with the new one. 
+ rewriter.replaceOp(grant_always_op, new_grant_always_op); + made_change = true; + } + } } - // Creates new StoreIndexed with no base but with rhs_value attribute. - auto fused_store_indexed = rewriter.create( - store_indexed_op.getLoc(), - store_indexed_op.getValue(), // Keeps the value operand. - /*base=*/nullptr, - indices); - addConstantAttribute(fused_store_indexed, "rhs_value", base_const_value); - - // Replaces the original StoreIndexed. - rewriter.replaceOp(store_indexed_op, fused_store_indexed); - - // Cleans up constant if no longer used. if (constant_op->use_empty()) { + // If the constant operation has no users, it can be removed. rewriter.eraseOp(constant_op); + made_change = true; } - - return success(); + + return success(made_change); } }; @@ -524,30 +606,35 @@ struct FoldConstantPass StringRef getArgument() const override { return "fold-constant"; } StringRef getDescription() const override { - return "Fold constant operations."; + return "Fold constant operations into operation attributes."; } void runOnOperation() override { ModuleOp module_op = getOperation(); RewritePatternSet patterns(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); - + // Add generic constant folding patterns for all operations + patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); + patterns.add(&getContext()); + + // Add patterns for memory operations + patterns.add(&getContext()); + patterns.add(&getContext()); + 
patterns.add(&getContext()); + patterns.add(&getContext()); + + // Add pattern for grant operations (post-transform) patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); - patterns.add(&getContext()); + FrozenRewritePatternSet frozen(std::move(patterns)); // Applies to every region inside the module (regardless of func type, diff --git a/test/optimization/constant_folding/simple_loop.mlir b/test/optimization/constant_folding/simple_loop.mlir index 5859b4f2..483a042c 100644 --- a/test/optimization/constant_folding/simple_loop.mlir +++ b/test/optimization/constant_folding/simple_loop.mlir @@ -48,8 +48,8 @@ module { // FOLD-NEXT: neura.cond_br %2 : i1 then to ^bb2 else to ^bb3 // FOLD-NEXT: ^bb2: // pred: ^bb1 // FOLD-NEXT: %3 = neura.load_indexed [%1 : i64] {lhs_value = "%arg0"} : i32 -// FOLD-NEXT: %4 = "neura.mul"(%3) {rhs_value = 2 : i32} : (i32) -> i32 -// FOLD-NEXT: %5 = "neura.add"(%3) {rhs_value = 1 : i32} : (i32) -> i32 +// FOLD-NEXT: %4 = "neura.mul"(%3) {lhs_value = 2 : i32} : (i32) -> i32 +// FOLD-NEXT: %5 = "neura.add"(%3) {lhs_value = 1 : i32} : (i32) -> i32 // FOLD-NEXT: neura.store_indexed %5 to [%1 : i64] {rhs_value = "%arg1"} : i32 // FOLD-NEXT: %6 = "neura.cast"(%3) <{cast_type = "sitofp"}> : (i32) -> f32 // FOLD-NEXT: %7 = "neura.fmul"(%6) {rhs_value = 2.500000e+00 : f32} : (f32) -> f32 From d43596b43304f64b2f1e5d094c9675b8b0ce4c9e Mon Sep 17 00:00:00 2001 From: tangyz <739245980@qq.com> Date: Fri, 24 Oct 2025 21:37:40 +0800 Subject: [PATCH 03/12] Fix LoadIndexed/StoreIndexed constant folding to only fold base operand --- .../HwAgnosticOpt/FoldConstantPass.cpp | 163 +++++++++++------- 1 file changed, 97 insertions(+), 66 deletions(-) diff --git a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp index 6137cf5e..374a0482 100644 --- 
a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp +++ b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp @@ -428,126 +428,157 @@ struct FuseStoreConstantPattern }; // Pattern for LoadIndexed operation (base + indices) +// Only folds the base, never folds indices (required by assemblyFormat) struct FuseLoadIndexedConstantPattern - : public GenericFuseConstantPattern { - using GenericFuseConstantPattern::GenericFuseConstantPattern; + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - // LoadIndexed uses lhs_value for base (operand 0) - std::string getAttributeName(size_t operand_idx, size_t total_operands) const override { - if (operand_idx == 0) { - return "lhs_value"; - } else { - return "operand_" + std::to_string(operand_idx) + "_value"; - } - } - - Operation *createOpWithFoldedConstants( - neura::LoadIndexedOp op, ArrayRef all_operands, - PatternRewriter &rewriter) const override { - // LoadIndexed: operand 0 is base, rest are indices - Value base = all_operands[0]; - SmallVector indices; - for (size_t i = 1; i < all_operands.size(); ++i) { - if (all_operands[i]) { - indices.push_back(all_operands[i]); - } + LogicalResult matchAndRewrite(neura::LoadIndexedOp op, + PatternRewriter &rewriter) const override { + // Check if already folded + if (op->hasAttr("lhs_value")) { + return failure(); } - // Build operand list and calculate segment sizes - SmallVector operands; - int32_t num_base = 0; - if (base) { - operands.push_back(base); - num_base = 1; + // Only check if base is a constant + Value base = op.getBase(); + if (!base || !isOriginConstantOp(base)) { + return failure(); } - for (Value idx : indices) { - operands.push_back(idx); + + auto constant_op = dyn_cast(base.getDefiningOp()); + Attribute base_value = getOriginConstantValue(base); + + // Keep all indices unchanged (never fold indices) + SmallVector indices; + for (Value idx : op.getIndices()) { + indices.push_back(idx); } - 
int32_t num_indices = indices.size(); - // Create operation with proper operandSegmentSizes + // Create new LoadIndexed without base OperationState state(op.getLoc(), op.getOperationName()); - state.addOperands(operands); + state.addOperands(indices); // Only indices, no base state.addTypes(op->getResultTypes()); - // Copy attributes except operandSegmentSizes + // Copy all attributes except operandSegmentSizes for (auto attr : op->getAttrs()) { if (attr.getName() != "operandSegmentSizes") { state.addAttribute(attr.getName(), attr.getValue()); } } - // Set the correct operandSegmentSizes + // Add the folded base value + state.addAttribute("lhs_value", base_value); + + // Set operandSegmentSizes: 0 base, N indices state.addAttribute("operandSegmentSizes", - rewriter.getDenseI32ArrayAttr({num_base, num_indices})); + rewriter.getDenseI32ArrayAttr({0, static_cast(indices.size())})); - return rewriter.create(state); + Operation *new_op = rewriter.create(state); + rewriter.replaceOp(op, new_op->getResults()); + + // Clean up constant if no longer used + if (constant_op->use_empty()) { + rewriter.eraseOp(constant_op); + } + + return success(); } }; // Pattern for StoreIndexed operation (value, base, indices...) 
+// Only folds value and base, never folds indices (required by assemblyFormat) struct FuseStoreIndexedConstantPattern - : public GenericFuseConstantPattern { - using GenericFuseConstantPattern::GenericFuseConstantPattern; + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - // StoreIndexed uses lhs_value for value (operand 0) and rhs_value for base (operand 1) - std::string getAttributeName(size_t operand_idx, size_t total_operands) const override { - if (operand_idx == 0) { - return "lhs_value"; - } else if (operand_idx == 1) { - return "rhs_value"; - } else { - return "operand_" + std::to_string(operand_idx) + "_value"; + LogicalResult matchAndRewrite(neura::StoreIndexedOp op, + PatternRewriter &rewriter) const override { + // Check if already folded + if (op->hasAttr("lhs_value") || op->hasAttr("rhs_value")) { + return failure(); } - } - - Operation *createOpWithFoldedConstants( - neura::StoreIndexedOp op, ArrayRef all_operands, - PatternRewriter &rewriter) const override { - // StoreIndexed: operand 0 is value, operand 1 is base, rest are indices - Value value = all_operands[0]; - Value base = all_operands.size() > 1 ? 
all_operands[1] : Value(); + + // Check which of value/base are constants + Value value = op.getValue(); + Value base = op.getBase(); + + bool value_is_const = value && isOriginConstantOp(value); + bool base_is_const = base && isOriginConstantOp(base); + + // Nothing to fold if neither is constant + if (!value_is_const && !base_is_const) { + return failure(); + } + + // Keep all indices unchanged (never fold indices) SmallVector indices; - for (size_t i = 2; i < all_operands.size(); ++i) { - if (all_operands[i]) { - indices.push_back(all_operands[i]); - } + for (Value idx : op.getIndices()) { + indices.push_back(idx); } - // Build operand list and calculate segment sizes + // Build the new operand list SmallVector operands; int32_t num_value = 0; - if (value) { + int32_t num_base = 0; + + if (!value_is_const && value) { operands.push_back(value); num_value = 1; } - int32_t num_base = 0; - if (base) { + + if (!base_is_const && base) { operands.push_back(base); num_base = 1; } + for (Value idx : indices) { operands.push_back(idx); } int32_t num_indices = indices.size(); - // Create operation with proper operandSegmentSizes + // Create new StoreIndexed OperationState state(op.getLoc(), op.getOperationName()); state.addOperands(operands); state.addTypes(op->getResultTypes()); - // Copy attributes except operandSegmentSizes + // Copy all attributes except operandSegmentSizes for (auto attr : op->getAttrs()) { if (attr.getName() != "operandSegmentSizes") { state.addAttribute(attr.getName(), attr.getValue()); } } - // Set the correct operandSegmentSizes + // Add folded constant attributes + if (value_is_const) { + state.addAttribute("lhs_value", getOriginConstantValue(value)); + } + if (base_is_const) { + state.addAttribute("rhs_value", getOriginConstantValue(base)); + } + + // Set operandSegmentSizes: num_value, num_base, num_indices state.addAttribute("operandSegmentSizes", rewriter.getDenseI32ArrayAttr({num_value, num_base, num_indices})); - return 
rewriter.create(state); + Operation *new_op = rewriter.create(state); + rewriter.replaceOp(op, new_op->getResults()); + + // Clean up unused constants + if (value_is_const) { + auto const_op = value.getDefiningOp(); + if (const_op->use_empty()) { + rewriter.eraseOp(const_op); + } + } + if (base_is_const) { + auto const_op = base.getDefiningOp(); + if (const_op->use_empty()) { + rewriter.eraseOp(const_op); + } + } + + return success(); } }; From ff2b9cbb408fb3db3c51a56f68135449f57d6532 Mon Sep 17 00:00:00 2001 From: tangyz <739245980@qq.com> Date: Fri, 24 Oct 2025 22:42:04 +0800 Subject: [PATCH 04/12] Fix block ordering bug in TransformCtrlToDataFlow pass using RPO traversal --- .../TransformCtrlToDataFlowPass.cpp | 37 +++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp index 7875c7a5..0486d2f5 100644 --- a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp +++ b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp @@ -485,11 +485,42 @@ void transformControlFlowToDataFlow(Region ®ion, ControlFlowInfo &ctrl_info, } // Flattens blocks into the entry block. 
+ // Sort blocks by reverse post-order traversal to maintain SSA dominance Block *entry_block = ®ion.front(); SmallVector blocks_to_flatten; - for (Block &block : region) { - if (&block != entry_block) { - blocks_to_flatten.push_back(&block); + + // Use reverse post-order: visit successors before predecessors + // This ensures that when we move blocks, definitions come before uses + llvm::SetVector visited; + SmallVector rpo_order; + + std::function rpo_traverse = [&](Block *block) { + if (!visited.insert(block)) { + return; + } + + // Visit successors first (post-order) + Operation *terminator = block->getTerminator(); + if (auto br = dyn_cast(terminator)) { + rpo_traverse(br.getDest()); + } else if (auto cond_br = dyn_cast(terminator)) { + rpo_traverse(cond_br.getTrueDest()); + rpo_traverse(cond_br.getFalseDest()); + } + + // Add to reverse post-order + rpo_order.push_back(block); + }; + + rpo_traverse(entry_block); + + // Reverse to get correct order (forward traversal) + std::reverse(rpo_order.begin(), rpo_order.end()); + + // Collect non-entry blocks in RPO order + for (Block *block : rpo_order) { + if (block != entry_block) { + blocks_to_flatten.push_back(block); } } From cc0c50d363382352bd24dce1230f43ae1cb307bb Mon Sep 17 00:00:00 2001 From: tangyz <739245980@qq.com> Date: Sat, 25 Oct 2025 01:24:50 +0800 Subject: [PATCH 05/12] Update testcases for TransformCtrlToDataFlow using RPO traversal --- .../bert/bert_node1/bert_node1.mlir | 22 +- .../bert/bert_node28/bert_node28.mlir | 140 ++-- test/c2llvm2mlir/nested_loop/test.mlir | 4 +- .../complex_nested/complex_nested.mlir | 761 +++++++++--------- .../non_perfect_nested.mlir | 209 +++-- .../perfect_nested/perfect_nested.mlir | 28 +- .../perfect_reduction/perfect_reduction.mlir | 87 +- 7 files changed, 624 insertions(+), 627 deletions(-) diff --git a/test/affine2neura/bert/bert_node1/bert_node1.mlir b/test/affine2neura/bert/bert_node1/bert_node1.mlir index 0c3c097b..6503698b 100644 --- 
a/test/affine2neura/bert/bert_node1/bert_node1.mlir +++ b/test/affine2neura/bert/bert_node1/bert_node1.mlir @@ -121,22 +121,22 @@ module attributes {} { // CTRL2DATA-NEXT: %55 = neura.grant_predicate %36, %51 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %56 = neura.grant_predicate %38, %51 : !neura.data, i1>, !neura.data -> !neura.data, i1> // CTRL2DATA-NEXT: %57 = neura.grant_predicate %34, %51 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %58 = neura.load_indexed %44[%45, %45, %45, %45, %45, %46 : !neura.data, !neura.data, !neura.data, !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: neura.store_indexed %58 to %47[%45, %45, %48, %45, %45, %46 : !neura.data, !neura.data, !neura.data, !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %59 = "neura.add"(%46, %49) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %59 -> %41 : !neura.data !neura.data +// CTRL2DATA-NEXT: %58 = "neura.add"(%52, %53) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %58 -> %20 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %54 -> %18 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %55 -> %16 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %56 -> %14 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %57 -> %12 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %53 -> %10 : !neura.data !neura.data +// CTRL2DATA-NEXT: %59 = neura.load_indexed %44[%45, %45, %45, %45, %45, %46 : !neura.data, !neura.data, !neura.data, !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %59 to %47[%45, %45, %48, %45, %45, %46 : !neura.data, !neura.data, !neura.data, !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %60 = "neura.add"(%46, %49) : (!neura.data, 
!neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %60 -> %41 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %50 -> %39 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %44 -> %37 : !neura.data, i1> !neura.data, i1> // CTRL2DATA-NEXT: neura.ctrl_mov %45 -> %35 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %47 -> %33 : !neura.data, i1> !neura.data, i1> // CTRL2DATA-NEXT: neura.ctrl_mov %48 -> %31 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %49 -> %29 : !neura.data !neura.data -// CTRL2DATA-NEXT: %60 = "neura.add"(%52, %53) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %60 -> %20 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %54 -> %18 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %55 -> %16 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %56 -> %14 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %57 -> %12 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %53 -> %10 : !neura.data !neura.data // CTRL2DATA-NEXT: "neura.return"() : () -> () // CTRL2DATA-NEXT: } \ No newline at end of file diff --git a/test/affine2neura/bert/bert_node28/bert_node28.mlir b/test/affine2neura/bert/bert_node28/bert_node28.mlir index 2574d8e1..5db87e45 100644 --- a/test/affine2neura/bert/bert_node28/bert_node28.mlir +++ b/test/affine2neura/bert/bert_node28/bert_node28.mlir @@ -159,76 +159,8 @@ module attributes {} { // CTRL2DATA-NEXT: %73 = neura.grant_predicate %44, %67 : !neura.data, i1>, !neura.data -> !neura.data, i1> // CTRL2DATA-NEXT: %74 = neura.grant_predicate %42, %67 : !neura.data, i1>, !neura.data -> !neura.data, i1> // CTRL2DATA-NEXT: %75 = neura.grant_predicate %40, %67 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %76 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %77 = "neura.phi"(%76, %66) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %78 = 
neura.reserve : !neura.data -// CTRL2DATA-NEXT: %79 = "neura.phi"(%78, %65) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %80 = neura.reserve : !neura.data, i1> -// CTRL2DATA-NEXT: %81 = "neura.phi"(%80, %64) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %82 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %83 = "neura.phi"(%82, %63) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %84 = neura.reserve : !neura.data, i1> -// CTRL2DATA-NEXT: %85 = "neura.phi"(%84, %62) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %86 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %87 = "neura.phi"(%86, %61) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %88 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %89 = "neura.phi"(%88, %58) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %90 = neura.reserve : !neura.data, i1> -// CTRL2DATA-NEXT: %91 = "neura.phi"(%90, %60) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %92 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %93 = "neura.phi"(%92, %59) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %94 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %95 = "neura.phi"(%94, %58) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %96 = "neura.icmp"(%95, %93) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %97 = neura.grant_predicate %91, %96 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %98 = neura.grant_predicate %89, %96 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %99 = neura.grant_predicate %87, %96 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %100 = neura.grant_predicate %95, %96 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %101 = neura.grant_predicate %85, %96 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: 
%102 = neura.grant_predicate %83, %96 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %103 = neura.grant_predicate %81, %96 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %104 = neura.grant_predicate %79, %96 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %105 = neura.grant_predicate %93, %96 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %106 = neura.grant_predicate %77, %96 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %107 = "neura.not"(%96) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %108 = neura.grant_predicate %83, %107 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %109 = neura.grant_predicate %79, %107 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %110 = neura.grant_predicate %93, %107 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %111 = neura.grant_predicate %89, %107 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %112 = neura.grant_predicate %87, %107 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %113 = neura.grant_predicate %77, %107 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %114 = neura.grant_predicate %91, %107 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %115 = neura.grant_predicate %85, %107 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %116 = neura.grant_predicate %81, %107 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %117 = neura.load_indexed %97[%98, %99, %100 : !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %118 = neura.load_indexed %101[%98, %100, %102 : !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %119 = neura.load_indexed %103[%98, %99, %102 : !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %120 = "neura.fmul"(%117, %118) : (!neura.data, !neura.data) 
-> !neura.data -// CTRL2DATA-NEXT: %121 = "neura.fadd"(%119, %120) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.store_indexed %121 to %103[%98, %99, %102 : !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %122 = "neura.add"(%100, %104) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %122 -> %94 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %105 -> %92 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %97 -> %90 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %98 -> %88 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %99 -> %86 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %101 -> %84 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %102 -> %82 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %103 -> %80 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %104 -> %78 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %106 -> %76 : !neura.data !neura.data -// CTRL2DATA-NEXT: %123 = "neura.add"(%108, %109) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %123 -> %55 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %110 -> %53 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %111 -> %51 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %112 -> %49 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %109 -> %47 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %113 -> %45 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %114 -> %43 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %115 -> %41 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %116 -> %39 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: %124 = "neura.add"(%68, %69) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: 
neura.ctrl_mov %124 -> %28 : !neura.data !neura.data +// CTRL2DATA-NEXT: %76 = "neura.add"(%68, %69) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %76 -> %28 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %70 -> %26 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %71 -> %24 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %72 -> %22 : !neura.data !neura.data @@ -236,5 +168,73 @@ module attributes {} { // CTRL2DATA-NEXT: neura.ctrl_mov %73 -> %18 : !neura.data, i1> !neura.data, i1> // CTRL2DATA-NEXT: neura.ctrl_mov %74 -> %16 : !neura.data, i1> !neura.data, i1> // CTRL2DATA-NEXT: neura.ctrl_mov %75 -> %14 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: %77 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %78 = "neura.phi"(%77, %66) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %79 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %80 = "neura.phi"(%79, %65) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %81 = neura.reserve : !neura.data, i1> +// CTRL2DATA-NEXT: %82 = "neura.phi"(%81, %64) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %83 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %84 = "neura.phi"(%83, %63) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %85 = neura.reserve : !neura.data, i1> +// CTRL2DATA-NEXT: %86 = "neura.phi"(%85, %62) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %87 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %88 = "neura.phi"(%87, %61) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %89 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %90 = "neura.phi"(%89, %58) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %91 = neura.reserve : !neura.data, i1> +// CTRL2DATA-NEXT: %92 = "neura.phi"(%91, %60) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %93 = neura.reserve : !neura.data 
+// CTRL2DATA-NEXT: %94 = "neura.phi"(%93, %59) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %95 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %96 = "neura.phi"(%95, %58) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %97 = "neura.icmp"(%96, %94) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %98 = neura.grant_predicate %92, %97 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %99 = neura.grant_predicate %90, %97 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %100 = neura.grant_predicate %88, %97 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %101 = neura.grant_predicate %96, %97 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %102 = neura.grant_predicate %86, %97 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %103 = neura.grant_predicate %84, %97 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %104 = neura.grant_predicate %82, %97 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %105 = neura.grant_predicate %80, %97 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %106 = neura.grant_predicate %94, %97 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %107 = neura.grant_predicate %78, %97 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %108 = "neura.not"(%97) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %109 = neura.grant_predicate %84, %108 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %110 = neura.grant_predicate %80, %108 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %111 = neura.grant_predicate %94, %108 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %112 = neura.grant_predicate %90, %108 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %113 = neura.grant_predicate %88, %108 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %114 = 
neura.grant_predicate %78, %108 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %115 = neura.grant_predicate %92, %108 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %116 = neura.grant_predicate %86, %108 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %117 = neura.grant_predicate %82, %108 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %118 = "neura.add"(%109, %110) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %118 -> %55 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %111 -> %53 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %112 -> %51 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %113 -> %49 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %110 -> %47 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %114 -> %45 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %115 -> %43 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %116 -> %41 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %117 -> %39 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: %119 = neura.load_indexed %98[%99, %100, %101 : !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %120 = neura.load_indexed %102[%99, %101, %103 : !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %121 = neura.load_indexed %104[%99, %100, %103 : !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %122 = "neura.fmul"(%119, %120) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %123 = "neura.fadd"(%121, %122) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %123 to %104[%99, %100, %103 : !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %124 = "neura.add"(%101, %105) : 
(!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %124 -> %95 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %106 -> %93 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %98 -> %91 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %99 -> %89 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %100 -> %87 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %102 -> %85 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %103 -> %83 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %104 -> %81 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %105 -> %79 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %107 -> %77 : !neura.data !neura.data // CTRL2DATA-NEXT: "neura.return"() : () -> () // CTRL2DATA-NEXT: } diff --git a/test/c2llvm2mlir/nested_loop/test.mlir b/test/c2llvm2mlir/nested_loop/test.mlir index cedca4e3..9cb82262 100644 --- a/test/c2llvm2mlir/nested_loop/test.mlir +++ b/test/c2llvm2mlir/nested_loop/test.mlir @@ -24,7 +24,7 @@ // CHECK-LLVM2NEURA: accelerator = "neura" // CHECK-LLVM2NEURA: %25 = neura.alloca %24 : !neura.data -> !neura.data // CHECK-LLVM2NEURA: %38 = "neura.phi"(%36, %37) : (!neura.data, !neura.data) -> !neura.data -// CHECK-LLVM2NEURA: %175 = neura.sext %174 : !neura.data -> !neura.data -// CHECK-LLVM2NEURA: %194 = "neura.mul"(%192, %193) : (!neura.data, !neura.data) -> !neura.data +// CHECK-LLVM2NEURA: %182 = neura.sext %181 : !neura.data -> !neura.data +// CHECK-LLVM2NEURA: %201 = "neura.mul"(%199, %200) : (!neura.data, !neura.data) -> !neura.data // CHECK-LLVM2NEURA-MAP: func.func @_Z6kernelPiS_S_(%arg0: !llvm.ptr {llvm.noundef}, %arg1: !llvm.ptr {llvm.noundef}, %arg2: !llvm.ptr {llvm.noundef}) -> !llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", dataflow_mode = "predicate", frame_pointer = #llvm.framePointerKind, linkage = #llvm.linkage, mapping_info = {compiled_ii = 17 
: i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 9 : i32, res_mii = 6 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}, no_inline, no_unwind, optimize_none, passthrough = ["mustprogress", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 0 : i64, visibility_ = 0 : i64} { \ No newline at end of file diff --git a/test/controflow_fuse/complex_nested/complex_nested.mlir b/test/controflow_fuse/complex_nested/complex_nested.mlir index 7d983ea6..bd7a099a 100644 --- a/test/controflow_fuse/complex_nested/complex_nested.mlir +++ b/test/controflow_fuse/complex_nested/complex_nested.mlir @@ -176,6 +176,7 @@ module attributes {} { // CHECK-NEXT: "neura.return"() : () -> () // CHECK-NEXT: } + // CTRL2DATA: func.func @_Z14complex_nestedPA32_A32_iPS_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> @@ -271,402 +272,402 @@ module attributes {} { // CTRL2DATA-NEXT: %91 = neura.grant_predicate %52, %83 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %92 = neura.grant_predicate %58, %83 : !neura.data, i1>, !neura.data -> !neura.data, i1> // CTRL2DATA-NEXT: %93 = neura.grant_predicate %50, %83 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: neura.store_indexed %72 to %73[%74, %75 : !neura.data, !neura.data] !neura.data, i1> : !neura.data // CTRL2DATA-NEXT: %94 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %95 = "neura.phi"(%94, %82) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %95 = "neura.phi"(%94, %85) 
: (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %96 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %97 = "neura.phi"(%96, %81) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %98 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %99 = "neura.phi"(%98, %80) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %100 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %101 = "neura.phi"(%100, %76) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %102 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %103 = "neura.phi"(%102, %72) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %104 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %105 = "neura.phi"(%104, %79) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %106 = neura.reserve : !neura.data, i1> -// CTRL2DATA-NEXT: %107 = "neura.phi"(%106, %73) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %97 = "neura.phi"(%96, %93) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %98 = neura.reserve : !neura.data, i1> +// CTRL2DATA-NEXT: %99 = "neura.phi"(%98, %92) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %100 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %101 = "neura.phi"(%100, %91) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %102 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %103 = "neura.phi"(%102, %84) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %104 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %105 = "neura.phi"(%104, %90) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %106 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %107 = "neura.phi"(%106, %89) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %108 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %109 = "neura.phi"(%108, %75) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %110 = neura.reserve : 
!neura.data -// CTRL2DATA-NEXT: %111 = "neura.phi"(%110, %74) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %112 = neura.reserve : !neura.data, i1> -// CTRL2DATA-NEXT: %113 = "neura.phi"(%112, %78) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %114 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %115 = "neura.phi"(%114, %77) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %109 = "neura.phi"(%108, %88) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %110 = neura.reserve : !neura.data, i1> +// CTRL2DATA-NEXT: %111 = "neura.phi"(%110, %87) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %112 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %113 = "neura.phi"(%112, %86) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %114 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %115 = "neura.phi"(%114, %85) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %116 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %117 = "neura.phi"(%116, %76) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %118 = "neura.icmp"(%117, %115) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %119 = neura.grant_predicate %113, %118 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %120 = neura.grant_predicate %111, %118 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %121 = neura.grant_predicate %109, %118 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %122 = neura.grant_predicate %117, %118 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %123 = neura.grant_predicate %107, %118 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %124 = neura.grant_predicate %105, %118 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %125 = neura.grant_predicate %115, %118 : !neura.data, !neura.data -> !neura.data -// 
CTRL2DATA-NEXT: %126 = neura.grant_predicate %103, %118 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %127 = neura.grant_predicate %101, %118 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %128 = neura.grant_predicate %99, %118 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %117 = "neura.phi"(%116, %84) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %118 = "neura.icmp"(%117, %113) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %119 = neura.grant_predicate %111, %118 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %120 = neura.grant_predicate %109, %118 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %121 = neura.grant_predicate %117, %118 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %122 = neura.grant_predicate %115, %118 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %123 = neura.grant_predicate %107, %118 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %124 = neura.grant_predicate %113, %118 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %125 = neura.grant_predicate %105, %118 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %126 = neura.grant_predicate %103, %118 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %127 = neura.grant_predicate %101, %118 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %128 = neura.grant_predicate %99, %118 : !neura.data, i1>, !neura.data -> !neura.data, i1> // CTRL2DATA-NEXT: %129 = neura.grant_predicate %97, %118 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %130 = neura.grant_predicate %95, %118 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %131 = "neura.not"(%118) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %132 = neura.grant_predicate %109, %131 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %133 = neura.grant_predicate %105, %131 : 
!neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %134 = neura.grant_predicate %115, %131 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %135 = neura.grant_predicate %103, %131 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %136 = neura.grant_predicate %107, %131 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %137 = neura.grant_predicate %111, %131 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %138 = neura.grant_predicate %101, %131 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %139 = neura.grant_predicate %113, %131 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %140 = neura.grant_predicate %99, %131 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %132 = neura.grant_predicate %115, %131 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %133 = neura.grant_predicate %105, %131 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %134 = neura.grant_predicate %103, %131 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %135 = neura.grant_predicate %113, %131 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %136 = neura.grant_predicate %101, %131 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %137 = neura.grant_predicate %99, %131 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %138 = neura.grant_predicate %109, %131 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %139 = neura.grant_predicate %107, %131 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %140 = neura.grant_predicate %111, %131 : !neura.data, i1>, !neura.data -> !neura.data, i1> // CTRL2DATA-NEXT: %141 = neura.grant_predicate %97, %131 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %142 = neura.grant_predicate %95, %131 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %143 = neura.load_indexed %119[%120, %121, %122 : !neura.data, 
!neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %144 = neura.load_indexed %123[%120, %121 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %145 = "neura.add"(%144, %143) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.store_indexed %145 to %123[%120, %121 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %146 = "neura.add"(%122, %124) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %146 -> %116 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %125 -> %114 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %119 -> %112 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %120 -> %110 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %121 -> %108 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %123 -> %106 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %124 -> %104 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %126 -> %102 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %127 -> %100 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %128 -> %98 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %129 -> %96 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %130 -> %94 : !neura.data !neura.data -// CTRL2DATA-NEXT: %147 = "neura.add"(%132, %133) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %147 -> %69 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %134 -> %67 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %135 -> %65 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %136 -> %63 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %137 -> %61 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %138 -> %59 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %139 -> %57 : 
!neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %133 -> %55 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %140 -> %53 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %141 -> %51 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %142 -> %49 : !neura.data !neura.data +// CTRL2DATA-NEXT: %143 = "neura.div"(%132, %133) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %144 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %145 = "neura.phi"(%144, %133) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %146 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %147 = "neura.phi"(%146, %142) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %148 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %149 = "neura.phi"(%148, %85) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %149 = "neura.phi"(%148, %143) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %150 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %151 = "neura.phi"(%150, %93) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %152 = neura.reserve : !neura.data, i1> -// CTRL2DATA-NEXT: %153 = "neura.phi"(%152, %92) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %154 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %155 = "neura.phi"(%154, %91) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %151 = "neura.phi"(%150, %141) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %152 = neura.reserve : !neura.data, i1> +// CTRL2DATA-NEXT: %153 = "neura.phi"(%152, %140) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %154 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %155 = "neura.phi"(%154, %139) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %156 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %157 = "neura.phi"(%156, %84) : (!neura.data, !neura.data) -> !neura.data -// 
CTRL2DATA-NEXT: %158 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %159 = "neura.phi"(%158, %90) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %160 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %161 = "neura.phi"(%160, %89) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %157 = "neura.phi"(%156, %138) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %158 = neura.reserve : !neura.data, i1> +// CTRL2DATA-NEXT: %159 = "neura.phi"(%158, %137) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %160 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %161 = "neura.phi"(%160, %136) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %162 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %163 = "neura.phi"(%162, %88) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %164 = neura.reserve : !neura.data, i1> -// CTRL2DATA-NEXT: %165 = "neura.phi"(%164, %87) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %163 = "neura.phi"(%162, %134) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %164 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %165 = "neura.phi"(%164, %135) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %166 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %167 = "neura.phi"(%166, %86) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %168 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %169 = "neura.phi"(%168, %85) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %170 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %171 = "neura.phi"(%170, %84) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %172 = "neura.icmp"(%171, %167) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %173 = neura.grant_predicate %165, %172 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %174 = neura.grant_predicate %163, 
%172 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %175 = neura.grant_predicate %171, %172 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %176 = neura.grant_predicate %169, %172 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %177 = neura.grant_predicate %161, %172 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %178 = neura.grant_predicate %167, %172 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %179 = neura.grant_predicate %159, %172 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %180 = neura.grant_predicate %157, %172 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %181 = neura.grant_predicate %155, %172 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %182 = neura.grant_predicate %153, %172 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %183 = neura.grant_predicate %151, %172 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %184 = neura.grant_predicate %149, %172 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %185 = "neura.not"(%172) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %186 = neura.grant_predicate %169, %185 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %187 = neura.grant_predicate %159, %185 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %188 = neura.grant_predicate %157, %185 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %189 = neura.grant_predicate %167, %185 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %190 = neura.grant_predicate %155, %185 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %191 = neura.grant_predicate %153, %185 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %192 = neura.grant_predicate %163, %185 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %193 = neura.grant_predicate %161, %185 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: 
%194 = neura.grant_predicate %165, %185 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %195 = neura.grant_predicate %151, %185 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %196 = neura.grant_predicate %149, %185 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %197 = neura.load_indexed %173[%174, %175 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %198 = "neura.add"(%176, %197) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %199 = "neura.add"(%175, %177) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %199 -> %170 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %198 -> %168 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %178 -> %166 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %173 -> %164 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %174 -> %162 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %177 -> %160 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %179 -> %158 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %180 -> %156 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %181 -> %154 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %182 -> %152 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %183 -> %150 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %184 -> %148 : !neura.data !neura.data -// CTRL2DATA-NEXT: %200 = "neura.div"(%186, %187) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %167 = "neura.phi"(%166, %134) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %168 = "neura.icmp"(%167, %165) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %169 = neura.grant_predicate %163, %168 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %170 = neura.grant_predicate %161, %168 : !neura.data, 
!neura.data -> !neura.data +// CTRL2DATA-NEXT: %171 = neura.grant_predicate %165, %168 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %172 = neura.grant_predicate %159, %168 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %173 = neura.grant_predicate %167, %168 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %174 = neura.grant_predicate %157, %168 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %175 = neura.grant_predicate %155, %168 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %176 = neura.grant_predicate %153, %168 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %177 = neura.grant_predicate %151, %168 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %178 = neura.grant_predicate %149, %168 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %179 = neura.grant_predicate %147, %168 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %180 = neura.grant_predicate %145, %168 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %181 = "neura.not"(%168) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %182 = neura.grant_predicate %163, %181 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %183 = neura.grant_predicate %165, %181 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %184 = neura.grant_predicate %153, %181 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %185 = neura.grant_predicate %157, %181 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %186 = neura.grant_predicate %149, %181 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %187 = neura.grant_predicate %155, %181 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %188 = neura.grant_predicate %147, %181 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %189 = neura.grant_predicate %159, %181 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: 
%190 = neura.grant_predicate %145, %181 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %191 = neura.grant_predicate %161, %181 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %192 = neura.grant_predicate %151, %181 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %193 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %194 = "neura.phi"(%193, %192) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %195 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %196 = "neura.phi"(%195, %191) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %197 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %198 = "neura.phi"(%197, %190) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %199 = neura.reserve : !neura.data, i1> +// CTRL2DATA-NEXT: %200 = "neura.phi"(%199, %189) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> // CTRL2DATA-NEXT: %201 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %202 = "neura.phi"(%201, %187) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %203 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %204 = "neura.phi"(%203, %196) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %205 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %206 = "neura.phi"(%205, %200) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %202 = "neura.phi"(%201, %188) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %203 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %204 = "neura.phi"(%203, %182) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %205 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %206 = "neura.phi"(%205, %187) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %207 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %208 = "neura.phi"(%207, %195) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %209 = neura.reserve : !neura.data, i1> -// CTRL2DATA-NEXT: %210 = 
"neura.phi"(%209, %194) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %211 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %212 = "neura.phi"(%211, %193) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %208 = "neura.phi"(%207, %186) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %209 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %210 = "neura.phi"(%209, %185) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %211 = neura.reserve : !neura.data, i1> +// CTRL2DATA-NEXT: %212 = "neura.phi"(%211, %184) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> // CTRL2DATA-NEXT: %213 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %214 = "neura.phi"(%213, %192) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %215 = neura.reserve : !neura.data, i1> -// CTRL2DATA-NEXT: %216 = "neura.phi"(%215, %191) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %217 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %218 = "neura.phi"(%217, %190) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %219 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %220 = "neura.phi"(%219, %188) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %221 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %222 = "neura.phi"(%221, %189) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %223 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %224 = "neura.phi"(%223, %188) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %225 = "neura.icmp"(%224, %222) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %226 = neura.grant_predicate %220, %225 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %227 = neura.grant_predicate %218, %225 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %228 = neura.grant_predicate %222, %225 : !neura.data, !neura.data -> !neura.data -// 
CTRL2DATA-NEXT: %229 = neura.grant_predicate %216, %225 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %230 = neura.grant_predicate %224, %225 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %231 = neura.grant_predicate %214, %225 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %232 = neura.grant_predicate %212, %225 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %233 = neura.grant_predicate %210, %225 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %234 = neura.grant_predicate %208, %225 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %235 = neura.grant_predicate %206, %225 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %236 = neura.grant_predicate %204, %225 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %237 = neura.grant_predicate %202, %225 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %238 = "neura.not"(%225) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %239 = neura.grant_predicate %220, %238 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %240 = neura.grant_predicate %222, %238 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %241 = neura.grant_predicate %210, %238 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %242 = neura.grant_predicate %214, %238 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %243 = neura.grant_predicate %206, %238 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %244 = neura.grant_predicate %212, %238 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %245 = neura.grant_predicate %204, %238 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %246 = neura.grant_predicate %216, %238 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %247 = neura.grant_predicate %202, %238 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %248 = neura.grant_predicate 
%218, %238 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %249 = neura.grant_predicate %208, %238 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %250 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %251 = "neura.phi"(%250, %237) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %252 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %253 = "neura.phi"(%252, %236) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %254 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %255 = "neura.phi"(%254, %235) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %256 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %257 = "neura.phi"(%256, %227) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %258 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %259 = "neura.phi"(%258, %226) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %260 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %261 = "neura.phi"(%260, %234) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %262 = neura.reserve : !neura.data, i1> -// CTRL2DATA-NEXT: %263 = "neura.phi"(%262, %233) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %264 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %265 = "neura.phi"(%264, %232) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %266 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %267 = "neura.phi"(%266, %231) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %268 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %269 = "neura.phi"(%268, %230) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %270 = neura.reserve : !neura.data, i1> -// CTRL2DATA-NEXT: %271 = "neura.phi"(%270, %229) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %272 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %273 = "neura.phi"(%272, %228) : (!neura.data, !neura.data) -> !neura.data -// 
CTRL2DATA-NEXT: %274 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %275 = "neura.phi"(%274, %227) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %276 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %277 = "neura.phi"(%276, %226) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %278 = "neura.icmp"(%277, %273) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %279 = neura.grant_predicate %271, %278 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %280 = neura.grant_predicate %277, %278 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %281 = neura.grant_predicate %269, %278 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %282 = neura.grant_predicate %267, %278 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %283 = neura.grant_predicate %275, %278 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %284 = neura.grant_predicate %265, %278 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %285 = neura.grant_predicate %273, %278 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %286 = neura.grant_predicate %263, %278 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %287 = neura.grant_predicate %261, %278 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %288 = neura.grant_predicate %259, %278 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %289 = neura.grant_predicate %257, %278 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %290 = neura.grant_predicate %255, %278 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %291 = neura.grant_predicate %253, %278 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %292 = neura.grant_predicate %251, %278 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %293 = "neura.not"(%278) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %294 = neura.grant_predicate %263, %293 : 
!neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %295 = neura.grant_predicate %267, %293 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %296 = neura.grant_predicate %269, %293 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %297 = neura.grant_predicate %275, %293 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %298 = neura.grant_predicate %261, %293 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %299 = neura.grant_predicate %265, %293 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %300 = neura.grant_predicate %273, %293 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %301 = neura.grant_predicate %259, %293 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %302 = neura.grant_predicate %257, %293 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %303 = neura.grant_predicate %271, %293 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %304 = neura.grant_predicate %255, %293 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %305 = neura.grant_predicate %253, %293 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %306 = neura.grant_predicate %251, %293 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %307 = neura.load_indexed %279[%280, %281, %282 : !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %308 = "neura.icmp"(%307, %283) <{cmpType = "sgt"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %309 = "neura.sel"(%307, %283, %308) : (!neura.data, !neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %310 = "neura.add"(%280, %284) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %310 -> %276 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %309 -> %274 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %285 -> %272 : !neura.data !neura.data -// CTRL2DATA-NEXT: 
neura.ctrl_mov %279 -> %270 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %281 -> %268 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %282 -> %266 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %284 -> %264 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %286 -> %262 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %287 -> %260 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %288 -> %258 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %289 -> %256 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %290 -> %254 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %291 -> %252 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %292 -> %250 : !neura.data !neura.data -// CTRL2DATA-NEXT: %311 = neura.load_indexed %294[%295, %296 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %312 = "neura.mul"(%311, %297) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %313 = "neura.div"(%312, %298) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.store_indexed %313 to %294[%295, %296 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %314 = "neura.add"(%296, %299) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %314 -> %223 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %300 -> %221 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %301 -> %219 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %302 -> %217 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %303 -> %215 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %295 -> %213 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %299 -> %211 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %294 -> %209 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %298 -> 
%207 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %304 -> %205 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %305 -> %203 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %306 -> %201 : !neura.data !neura.data -// CTRL2DATA-NEXT: %315 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %316 = "neura.phi"(%315, %249) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %317 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %318 = "neura.phi"(%317, %248) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %319 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %320 = "neura.phi"(%319, %247) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %321 = neura.reserve : !neura.data, i1> -// CTRL2DATA-NEXT: %322 = "neura.phi"(%321, %246) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %323 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %324 = "neura.phi"(%323, %245) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %325 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %326 = "neura.phi"(%325, %239) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %327 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %328 = "neura.phi"(%327, %244) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %329 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %330 = "neura.phi"(%329, %243) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %331 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %332 = "neura.phi"(%331, %242) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %333 = neura.reserve : !neura.data, i1> -// CTRL2DATA-NEXT: %334 = "neura.phi"(%333, %241) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %335 = neura.reserve : !neura.data -// CTRL2DATA-NEXT: %336 = "neura.phi"(%335, %240) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %337 = neura.reserve : !neura.data 
-// CTRL2DATA-NEXT: %338 = "neura.phi"(%337, %239) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %339 = "neura.icmp"(%338, %336) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %340 = neura.grant_predicate %334, %339 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %341 = neura.grant_predicate %332, %339 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %342 = neura.grant_predicate %338, %339 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %343 = neura.grant_predicate %330, %339 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %344 = neura.grant_predicate %328, %339 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %345 = neura.grant_predicate %336, %339 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %346 = neura.grant_predicate %326, %339 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %347 = neura.grant_predicate %324, %339 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %348 = neura.grant_predicate %322, %339 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %349 = neura.grant_predicate %320, %339 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %350 = neura.grant_predicate %318, %339 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %351 = neura.grant_predicate %316, %339 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %352 = "neura.not"(%339) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %353 = neura.grant_predicate %332, %352 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %354 = neura.grant_predicate %328, %352 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %355 = neura.grant_predicate %336, %352 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %356 = neura.grant_predicate %326, %352 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %357 = neura.grant_predicate %324, %352 
: !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %358 = neura.grant_predicate %334, %352 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %359 = neura.grant_predicate %322, %352 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %360 = neura.grant_predicate %320, %352 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %361 = neura.grant_predicate %318, %352 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %362 = neura.grant_predicate %316, %352 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %363 = neura.load_indexed %340[%341, %342 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %364 = "neura.icmp"(%363, %343) <{cmpType = "sgt"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %365 = neura.grant_predicate %343, %364 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %366 = neura.grant_predicate %340, %364 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %367 = neura.grant_predicate %341, %364 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %368 = neura.grant_predicate %342, %364 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %369 = neura.grant_predicate %344, %364 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %370 = neura.grant_predicate %345, %364 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %371 = neura.grant_predicate %346, %364 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %372 = neura.grant_predicate %347, %364 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %373 = neura.grant_predicate %348, %364 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %374 = neura.grant_predicate %349, %364 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %375 = neura.grant_predicate %350, %364 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %376 = neura.grant_predicate 
%351, %364 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %377 = "neura.not"(%364) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %378 = neura.grant_predicate %342, %377 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %379 = neura.grant_predicate %344, %377 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %380 = neura.grant_predicate %345, %377 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %381 = neura.grant_predicate %340, %377 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %382 = neura.grant_predicate %341, %377 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %383 = neura.grant_predicate %343, %377 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %384 = neura.grant_predicate %346, %377 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %385 = neura.grant_predicate %347, %377 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %386 = neura.grant_predicate %348, %377 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %387 = neura.grant_predicate %349, %377 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %388 = neura.grant_predicate %350, %377 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %389 = neura.grant_predicate %351, %377 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: neura.store_indexed %365 to %366[%367, %368 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %390 = "neura.phi"(%389, %376) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %391 = "neura.phi"(%388, %375) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %392 = "neura.phi"(%387, %374) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %393 = "neura.phi"(%386, %373) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %394 = "neura.phi"(%385, %372) : (!neura.data, !neura.data) -> !neura.data -// 
CTRL2DATA-NEXT: %395 = "neura.phi"(%384, %371) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %396 = "neura.phi"(%383, %365) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %397 = "neura.phi"(%382, %367) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %398 = "neura.phi"(%381, %366) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %399 = "neura.phi"(%380, %370) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %400 = "neura.phi"(%379, %369) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %401 = "neura.phi"(%378, %368) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %402 = "neura.add"(%401, %400) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %402 -> %337 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %399 -> %335 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %398 -> %333 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %397 -> %331 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %396 -> %329 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %400 -> %327 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %395 -> %325 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %394 -> %323 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %393 -> %321 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %392 -> %319 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %391 -> %317 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %390 -> %315 : !neura.data !neura.data -// CTRL2DATA-NEXT: %403 = "neura.add"(%353, %354) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %403 -> %36 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %355 -> %34 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %356 -> %32 : !neura.data !neura.data -// 
CTRL2DATA-NEXT: neura.ctrl_mov %357 -> %30 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %358 -> %28 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %359 -> %26 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %354 -> %24 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %360 -> %22 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %361 -> %20 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %362 -> %18 : !neura.data !neura.data +// CTRL2DATA-NEXT: %214 = "neura.phi"(%213, %183) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %215 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %216 = "neura.phi"(%215, %182) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %217 = "neura.icmp"(%216, %214) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %218 = neura.grant_predicate %212, %217 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %219 = neura.grant_predicate %210, %217 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %220 = neura.grant_predicate %216, %217 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %221 = neura.grant_predicate %208, %217 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %222 = neura.grant_predicate %206, %217 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %223 = neura.grant_predicate %214, %217 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %224 = neura.grant_predicate %204, %217 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %225 = neura.grant_predicate %202, %217 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %226 = neura.grant_predicate %200, %217 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %227 = neura.grant_predicate %198, %217 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %228 = neura.grant_predicate %196, %217 : 
!neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %229 = neura.grant_predicate %194, %217 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %230 = "neura.not"(%217) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %231 = neura.grant_predicate %210, %230 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %232 = neura.grant_predicate %206, %230 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %233 = neura.grant_predicate %214, %230 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %234 = neura.grant_predicate %204, %230 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %235 = neura.grant_predicate %202, %230 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %236 = neura.grant_predicate %212, %230 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %237 = neura.grant_predicate %200, %230 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %238 = neura.grant_predicate %198, %230 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %239 = neura.grant_predicate %196, %230 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %240 = neura.grant_predicate %194, %230 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %241 = "neura.add"(%231, %232) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %241 -> %36 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %233 -> %34 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %234 -> %32 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %235 -> %30 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %236 -> %28 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %237 -> %26 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %232 -> %24 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %238 -> %22 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %239 -> 
%20 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %240 -> %18 : !neura.data !neura.data +// CTRL2DATA-NEXT: %242 = neura.load_indexed %218[%219, %220 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %243 = "neura.icmp"(%242, %221) <{cmpType = "sgt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %244 = neura.grant_predicate %221, %243 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %245 = neura.grant_predicate %218, %243 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %246 = neura.grant_predicate %219, %243 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %247 = neura.grant_predicate %220, %243 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %248 = neura.grant_predicate %222, %243 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %249 = neura.grant_predicate %223, %243 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %250 = neura.grant_predicate %224, %243 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %251 = neura.grant_predicate %225, %243 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %252 = neura.grant_predicate %226, %243 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %253 = neura.grant_predicate %227, %243 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %254 = neura.grant_predicate %228, %243 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %255 = neura.grant_predicate %229, %243 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %256 = "neura.not"(%243) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %257 = neura.grant_predicate %220, %256 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %258 = neura.grant_predicate %222, %256 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %259 = neura.grant_predicate %223, %256 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %260 = 
neura.grant_predicate %218, %256 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %261 = neura.grant_predicate %219, %256 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %262 = neura.grant_predicate %221, %256 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %263 = neura.grant_predicate %224, %256 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %264 = neura.grant_predicate %225, %256 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %265 = neura.grant_predicate %226, %256 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %266 = neura.grant_predicate %227, %256 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %267 = neura.grant_predicate %228, %256 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %268 = neura.grant_predicate %229, %256 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %244 to %245[%246, %247 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %269 = "neura.phi"(%268, %255) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %270 = "neura.phi"(%267, %254) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %271 = "neura.phi"(%266, %253) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %272 = "neura.phi"(%265, %252) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %273 = "neura.phi"(%264, %251) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %274 = "neura.phi"(%263, %250) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %275 = "neura.phi"(%262, %244) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %276 = "neura.phi"(%261, %246) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %277 = "neura.phi"(%260, %245) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %278 = "neura.phi"(%259, %249) : (!neura.data, !neura.data) 
-> !neura.data +// CTRL2DATA-NEXT: %279 = "neura.phi"(%258, %248) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %280 = "neura.phi"(%257, %247) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %281 = "neura.add"(%280, %279) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %281 -> %215 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %278 -> %213 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %277 -> %211 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %276 -> %209 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %275 -> %207 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %279 -> %205 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %274 -> %203 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %273 -> %201 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %272 -> %199 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %271 -> %197 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %270 -> %195 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %269 -> %193 : !neura.data !neura.data +// CTRL2DATA-NEXT: %282 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %283 = "neura.phi"(%282, %180) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %284 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %285 = "neura.phi"(%284, %179) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %286 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %287 = "neura.phi"(%286, %178) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %288 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %289 = "neura.phi"(%288, %170) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %290 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %291 = "neura.phi"(%290, %169) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %292 = neura.reserve 
: !neura.data +// CTRL2DATA-NEXT: %293 = "neura.phi"(%292, %177) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %294 = neura.reserve : !neura.data, i1> +// CTRL2DATA-NEXT: %295 = "neura.phi"(%294, %176) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %296 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %297 = "neura.phi"(%296, %175) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %298 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %299 = "neura.phi"(%298, %174) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %300 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %301 = "neura.phi"(%300, %173) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %302 = neura.reserve : !neura.data, i1> +// CTRL2DATA-NEXT: %303 = "neura.phi"(%302, %172) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %304 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %305 = "neura.phi"(%304, %171) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %306 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %307 = "neura.phi"(%306, %170) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %308 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %309 = "neura.phi"(%308, %169) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %310 = "neura.icmp"(%309, %305) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %311 = neura.grant_predicate %303, %310 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %312 = neura.grant_predicate %309, %310 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %313 = neura.grant_predicate %301, %310 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %314 = neura.grant_predicate %299, %310 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %315 = neura.grant_predicate %307, %310 : !neura.data, !neura.data -> !neura.data +// 
CTRL2DATA-NEXT: %316 = neura.grant_predicate %297, %310 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %317 = neura.grant_predicate %305, %310 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %318 = neura.grant_predicate %295, %310 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %319 = neura.grant_predicate %293, %310 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %320 = neura.grant_predicate %291, %310 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %321 = neura.grant_predicate %289, %310 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %322 = neura.grant_predicate %287, %310 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %323 = neura.grant_predicate %285, %310 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %324 = neura.grant_predicate %283, %310 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %325 = "neura.not"(%310) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %326 = neura.grant_predicate %295, %325 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %327 = neura.grant_predicate %299, %325 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %328 = neura.grant_predicate %301, %325 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %329 = neura.grant_predicate %307, %325 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %330 = neura.grant_predicate %293, %325 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %331 = neura.grant_predicate %297, %325 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %332 = neura.grant_predicate %305, %325 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %333 = neura.grant_predicate %291, %325 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %334 = neura.grant_predicate %289, %325 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %335 = neura.grant_predicate %303, %325 : 
!neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %336 = neura.grant_predicate %287, %325 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %337 = neura.grant_predicate %285, %325 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %338 = neura.grant_predicate %283, %325 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %339 = neura.load_indexed %326[%327, %328 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %340 = "neura.mul"(%339, %329) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %341 = "neura.div"(%340, %330) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %341 to %326[%327, %328 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %342 = "neura.add"(%328, %331) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %342 -> %166 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %332 -> %164 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %333 -> %162 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %334 -> %160 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %335 -> %158 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %327 -> %156 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %331 -> %154 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %326 -> %152 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %330 -> %150 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %336 -> %148 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %337 -> %146 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %338 -> %144 : !neura.data !neura.data +// CTRL2DATA-NEXT: %343 = neura.load_indexed %311[%312, %313, %314 : !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %344 = "neura.icmp"(%343, %315) <{cmpType = 
"sgt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %345 = "neura.sel"(%343, %315, %344) : (!neura.data, !neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %346 = "neura.add"(%312, %316) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %346 -> %308 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %345 -> %306 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %317 -> %304 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %311 -> %302 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %313 -> %300 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %314 -> %298 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %316 -> %296 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %318 -> %294 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %319 -> %292 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %320 -> %290 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %321 -> %288 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %322 -> %286 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %323 -> %284 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %324 -> %282 : !neura.data !neura.data +// CTRL2DATA-NEXT: %347 = neura.load_indexed %119[%120, %121 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %348 = "neura.add"(%122, %347) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %349 = "neura.add"(%121, %123) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %349 -> %116 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %348 -> %114 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %124 -> %112 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %119 -> %110 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %120 -> %108 : !neura.data 
!neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %123 -> %106 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %125 -> %104 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %126 -> %102 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %127 -> %100 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %128 -> %98 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %129 -> %96 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %130 -> %94 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %72 to %73[%74, %75 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %350 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %351 = "neura.phi"(%350, %82) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %352 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %353 = "neura.phi"(%352, %81) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %354 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %355 = "neura.phi"(%354, %80) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %356 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %357 = "neura.phi"(%356, %76) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %358 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %359 = "neura.phi"(%358, %72) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %360 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %361 = "neura.phi"(%360, %79) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %362 = neura.reserve : !neura.data, i1> +// CTRL2DATA-NEXT: %363 = "neura.phi"(%362, %73) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %364 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %365 = "neura.phi"(%364, %75) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %366 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %367 = "neura.phi"(%366, %74) : (!neura.data, 
!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %368 = neura.reserve : !neura.data, i1> +// CTRL2DATA-NEXT: %369 = "neura.phi"(%368, %78) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %370 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %371 = "neura.phi"(%370, %77) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %372 = neura.reserve : !neura.data +// CTRL2DATA-NEXT: %373 = "neura.phi"(%372, %76) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %374 = "neura.icmp"(%373, %371) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %375 = neura.grant_predicate %369, %374 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %376 = neura.grant_predicate %367, %374 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %377 = neura.grant_predicate %365, %374 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %378 = neura.grant_predicate %373, %374 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %379 = neura.grant_predicate %363, %374 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %380 = neura.grant_predicate %361, %374 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %381 = neura.grant_predicate %371, %374 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %382 = neura.grant_predicate %359, %374 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %383 = neura.grant_predicate %357, %374 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %384 = neura.grant_predicate %355, %374 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %385 = neura.grant_predicate %353, %374 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %386 = neura.grant_predicate %351, %374 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %387 = "neura.not"(%374) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %388 = neura.grant_predicate %365, %387 : 
!neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %389 = neura.grant_predicate %361, %387 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %390 = neura.grant_predicate %371, %387 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %391 = neura.grant_predicate %359, %387 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %392 = neura.grant_predicate %363, %387 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %393 = neura.grant_predicate %367, %387 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %394 = neura.grant_predicate %357, %387 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %395 = neura.grant_predicate %369, %387 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %396 = neura.grant_predicate %355, %387 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %397 = neura.grant_predicate %353, %387 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %398 = neura.grant_predicate %351, %387 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %399 = "neura.add"(%388, %389) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %399 -> %69 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %390 -> %67 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %391 -> %65 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %392 -> %63 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %393 -> %61 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %394 -> %59 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %395 -> %57 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %389 -> %55 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %396 -> %53 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %397 -> %51 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %398 -> %49 : !neura.data !neura.data 
+// CTRL2DATA-NEXT: %400 = neura.load_indexed %375[%376, %377, %378 : !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %401 = neura.load_indexed %379[%376, %377 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %402 = "neura.add"(%401, %400) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %402 to %379[%376, %377 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %403 = "neura.add"(%378, %380) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %403 -> %372 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %381 -> %370 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %375 -> %368 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %376 -> %366 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %377 -> %364 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %379 -> %362 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %380 -> %360 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %382 -> %358 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %383 -> %356 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %384 -> %354 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %385 -> %352 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %386 -> %350 : !neura.data !neura.data // CTRL2DATA-NEXT: "neura.return"() : () -> () // CTRL2DATA-NEXT: } \ No newline at end of file diff --git a/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir b/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir index d8ddb8f4..5a22f7e6 100644 --- a/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir +++ b/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir @@ -66,7 +66,7 @@ module attributes {} { } } -// CHECK: func.func 
@_Z29non_perfect_extra_computationPA128_iS0_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { +// CHECK: func.func @_Z29non_perfect_extra_computationPA128_iS0_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { // CHECK-NEXT: %0 = "neura.constant"() <{value = 4 : index}> : () -> index // CHECK-NEXT: %1 = "neura.constant"() <{value = 3 : index}> : () -> index // CHECK-NEXT: %2 = "neura.constant"() <{value = 2 : index}> : () -> index @@ -137,7 +137,6 @@ module attributes {} { // CHECK-NEXT: ^bb10: // pred: ^bb1 // CHECK-NEXT: "neura.return"() : () -> () // CHECK-NEXT: } - // CTRL2DATA: func.func @_Z29non_perfect_extra_computationPA128_iS0_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> @@ -308,20 +307,109 @@ module attributes {} { // CTRL2DATA-NEXT: %166 = neura.grant_predicate %88, %149 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %167 = neura.grant_predicate %86, %149 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %168 = neura.grant_predicate %116, %149 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %169 = neura.load_indexed %128[%129, %130 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %170 = "neura.mul"(%169, %131) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.store_indexed %170 to %132[%129, %130 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %171 = neura.load_indexed %128[%129, %130 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %172 = "neura.add"(%133, %171) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %173 = "neura.icmp"(%171, 
%134) <{cmpType = "sgt"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %174 = "neura.sel"(%171, %134, %173) : (!neura.data, !neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %175 = "neura.icmp"(%171, %135) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %176 = "neura.sel"(%171, %135, %175) : (!neura.data, !neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %177 = "neura.add"(%130, %136) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %177 -> %125 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %176 -> %123 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %174 -> %121 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %172 -> %119 : !neura.data !neura.data +// CTRL2DATA-NEXT: %169 = "neura.div"(%150, %151) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %170 = "neura.sub"(%152, %153) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %171 = "neura.icmp"(%170, %154) <{cmpType = "sgt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %172 = neura.grant_predicate %169, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %173 = neura.grant_predicate %155, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %174 = neura.grant_predicate %170, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %175 = neura.grant_predicate %156, %171 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %176 = neura.grant_predicate %157, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %177 = neura.grant_predicate %158, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %178 = neura.grant_predicate %152, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %179 = neura.grant_predicate %159, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %180 = neura.grant_predicate %153, %171 : 
!neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %181 = neura.grant_predicate %160, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %182 = neura.grant_predicate %161, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %183 = neura.grant_predicate %162, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %184 = neura.grant_predicate %163, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %185 = neura.grant_predicate %164, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %186 = neura.grant_predicate %154, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %187 = neura.grant_predicate %165, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %188 = neura.grant_predicate %166, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %189 = neura.grant_predicate %167, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %190 = neura.grant_predicate %168, %171 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %191 = neura.grant_predicate %151, %171 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %192 = "neura.not"(%171) : (!neura.data) -> !neura.data +// CTRL2DATA-NEXT: %193 = neura.grant_predicate %169, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %194 = neura.grant_predicate %156, %192 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %195 = neura.grant_predicate %157, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %196 = neura.grant_predicate %158, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %197 = neura.grant_predicate %152, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %198 = neura.grant_predicate %159, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %199 = neura.grant_predicate %153, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %200 = 
neura.grant_predicate %160, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %201 = neura.grant_predicate %161, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %202 = neura.grant_predicate %170, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %203 = neura.grant_predicate %162, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %204 = neura.grant_predicate %163, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %205 = neura.grant_predicate %164, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %206 = neura.grant_predicate %154, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %207 = neura.grant_predicate %165, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %208 = neura.grant_predicate %166, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %209 = neura.grant_predicate %167, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %210 = neura.grant_predicate %168, %192 : !neura.data, i1>, !neura.data -> !neura.data, i1> +// CTRL2DATA-NEXT: %211 = neura.grant_predicate %151, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %212 = neura.grant_predicate %155, %192 : !neura.data, !neura.data -> !neura.data +// CTRL2DATA-NEXT: %213 = "neura.mul"(%172, %173) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %214 = "neura.div"(%213, %174) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %215 = "neura.phi"(%173, %212) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %216 = "neura.phi"(%191, %211) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %217 = "neura.phi"(%190, %210) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %218 = "neura.phi"(%189, %209) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %219 = "neura.phi"(%188, %208) : (!neura.data, !neura.data) -> !neura.data +// 
CTRL2DATA-NEXT: %220 = "neura.phi"(%187, %207) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %221 = "neura.phi"(%186, %206) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %222 = "neura.phi"(%185, %205) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %223 = "neura.phi"(%184, %204) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %224 = "neura.phi"(%183, %203) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %225 = "neura.phi"(%174, %202) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %226 = "neura.phi"(%182, %201) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %227 = "neura.phi"(%181, %200) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %228 = "neura.phi"(%180, %199) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %229 = "neura.phi"(%179, %198) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %230 = "neura.phi"(%178, %197) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %231 = "neura.phi"(%177, %196) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %232 = "neura.phi"(%176, %195) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %233 = "neura.phi"(%175, %194) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> +// CTRL2DATA-NEXT: %234 = "neura.phi"(%172, %193) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %235 = "neura.phi"(%214, %193) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %234 to %233[%232, %231 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %230 to %233[%232, %229 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %228 to %233[%232, %227 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %235 to %233[%232, %226 : !neura.data, !neura.data] 
!neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %225 to %233[%232, %224 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %236 = "neura.add"(%232, %229) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %236 -> %60 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %223 -> %58 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %222 -> %56 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %221 -> %54 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %220 -> %52 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %231 -> %50 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %219 -> %48 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %218 -> %46 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %217 -> %44 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %233 -> %42 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %229 -> %40 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %216 -> %38 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %215 -> %36 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %227 -> %34 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %226 -> %32 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %224 -> %30 : !neura.data !neura.data +// CTRL2DATA-NEXT: %237 = neura.load_indexed %128[%129, %130 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %238 = "neura.mul"(%237, %131) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %238 to %132[%129, %130 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %239 = neura.load_indexed %128[%129, %130 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %240 = "neura.add"(%133, %239) : (!neura.data, !neura.data) -> !neura.data +// 
CTRL2DATA-NEXT: %241 = "neura.icmp"(%239, %134) <{cmpType = "sgt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %242 = "neura.sel"(%239, %134, %241) : (!neura.data, !neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %243 = "neura.icmp"(%239, %135) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %244 = "neura.sel"(%239, %135, %243) : (!neura.data, !neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %245 = "neura.add"(%130, %136) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %245 -> %125 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %244 -> %123 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %242 -> %121 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %240 -> %119 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %137 -> %117 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %128 -> %115 : !neura.data, i1> !neura.data, i1> // CTRL2DATA-NEXT: neura.ctrl_mov %129 -> %113 : !neura.data !neura.data @@ -339,94 +427,5 @@ module attributes {} { // CTRL2DATA-NEXT: neura.ctrl_mov %146 -> %89 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %147 -> %87 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %148 -> %85 : !neura.data !neura.data -// CTRL2DATA-NEXT: %178 = "neura.div"(%150, %151) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %179 = "neura.sub"(%152, %153) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %180 = "neura.icmp"(%179, %154) <{cmpType = "sgt"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %181 = neura.grant_predicate %178, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %182 = neura.grant_predicate %155, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %183 = neura.grant_predicate %179, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %184 = neura.grant_predicate 
%156, %180 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %185 = neura.grant_predicate %157, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %186 = neura.grant_predicate %158, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %187 = neura.grant_predicate %152, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %188 = neura.grant_predicate %159, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %189 = neura.grant_predicate %153, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %190 = neura.grant_predicate %160, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %191 = neura.grant_predicate %161, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %192 = neura.grant_predicate %162, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %193 = neura.grant_predicate %163, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %194 = neura.grant_predicate %164, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %195 = neura.grant_predicate %154, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %196 = neura.grant_predicate %165, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %197 = neura.grant_predicate %166, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %198 = neura.grant_predicate %167, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %199 = neura.grant_predicate %168, %180 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %200 = neura.grant_predicate %151, %180 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %201 = "neura.not"(%180) : (!neura.data) -> !neura.data -// CTRL2DATA-NEXT: %202 = neura.grant_predicate %178, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %203 = neura.grant_predicate %156, %201 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// 
CTRL2DATA-NEXT: %204 = neura.grant_predicate %157, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %205 = neura.grant_predicate %158, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %206 = neura.grant_predicate %152, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %207 = neura.grant_predicate %159, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %208 = neura.grant_predicate %153, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %209 = neura.grant_predicate %160, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %210 = neura.grant_predicate %161, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %211 = neura.grant_predicate %179, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %212 = neura.grant_predicate %162, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %213 = neura.grant_predicate %163, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %214 = neura.grant_predicate %164, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %215 = neura.grant_predicate %154, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %216 = neura.grant_predicate %165, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %217 = neura.grant_predicate %166, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %218 = neura.grant_predicate %167, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %219 = neura.grant_predicate %168, %201 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %220 = neura.grant_predicate %151, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %221 = neura.grant_predicate %155, %201 : !neura.data, !neura.data -> !neura.data -// CTRL2DATA-NEXT: %222 = "neura.mul"(%181, %182) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %223 = "neura.div"(%222, %183) : 
(!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %224 = "neura.phi"(%182, %221) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %225 = "neura.phi"(%200, %220) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %226 = "neura.phi"(%199, %219) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %227 = "neura.phi"(%198, %218) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %228 = "neura.phi"(%197, %217) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %229 = "neura.phi"(%196, %216) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %230 = "neura.phi"(%195, %215) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %231 = "neura.phi"(%194, %214) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %232 = "neura.phi"(%193, %213) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %233 = "neura.phi"(%192, %212) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %234 = "neura.phi"(%183, %211) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %235 = "neura.phi"(%191, %210) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %236 = "neura.phi"(%190, %209) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %237 = "neura.phi"(%189, %208) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %238 = "neura.phi"(%188, %207) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %239 = "neura.phi"(%187, %206) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %240 = "neura.phi"(%186, %205) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %241 = "neura.phi"(%185, %204) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %242 = "neura.phi"(%184, %203) : (!neura.data, i1>, !neura.data, i1>) -> !neura.data, i1> -// CTRL2DATA-NEXT: %243 = "neura.phi"(%181, %202) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %244 = 
"neura.phi"(%223, %202) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.store_indexed %243 to %242[%241, %240 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: neura.store_indexed %239 to %242[%241, %238 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: neura.store_indexed %237 to %242[%241, %236 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: neura.store_indexed %244 to %242[%241, %235 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: neura.store_indexed %234 to %242[%241, %233 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %245 = "neura.add"(%241, %238) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %245 -> %60 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %232 -> %58 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %231 -> %56 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %230 -> %54 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %229 -> %52 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %240 -> %50 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %228 -> %48 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %227 -> %46 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %226 -> %44 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %242 -> %42 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %238 -> %40 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %225 -> %38 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %224 -> %36 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %236 -> %34 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %235 -> %32 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %233 -> %30 : !neura.data !neura.data // CTRL2DATA-NEXT: 
"neura.return"() : () -> () // CTRL2DATA-NEXT: } \ No newline at end of file diff --git a/test/controflow_fuse/perfect_nested/perfect_nested.mlir b/test/controflow_fuse/perfect_nested/perfect_nested.mlir index 0b565887..9f5cdb24 100644 --- a/test/controflow_fuse/perfect_nested/perfect_nested.mlir +++ b/test/controflow_fuse/perfect_nested/perfect_nested.mlir @@ -59,7 +59,8 @@ module attributes {} { } } -// CHECK: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { + +// CHECK: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { // CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index // CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index // CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : index}> : () -> index @@ -89,7 +90,6 @@ module attributes {} { // CHECK-NEXT: ^bb6: // pred: ^bb1 // CHECK-NEXT: "neura.return"() : () -> () // CHECK-NEXT: } - // CAST: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { // CAST-NEXT: %0 = "neura.constant"() <{value = 1 : i64}> : () -> i64 // CAST-NEXT: %1 = "neura.constant"() <{value = 128 : i64}> : () -> i64 @@ -114,7 +114,6 @@ module attributes {} { // CAST-NEXT: ^bb6: // pred: ^bb1 // CAST-NEXT: "neura.return"() : () -> () // CAST-NEXT: } - // CTRL2DATA: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> @@ -174,25 +173,24 @@ module attributes {} { // CTRL2DATA-NEXT: %55 = 
neura.grant_predicate %36, %51 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %56 = neura.grant_predicate %38, %51 : !neura.data, i1>, !neura.data -> !neura.data, i1> // CTRL2DATA-NEXT: %57 = neura.grant_predicate %34, %51 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %58 = neura.load_indexed %44[%45, %45, %45, %45, %45, %46 : !neura.data, !neura.data, !neura.data, !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: neura.store_indexed %58 to %47[%45, %45, %48, %45, %45, %46 : !neura.data, !neura.data, !neura.data, !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %59 = "neura.add"(%46, %49) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %59 -> %41 : !neura.data !neura.data +// CTRL2DATA-NEXT: %58 = "neura.add"(%52, %53) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %58 -> %20 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %54 -> %18 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %55 -> %16 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %56 -> %14 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %57 -> %12 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %53 -> %10 : !neura.data !neura.data +// CTRL2DATA-NEXT: %59 = neura.load_indexed %44[%45, %45, %45, %45, %45, %46 : !neura.data, !neura.data, !neura.data, !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: neura.store_indexed %59 to %47[%45, %45, %48, %45, %45, %46 : !neura.data, !neura.data, !neura.data, !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %60 = "neura.add"(%46, %49) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %60 -> %41 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %50 -> %39 : !neura.data !neura.data // 
CTRL2DATA-NEXT: neura.ctrl_mov %44 -> %37 : !neura.data, i1> !neura.data, i1> // CTRL2DATA-NEXT: neura.ctrl_mov %45 -> %35 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %47 -> %33 : !neura.data, i1> !neura.data, i1> // CTRL2DATA-NEXT: neura.ctrl_mov %48 -> %31 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %49 -> %29 : !neura.data !neura.data -// CTRL2DATA-NEXT: %60 = "neura.add"(%52, %53) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %60 -> %20 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %54 -> %18 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %55 -> %16 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %56 -> %14 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %57 -> %12 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %53 -> %10 : !neura.data !neura.data // CTRL2DATA-NEXT: "neura.return"() : () -> () // CTRL2DATA-NEXT: } - // MAPPING: func.func @_Z10bert_node1PA1_A1_A1_A1_A128_bPA1_A128_S1_(%arg0: memref, %arg1: memref) attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage, mapping_info = {compiled_ii = 10 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 8 : i32, res_mii = 2 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}} { \ No newline at end of file diff --git a/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir b/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir index 16bdbec5..88121ee4 100644 --- a/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir +++ b/test/controflow_fuse/perfect_reduction/perfect_reduction.mlir @@ -46,38 +46,38 @@ module attributes {} { } -// CHECK: func.func @_Z27perfect_nested_reduction_2dPA128_i(%arg0: memref) -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { -// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index -// CHECK-NEXT: %1 = 
"neura.constant"() <{value = 128 : index}> : () -> index -// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : i32}> : () -> i32 -// CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : index}> : () -> index -// CHECK-NEXT: %4 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 -// CHECK-NEXT: neura.br %4, %2 : i64, i32 to ^bb1 -// CHECK-NEXT: ^bb1(%5: i64, %6: i32): // 2 preds: ^bb0, ^bb5 -// CHECK-NEXT: %7 = "neura.cast"(%5) <{cast_type = "int_to_index"}> : (i64) -> index -// CHECK-NEXT: %8 = "neura.icmp"(%7, %1) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: neura.cond_br %8 : i1 then to ^bb2 else to ^bb6 -// CHECK-NEXT: ^bb2: // pred: ^bb1 -// CHECK-NEXT: %9 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 -// CHECK-NEXT: neura.br %9, %6 : i64, i32 to ^bb3 -// CHECK-NEXT: ^bb3(%10: i64, %11: i32): // 2 preds: ^bb2, ^bb4 -// CHECK-NEXT: %12 = "neura.cast"(%10) <{cast_type = "int_to_index"}> : (i64) -> index -// CHECK-NEXT: %13 = "neura.icmp"(%12, %1) <{cmpType = "slt"}> : (index, index) -> i1 -// CHECK-NEXT: neura.cond_br %13 : i1 then to ^bb4 else to ^bb5 -// CHECK-NEXT: ^bb4: // pred: ^bb3 -// CHECK-NEXT: %14 = neura.load_indexed %arg0[%7, %12 : index, index] memref : i32 -// CHECK-NEXT: %15 = "neura.add"(%11, %14) : (i32, i32) -> i32 -// CHECK-NEXT: %16 = "neura.add"(%12, %0) : (index, index) -> index -// CHECK-NEXT: %17 = "neura.cast"(%16) <{cast_type = "index_to_int"}> : (index) -> i64 -// CHECK-NEXT: neura.br %17, %15 : i64, i32 to ^bb3 -// CHECK-NEXT: ^bb5: // pred: ^bb3 -// CHECK-NEXT: %18 = "neura.add"(%7, %0) : (index, index) -> index -// CHECK-NEXT: %19 = "neura.cast"(%18) <{cast_type = "index_to_int"}> : (index) -> i64 -// CHECK-NEXT: neura.br %19, %11 : i64, i32 to ^bb1 -// CHECK-NEXT: ^bb6: // pred: ^bb1 -// CHECK-NEXT: "neura.return"(%6) : (i32) -> () -// CHECK-NEXT: } +// CHECK: func.func @_Z27perfect_nested_reduction_2dPA128_i(%arg0: memref) -> i32 attributes {accelerator = "neura", llvm.linkage = 
#llvm.linkage} { +// CHECK-NEXT: %0 = "neura.constant"() <{value = 1 : index}> : () -> index +// CHECK-NEXT: %1 = "neura.constant"() <{value = 128 : index}> : () -> index +// CHECK-NEXT: %2 = "neura.constant"() <{value = 0 : i32}> : () -> i32 +// CHECK-NEXT: %3 = "neura.constant"() <{value = 0 : index}> : () -> index +// CHECK-NEXT: %4 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %4, %2 : i64, i32 to ^bb1 +// CHECK-NEXT: ^bb1(%5: i64, %6: i32): // 2 preds: ^bb0, ^bb5 +// CHECK-NEXT: %7 = "neura.cast"(%5) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %8 = "neura.icmp"(%7, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %8 : i1 then to ^bb2 else to ^bb6 +// CHECK-NEXT: ^bb2: // pred: ^bb1 +// CHECK-NEXT: %9 = "neura.cast"(%3) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %9, %6 : i64, i32 to ^bb3 +// CHECK-NEXT: ^bb3(%10: i64, %11: i32): // 2 preds: ^bb2, ^bb4 +// CHECK-NEXT: %12 = "neura.cast"(%10) <{cast_type = "int_to_index"}> : (i64) -> index +// CHECK-NEXT: %13 = "neura.icmp"(%12, %1) <{cmpType = "slt"}> : (index, index) -> i1 +// CHECK-NEXT: neura.cond_br %13 : i1 then to ^bb4 else to ^bb5 +// CHECK-NEXT: ^bb4: // pred: ^bb3 +// CHECK-NEXT: %14 = neura.load_indexed %arg0[%7, %12 : index, index] memref : i32 +// CHECK-NEXT: %15 = "neura.add"(%11, %14) : (i32, i32) -> i32 +// CHECK-NEXT: %16 = "neura.add"(%12, %0) : (index, index) -> index +// CHECK-NEXT: %17 = "neura.cast"(%16) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %17, %15 : i64, i32 to ^bb3 +// CHECK-NEXT: ^bb5: // pred: ^bb3 +// CHECK-NEXT: %18 = "neura.add"(%7, %0) : (index, index) -> index +// CHECK-NEXT: %19 = "neura.cast"(%18) <{cast_type = "index_to_int"}> : (index) -> i64 +// CHECK-NEXT: neura.br %19, %11 : i64, i32 to ^bb1 +// CHECK-NEXT: ^bb6: // pred: ^bb1 +// CHECK-NEXT: "neura.return"(%6) : (i32) -> () +// CHECK-NEXT: } // CAST: func.func 
@_Z27perfect_nested_reduction_2dPA128_i(%arg0: memref) -> i32 attributes {accelerator = "neura", llvm.linkage = #llvm.linkage} { // CAST-NEXT: %0 = "neura.constant"() <{value = 1 : i64}> : () -> i64 // CAST-NEXT: %1 = "neura.constant"() <{value = 128 : i64}> : () -> i64 @@ -103,7 +103,6 @@ module attributes {} { // CAST-NEXT: ^bb6: // pred: ^bb1 // CAST-NEXT: "neura.return"(%5) : (i32) -> () // CAST-NEXT: } - // CTRL2DATA: func.func @_Z27perfect_nested_reduction_2dPA128_i(%arg0: memref) -> i32 attributes {accelerator = "neura", dataflow_mode = "predicate", llvm.linkage = #llvm.linkage} { // CTRL2DATA-NEXT: %0 = "neura.constant"() <{value = "%arg0"}> : () -> !neura.data, i1> // CTRL2DATA-NEXT: %1 = "neura.grant_once"(%0) : (!neura.data, i1>) -> !neura.data, i1> @@ -165,22 +164,22 @@ module attributes {} { // CTRL2DATA-NEXT: %57 = neura.grant_predicate %40, %53 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %58 = neura.grant_predicate %32, %53 : !neura.data, !neura.data -> !neura.data // CTRL2DATA-NEXT: %59 = neura.grant_predicate %38, %53 : !neura.data, i1>, !neura.data -> !neura.data, i1> -// CTRL2DATA-NEXT: %60 = neura.load_indexed %46[%47, %48 : !neura.data, !neura.data] !neura.data, i1> : !neura.data -// CTRL2DATA-NEXT: %61 = "neura.add"(%49, %60) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %62 = "neura.add"(%48, %50) : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %62 -> %43 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %61 -> %41 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %51 -> %39 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %46 -> %37 : !neura.data, i1> !neura.data, i1> -// CTRL2DATA-NEXT: neura.ctrl_mov %47 -> %35 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %50 -> %33 : !neura.data !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %52 -> %31 : !neura.data !neura.data -// CTRL2DATA-NEXT: %63 = "neura.add"(%54, %55) : (!neura.data, 
!neura.data) -> !neura.data -// CTRL2DATA-NEXT: neura.ctrl_mov %63 -> %20 : !neura.data !neura.data +// CTRL2DATA-NEXT: %60 = "neura.add"(%54, %55) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %60 -> %20 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %56 -> %18 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %57 -> %16 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %58 -> %14 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %59 -> %12 : !neura.data, i1> !neura.data, i1> // CTRL2DATA-NEXT: neura.ctrl_mov %55 -> %10 : !neura.data !neura.data +// CTRL2DATA-NEXT: %61 = neura.load_indexed %46[%47, %48 : !neura.data, !neura.data] !neura.data, i1> : !neura.data +// CTRL2DATA-NEXT: %62 = "neura.add"(%49, %61) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %63 = "neura.add"(%48, %50) : (!neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %63 -> %43 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %62 -> %41 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %51 -> %39 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %46 -> %37 : !neura.data, i1> !neura.data, i1> +// CTRL2DATA-NEXT: neura.ctrl_mov %47 -> %35 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %50 -> %33 : !neura.data !neura.data +// CTRL2DATA-NEXT: neura.ctrl_mov %52 -> %31 : !neura.data !neura.data // CTRL2DATA-NEXT: "neura.return"(%30) : (!neura.data) -> () // CTRL2DATA-NEXT: } \ No newline at end of file From 76e821837947f4f1f321d0f4fdb46f195be54e23 Mon Sep 17 00:00:00 2001 From: tangyz <739245980@qq.com> Date: Sat, 25 Oct 2025 12:29:39 +0800 Subject: [PATCH 06/12] Unify neura.sel argument order to match llvm.select: (cond, ifTrue, ifFalse) --- include/NeuraDialect/NeuraOps.td | 4 ++-- lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp | 6 +++--- lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp | 4 ++-- 
test/affine2neura/bert/bert_node2/bert_node2.mlir | 4 ++-- .../complex_nested/complex_nested.mlir | 4 ++-- .../non_perfect_nested/non_perfect_nested.mlir | 12 ++++++------ test/neura/interpreter/basic_operation/sel.mlir | 8 ++++---- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/NeuraDialect/NeuraOps.td b/include/NeuraDialect/NeuraOps.td index ff0729ca..f4c52fab 100644 --- a/include/NeuraDialect/NeuraOps.td +++ b/include/NeuraDialect/NeuraOps.td @@ -253,9 +253,9 @@ def Neura_Br : Op { } def Neura_SelOp : Op { - let arguments = (ins AnyType:$ifTrue, AnyType:$ifFalse, AnyType:$cond); + let arguments = (ins AnyType:$cond, AnyType:$ifTrue, AnyType:$ifFalse); let results = (outs AnyType:$result); - // let assemblyFormat = "$ifTrue `,` $ifFalse `,` $cond attr-dict `:` type($ifTrue)"; + // let assemblyFormat = "$cond `,` $ifTrue `,` $ifFalse attr-dict `:` type($result)"; } def Neura_NotOp : Op { diff --git a/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp b/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp index dc6f4532..795a6ae5 100644 --- a/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp +++ b/lib/Conversion/ArithToNeura/ArithToNeuraPass.cpp @@ -244,9 +244,9 @@ struct ArithSelectToNeuraSel : public OpRewritePattern { Value false_value = op.getFalseValue(); Type result_type = op.getType(); - // Converts arith SelectOp to Neura SelOp. - rewriter.replaceOpWithNewOp(op, result_type, true_value, - false_value, condition); + // Converts arith SelectOp to Neura SelOp with consistent order: (cond, ifTrue, ifFalse). 
+ rewriter.replaceOpWithNewOp(op, result_type, condition, + true_value, false_value); return success(); } }; diff --git a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp index cce6b861..05167bf9 100644 --- a/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp +++ b/lib/Conversion/LlvmToNeura/LlvmToNeuraPass.cpp @@ -264,9 +264,9 @@ struct LlvmSelectToNeuraSel : public OpRewritePattern { Value false_value = op.getFalseValue(); Type result_type = op.getType(); - // Note: neura.sel has different argument order: (ifTrue, ifFalse, cond) + // neura.sel now follows the same order as llvm.select: (cond, ifTrue, ifFalse) rewriter.replaceOpWithNewOp(op, result_type, - true_value, false_value, cond); + cond, true_value, false_value); return success(); } }; diff --git a/test/affine2neura/bert/bert_node2/bert_node2.mlir b/test/affine2neura/bert/bert_node2/bert_node2.mlir index 0bc0a274..16ea0029 100644 --- a/test/affine2neura/bert/bert_node2/bert_node2.mlir +++ b/test/affine2neura/bert/bert_node2/bert_node2.mlir @@ -52,7 +52,7 @@ module attributes {} { // CHECK-NEXT: ^bb4: // pred: ^bb3 // CHECK-NEXT: %16 = neura.load_indexed %arg0[%7, %10 : index, index] memref : i32 // CHECK-NEXT: %17 = "neura.icmp"(%16, %6) <{cmpType = "sge"}> : (i32, i32) -> i1 -// CHECK-NEXT: %18 = "neura.sel"(%4, %16, %17) : (i32, i32, i1) -> i32 +// CHECK-NEXT: %18 = "neura.sel"(%17, %4, %16) : (i1, i32, i32) -> i32 // CHECK-NEXT: neura.cond_br %17 : i1 then to ^bb5 else to ^bb6 // CHECK-NEXT: ^bb5: // pred: ^bb4 // CHECK-NEXT: neura.br %3 : i1 to ^bb7 @@ -62,7 +62,7 @@ module attributes {} { // CHECK-NEXT: ^bb7(%20: i1): // 2 preds: ^bb5, ^bb6 // CHECK-NEXT: neura.br to ^bb8 // CHECK-NEXT: ^bb8: // pred: ^bb7 -// CHECK-NEXT: %21 = "neura.sel"(%5, %18, %20) : (i32, i32, i1) -> i32 +// CHECK-NEXT: %21 = "neura.sel"(%20, %5, %18) : (i1, i32, i32) -> i32 // CHECK-NEXT: %22 = "neura.cast"(%21) <{cast_type = "int_to_index"}> : (i32) -> index // CHECK-NEXT: %23 = 
neura.load_indexed %arg1[%22, %14 : index, index] memref : f32 // CHECK-NEXT: neura.store_indexed %23 to %arg2[%7, %10, %14 : index, index, index] memref : f32 diff --git a/test/controflow_fuse/complex_nested/complex_nested.mlir b/test/controflow_fuse/complex_nested/complex_nested.mlir index bd7a099a..a3a0668d 100644 --- a/test/controflow_fuse/complex_nested/complex_nested.mlir +++ b/test/controflow_fuse/complex_nested/complex_nested.mlir @@ -138,7 +138,7 @@ module attributes {} { // CHECK-NEXT: ^bb15: // pred: ^bb14 // CHECK-NEXT: %45 = neura.load_indexed %arg0[%43, %38, %9 : index, index, index] memref : i32 // CHECK-NEXT: %46 = "neura.icmp"(%45, %42) <{cmpType = "sgt"}> : (i32, i32) -> i1 -// CHECK-NEXT: %47 = "neura.sel"(%45, %42, %46) : (i32, i32, i1) -> i32 +// CHECK-NEXT: %47 = "neura.sel"(%46, %45, %42) : (i1, i32, i32) -> i32 // CHECK-NEXT: %48 = "neura.add"(%43, %0) : (index, index) -> index // CHECK-NEXT: %49 = "neura.cast"(%48) <{cast_type = "index_to_int"}> : (index) -> i64 // CHECK-NEXT: neura.br %49, %47 : i64, i32 to ^bb14 @@ -559,7 +559,7 @@ module attributes {} { // CTRL2DATA-NEXT: neura.ctrl_mov %338 -> %144 : !neura.data !neura.data // CTRL2DATA-NEXT: %343 = neura.load_indexed %311[%312, %313, %314 : !neura.data, !neura.data, !neura.data] !neura.data, i1> : !neura.data // CTRL2DATA-NEXT: %344 = "neura.icmp"(%343, %315) <{cmpType = "sgt"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %345 = "neura.sel"(%343, %315, %344) : (!neura.data, !neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %345 = "neura.sel"(%344, %343, %315) : (!neura.data, !neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %346 = "neura.add"(%312, %316) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %346 -> %308 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %345 -> %306 : !neura.data !neura.data diff --git a/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir 
b/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir index 5a22f7e6..44d506b6 100644 --- a/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir +++ b/test/controflow_fuse/non_perfect_nested/non_perfect_nested.mlir @@ -92,7 +92,7 @@ module attributes {} { // CHECK-NEXT: %19 = "neura.mul"(%7, %18) : (i32, i32) -> i32 // CHECK-NEXT: %20 = "neura.sub"(%17, %19) : (i32, i32) -> i32 // CHECK-NEXT: %21 = "neura.icmp"(%20, %11) <{cmpType = "eq"}> : (i32, i32) -> i1 -// CHECK-NEXT: %22 = "neura.sel"(%7, %6, %21) : (i32, i32, i1) -> i32 +// CHECK-NEXT: %22 = "neura.sel"(%21, %7, %6) : (i1, i32, i32) -> i32 // CHECK-NEXT: %23 = "neura.cast"(%12) <{cast_type = "index_to_int"}> : (index) -> i64 // CHECK-NEXT: neura.br %23, %8, %9, %11 : i64, i32, i32, i32 to ^bb3 // CHECK-NEXT: ^bb3(%24: i64, %25: i32, %26: i32, %27: i32): // 2 preds: ^bb2, ^bb4 @@ -106,9 +106,9 @@ module attributes {} { // CHECK-NEXT: %32 = neura.load_indexed %arg0[%15, %28 : index, index] memref : i32 // CHECK-NEXT: %33 = "neura.add"(%27, %32) : (i32, i32) -> i32 // CHECK-NEXT: %34 = "neura.icmp"(%32, %26) <{cmpType = "sgt"}> : (i32, i32) -> i1 -// CHECK-NEXT: %35 = "neura.sel"(%32, %26, %34) : (i32, i32, i1) -> i32 +// CHECK-NEXT: %35 = "neura.sel"(%34, %32, %26) : (i1, i32, i32) -> i32 // CHECK-NEXT: %36 = "neura.icmp"(%32, %25) <{cmpType = "slt"}> : (i32, i32) -> i1 -// CHECK-NEXT: %37 = "neura.sel"(%32, %25, %36) : (i32, i32, i1) -> i32 +// CHECK-NEXT: %37 = "neura.sel"(%36, %32, %25) : (i1, i32, i32) -> i32 // CHECK-NEXT: %38 = "neura.add"(%28, %3) : (index, index) -> index // CHECK-NEXT: %39 = "neura.cast"(%38) <{cast_type = "index_to_int"}> : (index) -> i64 // CHECK-NEXT: neura.br %39, %37, %35, %33 : i64, i32, i32, i32 to ^bb3 @@ -222,7 +222,7 @@ module attributes {} { // CTRL2DATA-NEXT: %81 = "neura.mul"(%64, %80) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %82 = "neura.sub"(%79, %81) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %83 = 
"neura.icmp"(%82, %65) <{cmpType = "eq"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %84 = "neura.sel"(%64, %66, %83) : (!neura.data, !neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %84 = "neura.sel"(%83, %64, %66) : (!neura.data, !neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %85 = neura.reserve : !neura.data // CTRL2DATA-NEXT: %86 = "neura.phi"(%85, %69) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %87 = neura.reserve : !neura.data @@ -402,9 +402,9 @@ module attributes {} { // CTRL2DATA-NEXT: %239 = neura.load_indexed %128[%129, %130 : !neura.data, !neura.data] !neura.data, i1> : !neura.data // CTRL2DATA-NEXT: %240 = "neura.add"(%133, %239) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %241 = "neura.icmp"(%239, %134) <{cmpType = "sgt"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %242 = "neura.sel"(%239, %134, %241) : (!neura.data, !neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %242 = "neura.sel"(%241, %239, %134) : (!neura.data, !neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %243 = "neura.icmp"(%239, %135) <{cmpType = "slt"}> : (!neura.data, !neura.data) -> !neura.data -// CTRL2DATA-NEXT: %244 = "neura.sel"(%239, %135, %243) : (!neura.data, !neura.data, !neura.data) -> !neura.data +// CTRL2DATA-NEXT: %244 = "neura.sel"(%243, %239, %135) : (!neura.data, !neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: %245 = "neura.add"(%130, %136) : (!neura.data, !neura.data) -> !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %245 -> %125 : !neura.data !neura.data // CTRL2DATA-NEXT: neura.ctrl_mov %244 -> %123 : !neura.data !neura.data diff --git a/test/neura/interpreter/basic_operation/sel.mlir b/test/neura/interpreter/basic_operation/sel.mlir index 8166555e..4c1e2d91 100644 --- a/test/neura/interpreter/basic_operation/sel.mlir +++ b/test/neura/interpreter/basic_operation/sel.mlir @@ -7,7 +7,7 @@ func.func @test_sel_with_comparison() -> f32 { 
%true_val = arith.constant 100.0 : f32 %false_val = arith.constant 200.0 : f32 - %res = "neura.sel"(%true_val, %false_val, %cond) : (f32, f32, i1) -> f32 + %res = "neura.sel"(%cond, %true_val, %false_val) : (i1, f32, f32) -> f32 // CHECK: [neura-interpreter] → Output: 100.000000 return %res : f32 @@ -20,7 +20,7 @@ func.func @test_sel_with_comparison_false() -> f32 { %true_val = arith.constant 100.0 : f32 %false_val = arith.constant 200.0 : f32 - %res = "neura.sel"(%true_val, %false_val, %cond) : (f32, f32, i1) -> f32 + %res = "neura.sel"(%cond, %true_val, %false_val) : (i1, f32, f32) -> f32 // CHECK: [neura-interpreter] → Output: 200.000000 return %res : f32 @@ -33,14 +33,14 @@ func.func @test_sel_nested_with_comparison() -> f32 { %true_val1 = arith.constant 100.0 : f32 %false_val1 = arith.constant 200.0 : f32 - %sel1 = "neura.sel"(%true_val1, %false_val1, %cond1) : (f32, f32, i1) -> f32 + %sel1 = "neura.sel"(%cond1, %true_val1, %false_val1) : (i1, f32, f32) -> f32 %c = arith.constant 5.0 : f32 %d = arith.constant 1.0 : f32 %cond2 = "neura.fcmp"(%c, %d) {cmpType = "gt"} : (f32, f32) -> i1 %true_val2 = arith.constant 300.0 : f32 - %res = "neura.sel"(%true_val2, %sel1, %cond2) : (f32, f32, i1) -> f32 + %res = "neura.sel"(%cond2, %true_val2, %sel1) : (i1, f32, f32) -> f32 // CHECK: [neura-interpreter] → Output: 300.000000 return %res : f32 From ac8b2d1f1f0258a1ebd3e3b23f6299ec69a05aa2 Mon Sep 17 00:00:00 2001 From: tangyz <739245980@qq.com> Date: Sat, 25 Oct 2025 12:46:20 +0800 Subject: [PATCH 07/12] Add periods to all comments in FoldConstantPass.cpp --- .../HwAgnosticOpt/FoldConstantPass.cpp | 178 +++++++++--------- 1 file changed, 91 insertions(+), 87 deletions(-) diff --git a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp index 374a0482..f4324be0 100644 --- a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp +++ 
b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp @@ -58,15 +58,19 @@ void addConstantAttribute(Operation *op, StringRef attr_name, // Generic Constant Folding Framework // ========================================= -// Structure to hold information about which operands to fold +// Structure to hold information about which operands to fold. struct OperandFoldingInfo { - SmallVector const_operand_indices; // Indices of constant operands to fold - SmallVector const_values; // Corresponding constant values - SmallVector all_operands; // All operands (nullptr for folded ones) - SmallVector const_ops_to_clean; // Constant ops to potentially clean up + // Indices of constant operands to fold. + SmallVector const_operand_indices; + // Corresponding constant values. + SmallVector const_values; + // All operands (nullptr for folded ones). + SmallVector all_operands; + // Constant ops to potentially clean up. + SmallVector const_ops_to_clean; }; -// Analyzes operands from right to left and determines which to fold +// Analyzes operands from right to left and determines which to fold. OperandFoldingInfo analyzeOperandsForFolding(Operation *op) { OperandFoldingInfo info; @@ -75,7 +79,7 @@ OperandFoldingInfo analyzeOperandsForFolding(Operation *op) { return info; } - // First pass: identify which operands are constants + // First pass: identify which operands are constants. SmallVector is_const(num_operands, false); bool has_non_const = false; @@ -87,8 +91,8 @@ OperandFoldingInfo analyzeOperandsForFolding(Operation *op) { } } - // Second pass: decide which constants to fold - // Build all_operands array (nullptr for folded operands) + // Second pass: decide which constants to fold. + // Build all_operands array (nullptr for folded operands). 
info.all_operands.resize(num_operands); for (size_t i = 0; i < num_operands; ++i) { @@ -96,18 +100,18 @@ OperandFoldingInfo analyzeOperandsForFolding(Operation *op) { if (is_const[i]) { // If this is operand 0 and there are no other non-const operands, - // we must keep it (MLIR operations need at least one operand) + // we must keep it (MLIR operations need at least one operand). if (i == 0 && !has_non_const) { info.all_operands[i] = operand; } else { - // This operand will be folded - mark as nullptr + // This operand will be folded - mark as nullptr. info.all_operands[i] = nullptr; info.const_operand_indices.push_back(i); info.const_values.push_back(getOriginConstantValue(operand)); info.const_ops_to_clean.push_back(operand.getDefiningOp()); } } else { - // This operand is not a constant, keep it + // This operand is not a constant, keep it. info.all_operands[i] = operand; } } @@ -115,19 +119,19 @@ OperandFoldingInfo analyzeOperandsForFolding(Operation *op) { return info; } -// Gets the attribute name for a given operand index -// For binary operations, uses "lhs_value" and "rhs_value" -// For other operations, uses "operand_N_value" +// Gets the attribute name for a given operand index. +// For binary operations, uses "lhs_value" and "rhs_value". +// For other operations, uses "operand_N_value". std::string getAttributeNameForOperandIndex(size_t index, size_t total_operands) { if (total_operands == 2) { - // Binary operation: use lhs_value/rhs_value + // Binary operation: use lhs_value/rhs_value. if (index == 0) { return "lhs_value"; } else { return "rhs_value"; } } else { - // Multi-operand operation: use operand_N_value + // Multi-operand operation: use operand_N_value. 
return "operand_" + std::to_string(index) + "_value"; } } @@ -139,28 +143,28 @@ template struct GenericFuseConstantPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; - // Virtual function to get attribute name for a given operand index - // Default implementation uses binary naming (lhs/rhs) or operand_N naming - // Derived classes can override this for custom naming + // Virtual function to get attribute name for a given operand index. + // Default implementation uses binary naming (lhs/rhs) or operand_N naming. + // Derived classes can override this for custom naming. virtual std::string getAttributeName(size_t operand_idx, size_t total_operands) const { return getAttributeNameForOperandIndex(operand_idx, total_operands); } LogicalResult matchAndRewrite(OpType op, PatternRewriter &rewriter) const override { - // Get the original number of operands before folding + // Get the original number of operands before folding. size_t num_operands = op->getNumOperands(); - // Analyze operands to determine which can be folded + // Analyze operands to determine which can be folded. OperandFoldingInfo fold_info = analyzeOperandsForFolding(op); - // If no constant operands found, nothing to do + // If no constant operands found, nothing to do. if (fold_info.const_operand_indices.empty()) { return failure(); } - // Check if any operands have already been folded - // Look for any attribute ending with "_value" which indicates constant folding + // Check if any operands have already been folded. + // Look for any attribute ending with "_value" which indicates constant folding. for (auto attr : op->getAttrs()) { StringRef attr_name = attr.getName().getValue(); if (attr_name.ends_with("_value")) { @@ -168,7 +172,7 @@ struct GenericFuseConstantPattern : public OpRewritePattern { } } - // Create the new operation with all operands (nullptr for folded ones) + // Create the new operation with all operands (nullptr for folded ones). 
Operation *new_op = createOpWithFoldedConstants( op, fold_info.all_operands, rewriter); @@ -176,7 +180,7 @@ struct GenericFuseConstantPattern : public OpRewritePattern { return failure(); } - // Add constant attributes for each folded operand + // Add constant attributes for each folded operand. for (size_t i = 0; i < fold_info.const_operand_indices.size(); ++i) { size_t operand_idx = fold_info.const_operand_indices[i]; Attribute const_value = fold_info.const_values[i]; @@ -185,10 +189,10 @@ struct GenericFuseConstantPattern : public OpRewritePattern { addConstantAttribute(new_op, attr_name, const_value); } - // Replace the old operation + // Replace the old operation. rewriter.replaceOp(op, new_op->getResults()); - // Clean up unused constant operations + // Clean up unused constant operations. for (Operation *const_op : fold_info.const_ops_to_clean) { if (const_op->use_empty()) { rewriter.eraseOp(const_op); @@ -198,8 +202,8 @@ struct GenericFuseConstantPattern : public OpRewritePattern { return success(); } - // Virtual function to create the operation with folded constants - // Must be implemented by derived classes + // Virtual function to create the operation with folded constants. + // Must be implemented by derived classes. virtual Operation * createOpWithFoldedConstants(OpType op, ArrayRef non_const_operands, PatternRewriter &rewriter) const = 0; @@ -209,7 +213,7 @@ struct GenericFuseConstantPattern : public OpRewritePattern { // Specialized Patterns for Specific Operations // ========================================= -// Helper macro to define a pattern for a binary operation +// Helper macro to define a pattern for a binary operation. 
#define DEFINE_BINARY_OP_PATTERN(OP_NAME, OP_TYPE) \ struct Fuse##OP_NAME##ConstantPattern \ : public GenericFuseConstantPattern { \ @@ -217,16 +221,16 @@ struct GenericFuseConstantPattern : public OpRewritePattern { Operation *createOpWithFoldedConstants( \ neura::OP_TYPE op, ArrayRef all_operands, \ PatternRewriter &rewriter) const override { \ - /* Extract only non-null operands */ \ + /* Extract only non-null operands. */ \ SmallVector operands; \ for (Value v : all_operands) { \ if (v) operands.push_back(v); \ } \ - /* Use generic Operation create and copy attributes */ \ + /* Use generic Operation create and copy attributes. */ \ OperationState state(op.getLoc(), op.getOperationName()); \ state.addOperands(operands); \ state.addTypes(op->getResultTypes()); \ - /* Copy attributes except operandSegmentSizes (will be auto-generated) */ \ + /* Copy attributes except operandSegmentSizes (will be auto-generated). */ \ for (auto attr : op->getAttrs()) { \ if (attr.getName() != "operandSegmentSizes") { \ state.addAttribute(attr.getName(), attr.getValue()); \ @@ -236,7 +240,7 @@ struct GenericFuseConstantPattern : public OpRewritePattern { } \ }; -// Define patterns for all binary arithmetic operations +// Define patterns for all binary arithmetic operations. DEFINE_BINARY_OP_PATTERN(Add, AddOp) DEFINE_BINARY_OP_PATTERN(Sub, SubOp) DEFINE_BINARY_OP_PATTERN(Mul, MulOp) @@ -246,7 +250,7 @@ DEFINE_BINARY_OP_PATTERN(FAdd, FAddOp) DEFINE_BINARY_OP_PATTERN(FSub, FSubOp) DEFINE_BINARY_OP_PATTERN(FMul, FMulOp) -// Special case for ICmp with cmp_type attribute +// Special case for ICmp with cmp_type attribute. 
struct FuseICmpConstantPattern : public GenericFuseConstantPattern { using GenericFuseConstantPattern::GenericFuseConstantPattern; @@ -254,17 +258,17 @@ struct FuseICmpConstantPattern Operation *createOpWithFoldedConstants( neura::ICmpOp op, ArrayRef all_operands, PatternRewriter &rewriter) const override { - // Extract only non-null operands + // Extract only non-null operands. SmallVector operands; for (Value v : all_operands) { if (v) operands.push_back(v); } - // Use generic Operation create and copy attributes + // Use generic Operation create and copy attributes. OperationState state(op.getLoc(), op.getOperationName()); state.addOperands(operands); state.addTypes(op->getResultTypes()); - // Copy attributes except operandSegmentSizes (will be auto-generated) + // Copy attributes except operandSegmentSizes (will be auto-generated). for (auto attr : op->getAttrs()) { if (attr.getName() != "operandSegmentSizes") { state.addAttribute(attr.getName(), attr.getValue()); @@ -274,7 +278,7 @@ struct FuseICmpConstantPattern } }; -// Special case for FMax with nan_semantic attribute +// Special case for FMax with nan_semantic attribute. struct FuseFMaxConstantPattern : public GenericFuseConstantPattern { using GenericFuseConstantPattern::GenericFuseConstantPattern; @@ -282,17 +286,17 @@ struct FuseFMaxConstantPattern Operation *createOpWithFoldedConstants( neura::FMaxOp op, ArrayRef all_operands, PatternRewriter &rewriter) const override { - // Extract only non-null operands + // Extract only non-null operands. SmallVector operands; for (Value v : all_operands) { if (v) operands.push_back(v); } - // Use generic Operation create and copy attributes + // Use generic Operation create and copy attributes. OperationState state(op.getLoc(), op.getOperationName()); state.addOperands(operands); state.addTypes(op->getResultTypes()); - // Copy attributes except operandSegmentSizes (will be auto-generated) + // Copy attributes except operandSegmentSizes (will be auto-generated). 
for (auto attr : op->getAttrs()) { if (attr.getName() != "operandSegmentSizes") { state.addAttribute(attr.getName(), attr.getValue()); @@ -302,7 +306,7 @@ struct FuseFMaxConstantPattern } }; -// Special case for FMin with nan_semantic attribute +// Special case for FMin with nan_semantic attribute. struct FuseFMinConstantPattern : public GenericFuseConstantPattern { using GenericFuseConstantPattern::GenericFuseConstantPattern; @@ -310,17 +314,17 @@ struct FuseFMinConstantPattern Operation *createOpWithFoldedConstants( neura::FMinOp op, ArrayRef all_operands, PatternRewriter &rewriter) const override { - // Extract only non-null operands + // Extract only non-null operands. SmallVector operands; for (Value v : all_operands) { if (v) operands.push_back(v); } - // Use generic Operation create and copy attributes + // Use generic Operation create and copy attributes. OperationState state(op.getLoc(), op.getOperationName()); state.addOperands(operands); state.addTypes(op->getResultTypes()); - // Copy attributes except operandSegmentSizes (will be auto-generated) + // Copy attributes except operandSegmentSizes (will be auto-generated). for (auto attr : op->getAttrs()) { if (attr.getName() != "operandSegmentSizes") { state.addAttribute(attr.getName(), attr.getValue()); @@ -330,11 +334,11 @@ struct FuseFMinConstantPattern } }; -// Pattern for GEP operation (base + indices) +// Pattern for GEP operation (base + indices). struct FuseGEPConstantPattern : public GenericFuseConstantPattern { using GenericFuseConstantPattern::GenericFuseConstantPattern; - // GEP always uses lhs_value for base (operand 0) + // GEP always uses lhs_value for base (operand 0). 
std::string getAttributeName(size_t operand_idx, size_t total_operands) const override { if (operand_idx == 0) { return "lhs_value"; @@ -346,7 +350,7 @@ struct FuseGEPConstantPattern : public GenericFuseConstantPattern { Operation *createOpWithFoldedConstants( neura::GEP op, ArrayRef all_operands, PatternRewriter &rewriter) const override { - // GEP: operand 0 is base, rest are indices + // GEP: operand 0 is base, rest are indices. Value base = all_operands[0]; SmallVector indices; for (size_t i = 1; i < all_operands.size(); ++i) { @@ -355,7 +359,7 @@ struct FuseGEPConstantPattern : public GenericFuseConstantPattern { } } - // Build operand list and calculate segment sizes + // Build operand list and calculate segment sizes. SmallVector operands; int32_t num_base = 0; if (base) { @@ -367,19 +371,19 @@ struct FuseGEPConstantPattern : public GenericFuseConstantPattern { } int32_t num_indices = indices.size(); - // Create operation with proper operandSegmentSizes + // Create operation with proper operandSegmentSizes. OperationState state(op.getLoc(), op.getOperationName()); state.addOperands(operands); state.addTypes(op->getResultTypes()); - // Copy attributes except operandSegmentSizes + // Copy attributes except operandSegmentSizes. for (auto attr : op->getAttrs()) { if (attr.getName() != "operandSegmentSizes") { state.addAttribute(attr.getName(), attr.getValue()); } } - // Set the correct operandSegmentSizes + // Set the correct operandSegmentSizes. state.addAttribute("operandSegmentSizes", rewriter.getDenseI32ArrayAttr({num_base, num_indices})); @@ -387,12 +391,12 @@ struct FuseGEPConstantPattern : public GenericFuseConstantPattern { } }; -// Pattern for Store operation (value, addr) +// Pattern for Store operation (value, addr). 
struct FuseStoreConstantPattern : public GenericFuseConstantPattern { using GenericFuseConstantPattern::GenericFuseConstantPattern; - // Store uses lhs_value for value (operand 0) and rhs_value for addr (operand 1) + // Store uses lhs_value for value (operand 0) and rhs_value for addr (operand 1). std::string getAttributeName(size_t operand_idx, size_t total_operands) const override { if (operand_idx == 0) { return "lhs_value"; @@ -406,18 +410,18 @@ struct FuseStoreConstantPattern Operation *createOpWithFoldedConstants( neura::StoreOp op, ArrayRef all_operands, PatternRewriter &rewriter) const override { - // Store has two operands: value (operand 0) and addr (operand 1) - // Build operand list with only non-null values + // Store has two operands: value (operand 0) and addr (operand 1). + // Build operand list with only non-null values. SmallVector operands; for (Value v : all_operands) { if (v) operands.push_back(v); } - // Use generic Operation create and copy attributes + // Use generic Operation create and copy attributes. OperationState state(op.getLoc(), op.getOperationName()); state.addOperands(operands); state.addTypes(op->getResultTypes()); - // Copy attributes except operandSegmentSizes (will be auto-generated) + // Copy attributes except operandSegmentSizes (will be auto-generated). for (auto attr : op->getAttrs()) { if (attr.getName() != "operandSegmentSizes") { state.addAttribute(attr.getName(), attr.getValue()); @@ -427,20 +431,20 @@ struct FuseStoreConstantPattern } }; -// Pattern for LoadIndexed operation (base + indices) -// Only folds the base, never folds indices (required by assemblyFormat) +// Pattern for LoadIndexed operation (base + indices). +// Only folds the base, never folds indices (required by assemblyFormat). 
struct FuseLoadIndexedConstantPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(neura::LoadIndexedOp op, PatternRewriter &rewriter) const override { - // Check if already folded + // Check if already folded. if (op->hasAttr("lhs_value")) { return failure(); } - // Only check if base is a constant + // Only check if base is a constant. Value base = op.getBase(); if (!base || !isOriginConstantOp(base)) { return failure(); @@ -449,35 +453,35 @@ struct FuseLoadIndexedConstantPattern auto constant_op = dyn_cast(base.getDefiningOp()); Attribute base_value = getOriginConstantValue(base); - // Keep all indices unchanged (never fold indices) + // Keep all indices unchanged (never fold indices). SmallVector indices; for (Value idx : op.getIndices()) { indices.push_back(idx); } - // Create new LoadIndexed without base + // Create new LoadIndexed without base. OperationState state(op.getLoc(), op.getOperationName()); - state.addOperands(indices); // Only indices, no base + state.addOperands(indices); // Only indices, no base. state.addTypes(op->getResultTypes()); - // Copy all attributes except operandSegmentSizes + // Copy all attributes except operandSegmentSizes. for (auto attr : op->getAttrs()) { if (attr.getName() != "operandSegmentSizes") { state.addAttribute(attr.getName(), attr.getValue()); } } - // Add the folded base value + // Add the folded base value. state.addAttribute("lhs_value", base_value); - // Set operandSegmentSizes: 0 base, N indices + // Set operandSegmentSizes: 0 base, N indices. state.addAttribute("operandSegmentSizes", rewriter.getDenseI32ArrayAttr({0, static_cast(indices.size())})); Operation *new_op = rewriter.create(state); rewriter.replaceOp(op, new_op->getResults()); - // Clean up constant if no longer used + // Clean up constant if no longer used. 
if (constant_op->use_empty()) { rewriter.eraseOp(constant_op); } @@ -486,38 +490,38 @@ struct FuseLoadIndexedConstantPattern } }; -// Pattern for StoreIndexed operation (value, base, indices...) -// Only folds value and base, never folds indices (required by assemblyFormat) +// Pattern for StoreIndexed operation (value, base, indices...). +// Only folds value and base, never folds indices (required by assemblyFormat). struct FuseStoreIndexedConstantPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(neura::StoreIndexedOp op, PatternRewriter &rewriter) const override { - // Check if already folded + // Check if already folded. if (op->hasAttr("lhs_value") || op->hasAttr("rhs_value")) { return failure(); } - // Check which of value/base are constants + // Check which of value/base are constants. Value value = op.getValue(); Value base = op.getBase(); bool value_is_const = value && isOriginConstantOp(value); bool base_is_const = base && isOriginConstantOp(base); - // Nothing to fold if neither is constant + // Nothing to fold if neither is constant. if (!value_is_const && !base_is_const) { return failure(); } - // Keep all indices unchanged (never fold indices) + // Keep all indices unchanged (never fold indices). SmallVector indices; for (Value idx : op.getIndices()) { indices.push_back(idx); } - // Build the new operand list + // Build the new operand list. SmallVector operands; int32_t num_value = 0; int32_t num_base = 0; @@ -537,19 +541,19 @@ struct FuseStoreIndexedConstantPattern } int32_t num_indices = indices.size(); - // Create new StoreIndexed + // Create new StoreIndexed. OperationState state(op.getLoc(), op.getOperationName()); state.addOperands(operands); state.addTypes(op->getResultTypes()); - // Copy all attributes except operandSegmentSizes + // Copy all attributes except operandSegmentSizes. 
for (auto attr : op->getAttrs()) { if (attr.getName() != "operandSegmentSizes") { state.addAttribute(attr.getName(), attr.getValue()); } } - // Add folded constant attributes + // Add folded constant attributes. if (value_is_const) { state.addAttribute("lhs_value", getOriginConstantValue(value)); } @@ -557,14 +561,14 @@ struct FuseStoreIndexedConstantPattern state.addAttribute("rhs_value", getOriginConstantValue(base)); } - // Set operandSegmentSizes: num_value, num_base, num_indices + // Set operandSegmentSizes: num_value, num_base, num_indices. state.addAttribute("operandSegmentSizes", rewriter.getDenseI32ArrayAttr({num_value, num_base, num_indices})); Operation *new_op = rewriter.create(state); rewriter.replaceOp(op, new_op->getResults()); - // Clean up unused constants + // Clean up unused constants. if (value_is_const) { auto const_op = value.getDefiningOp(); if (const_op->use_empty()) { @@ -644,7 +648,7 @@ struct FoldConstantPass ModuleOp module_op = getOperation(); RewritePatternSet patterns(&getContext()); - // Add generic constant folding patterns for all operations + // Add generic constant folding patterns for all operations. patterns.add(&getContext()); patterns.add(&getContext()); patterns.add(&getContext()); @@ -657,13 +661,13 @@ struct FoldConstantPass patterns.add(&getContext()); patterns.add(&getContext()); - // Add patterns for memory operations + // Add patterns for memory operations. patterns.add(&getContext()); patterns.add(&getContext()); patterns.add(&getContext()); patterns.add(&getContext()); - // Add pattern for grant operations (post-transform) + // Add pattern for grant operations (post-transform). 
patterns.add(&getContext()); FrozenRewritePatternSet frozen(std::move(patterns)); From a91f0413c423a372358dd019a605de2f6b102e4b Mon Sep 17 00:00:00 2001 From: tangyz <739245980@qq.com> Date: Sat, 25 Oct 2025 13:52:38 +0800 Subject: [PATCH 08/12] Refactor OperandFoldingInfo from SoA to AoS pattern for better clarity --- .../HwAgnosticOpt/FoldConstantPass.cpp | 165 ++++++++---------- 1 file changed, 68 insertions(+), 97 deletions(-) diff --git a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp index f4324be0..16d5138a 100644 --- a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp +++ b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp @@ -58,25 +58,20 @@ void addConstantAttribute(Operation *op, StringRef attr_name, // Generic Constant Folding Framework // ========================================= -// Structure to hold information about which operands to fold. -struct OperandFoldingInfo { - // Indices of constant operands to fold. - SmallVector const_operand_indices; - // Corresponding constant values. - SmallVector const_values; - // All operands (nullptr for folded ones). - SmallVector all_operands; - // Constant ops to potentially clean up. - SmallVector const_ops_to_clean; +// Information about a single constant operand to fold. +struct ConstantOperandInfo { + size_t index; // Index in the original operand list. + Attribute const_value; // The constant value. + Operation *defining_op; // The operation that defines this constant. }; -// Analyzes operands from right to left and determines which to fold. -OperandFoldingInfo analyzeOperandsForFolding(Operation *op) { - OperandFoldingInfo info; +// Analyzes operands and returns information about constants to fold. 
+SmallVector analyzeOperandsForFolding(Operation *op) { + SmallVector constants_to_fold; size_t num_operands = op->getNumOperands(); if (num_operands == 0) { - return info; + return constants_to_fold; } // First pass: identify which operands are constants. @@ -91,32 +86,26 @@ OperandFoldingInfo analyzeOperandsForFolding(Operation *op) { } } - // Second pass: decide which constants to fold. - // Build all_operands array (nullptr for folded operands). - info.all_operands.resize(num_operands); - + // Second pass: collect constants to fold. for (size_t i = 0; i < num_operands; ++i) { - Value operand = op->getOperand(i); - if (is_const[i]) { // If this is operand 0 and there are no other non-const operands, // we must keep it (MLIR operations need at least one operand). if (i == 0 && !has_non_const) { - info.all_operands[i] = operand; - } else { - // This operand will be folded - mark as nullptr. - info.all_operands[i] = nullptr; - info.const_operand_indices.push_back(i); - info.const_values.push_back(getOriginConstantValue(operand)); - info.const_ops_to_clean.push_back(operand.getDefiningOp()); + continue; // Don't fold this one. } - } else { - // This operand is not a constant, keep it. - info.all_operands[i] = operand; + + // This operand will be folded. + Value operand = op->getOperand(i); + constants_to_fold.push_back({ + i, + getOriginConstantValue(operand), + operand.getDefiningOp() + }); } } - return info; + return constants_to_fold; } // Gets the attribute name for a given operand index. @@ -156,10 +145,10 @@ struct GenericFuseConstantPattern : public OpRewritePattern { size_t num_operands = op->getNumOperands(); // Analyze operands to determine which can be folded. - OperandFoldingInfo fold_info = analyzeOperandsForFolding(op); + SmallVector constants_to_fold = analyzeOperandsForFolding(op); // If no constant operands found, nothing to do. 
- if (fold_info.const_operand_indices.empty()) { + if (constants_to_fold.empty()) { return failure(); } @@ -172,30 +161,39 @@ struct GenericFuseConstantPattern : public OpRewritePattern { } } - // Create the new operation with all operands (nullptr for folded ones). + // Build list of non-constant operands. + SmallVector non_const_operands; + SmallVector is_folded(num_operands, false); + for (const auto &const_info : constants_to_fold) { + is_folded[const_info.index] = true; + } + for (size_t i = 0; i < num_operands; ++i) { + if (!is_folded[i]) { + non_const_operands.push_back(op->getOperand(i)); + } + } + + // Create the new operation with only non-constant operands. Operation *new_op = createOpWithFoldedConstants( - op, fold_info.all_operands, rewriter); + op, non_const_operands, rewriter); if (!new_op) { return failure(); } // Add constant attributes for each folded operand. - for (size_t i = 0; i < fold_info.const_operand_indices.size(); ++i) { - size_t operand_idx = fold_info.const_operand_indices[i]; - Attribute const_value = fold_info.const_values[i]; - - std::string attr_name = getAttributeName(operand_idx, num_operands); - addConstantAttribute(new_op, attr_name, const_value); + for (const auto &const_info : constants_to_fold) { + std::string attr_name = getAttributeName(const_info.index, num_operands); + addConstantAttribute(new_op, attr_name, const_info.const_value); } // Replace the old operation. rewriter.replaceOp(op, new_op->getResults()); // Clean up unused constant operations. 
- for (Operation *const_op : fold_info.const_ops_to_clean) { - if (const_op->use_empty()) { - rewriter.eraseOp(const_op); + for (const auto &const_info : constants_to_fold) { + if (const_info.defining_op->use_empty()) { + rewriter.eraseOp(const_info.defining_op); } } @@ -219,16 +217,11 @@ struct GenericFuseConstantPattern : public OpRewritePattern { : public GenericFuseConstantPattern { \ using GenericFuseConstantPattern::GenericFuseConstantPattern; \ Operation *createOpWithFoldedConstants( \ - neura::OP_TYPE op, ArrayRef all_operands, \ + neura::OP_TYPE op, ArrayRef non_const_operands, \ PatternRewriter &rewriter) const override { \ - /* Extract only non-null operands. */ \ - SmallVector operands; \ - for (Value v : all_operands) { \ - if (v) operands.push_back(v); \ - } \ /* Use generic Operation create and copy attributes. */ \ OperationState state(op.getLoc(), op.getOperationName()); \ - state.addOperands(operands); \ + state.addOperands(non_const_operands); \ state.addTypes(op->getResultTypes()); \ /* Copy attributes except operandSegmentSizes (will be auto-generated). */ \ for (auto attr : op->getAttrs()) { \ @@ -256,17 +249,11 @@ struct FuseICmpConstantPattern using GenericFuseConstantPattern::GenericFuseConstantPattern; Operation *createOpWithFoldedConstants( - neura::ICmpOp op, ArrayRef all_operands, + neura::ICmpOp op, ArrayRef non_const_operands, PatternRewriter &rewriter) const override { - // Extract only non-null operands. - SmallVector operands; - for (Value v : all_operands) { - if (v) operands.push_back(v); - } - // Use generic Operation create and copy attributes. OperationState state(op.getLoc(), op.getOperationName()); - state.addOperands(operands); + state.addOperands(non_const_operands); state.addTypes(op->getResultTypes()); // Copy attributes except operandSegmentSizes (will be auto-generated). 
for (auto attr : op->getAttrs()) { @@ -284,17 +271,11 @@ struct FuseFMaxConstantPattern using GenericFuseConstantPattern::GenericFuseConstantPattern; Operation *createOpWithFoldedConstants( - neura::FMaxOp op, ArrayRef all_operands, + neura::FMaxOp op, ArrayRef non_const_operands, PatternRewriter &rewriter) const override { - // Extract only non-null operands. - SmallVector operands; - for (Value v : all_operands) { - if (v) operands.push_back(v); - } - // Use generic Operation create and copy attributes. OperationState state(op.getLoc(), op.getOperationName()); - state.addOperands(operands); + state.addOperands(non_const_operands); state.addTypes(op->getResultTypes()); // Copy attributes except operandSegmentSizes (will be auto-generated). for (auto attr : op->getAttrs()) { @@ -312,17 +293,11 @@ struct FuseFMinConstantPattern using GenericFuseConstantPattern::GenericFuseConstantPattern; Operation *createOpWithFoldedConstants( - neura::FMinOp op, ArrayRef all_operands, + neura::FMinOp op, ArrayRef non_const_operands, PatternRewriter &rewriter) const override { - // Extract only non-null operands. - SmallVector operands; - for (Value v : all_operands) { - if (v) operands.push_back(v); - } - // Use generic Operation create and copy attributes. OperationState state(op.getLoc(), op.getOperationName()); - state.addOperands(operands); + state.addOperands(non_const_operands); state.addTypes(op->getResultTypes()); // Copy attributes except operandSegmentSizes (will be auto-generated). for (auto attr : op->getAttrs()) { @@ -348,28 +323,31 @@ struct FuseGEPConstantPattern : public GenericFuseConstantPattern { } Operation *createOpWithFoldedConstants( - neura::GEP op, ArrayRef all_operands, + neura::GEP op, ArrayRef non_const_operands, PatternRewriter &rewriter) const override { // GEP: operand 0 is base, rest are indices. 
- Value base = all_operands[0]; - SmallVector indices; - for (size_t i = 1; i < all_operands.size(); ++i) { - if (all_operands[i]) { - indices.push_back(all_operands[i]); - } - } + // Determine which operands are kept by checking against original. + Value orig_base = op.getBase(); + auto orig_indices = op.getIndices(); + + bool base_is_const = isOriginConstantOp(orig_base); // Build operand list and calculate segment sizes. SmallVector operands; int32_t num_base = 0; - if (base) { - operands.push_back(base); + int32_t num_indices = 0; + + if (!base_is_const) { + operands.push_back(orig_base); num_base = 1; } - for (Value idx : indices) { - operands.push_back(idx); + + for (Value idx : orig_indices) { + if (!isOriginConstantOp(idx)) { + operands.push_back(idx); + num_indices++; + } } - int32_t num_indices = indices.size(); // Create operation with proper operandSegmentSizes. OperationState state(op.getLoc(), op.getOperationName()); @@ -408,18 +386,11 @@ struct FuseStoreConstantPattern } Operation *createOpWithFoldedConstants( - neura::StoreOp op, ArrayRef all_operands, + neura::StoreOp op, ArrayRef non_const_operands, PatternRewriter &rewriter) const override { - // Store has two operands: value (operand 0) and addr (operand 1). - // Build operand list with only non-null values. - SmallVector operands; - for (Value v : all_operands) { - if (v) operands.push_back(v); - } - // Use generic Operation create and copy attributes. OperationState state(op.getLoc(), op.getOperationName()); - state.addOperands(operands); + state.addOperands(non_const_operands); state.addTypes(op->getResultTypes()); // Copy attributes except operandSegmentSizes (will be auto-generated). 
for (auto attr : op->getAttrs()) { From 3d9d0715680e43075106ac5fcdcbc689682a07b3 Mon Sep 17 00:00:00 2001 From: tangyz <739245980@qq.com> Date: Sat, 25 Oct 2025 14:12:08 +0800 Subject: [PATCH 09/12] Add comments to explain macro-generated pattern classes in FoldConstantPass --- .../HwAgnosticOpt/FoldConstantPass.cpp | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp index 16d5138a..29d55df4 100644 --- a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp +++ b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp @@ -234,13 +234,48 @@ struct GenericFuseConstantPattern : public OpRewritePattern { }; // Define patterns for all binary arithmetic operations. +// +// Note: The macro DEFINE_BINARY_OP_PATTERN expands to create a complete pattern class. +// For example, DEFINE_BINARY_OP_PATTERN(Add, AddOp) expands to: +// +// struct FuseAddConstantPattern +// : public GenericFuseConstantPattern { +// using GenericFuseConstantPattern::GenericFuseConstantPattern; +// +// Operation *createOpWithFoldedConstants( +// neura::AddOp op, ArrayRef non_const_operands, +// PatternRewriter &rewriter) const override { +// // Use generic Operation create and copy attributes. +// OperationState state(op.getLoc(), op.getOperationName()); +// state.addOperands(non_const_operands); +// state.addTypes(op->getResultTypes()); +// // Copy attributes except operandSegmentSizes (will be auto-generated). +// for (auto attr : op->getAttrs()) { +// if (attr.getName() != "operandSegmentSizes") { +// state.addAttribute(attr.getName(), attr.getValue()); +// } +// } +// return rewriter.create(state); +// } +// }; +// +// All other binary operations (Sub, Mul, Div, etc.) follow the same pattern, +// so we use the macro to avoid code duplication. +// Generates: FuseAddConstantPattern. 
DEFINE_BINARY_OP_PATTERN(Add, AddOp) +// Generates: FuseSubConstantPattern. DEFINE_BINARY_OP_PATTERN(Sub, SubOp) +// Generates: FuseMulConstantPattern. DEFINE_BINARY_OP_PATTERN(Mul, MulOp) +// Generates: FuseDivConstantPattern. DEFINE_BINARY_OP_PATTERN(Div, DivOp) +// Generates: FuseRemConstantPattern. DEFINE_BINARY_OP_PATTERN(Rem, RemOp) +// Generates: FuseFAddConstantPattern. DEFINE_BINARY_OP_PATTERN(FAdd, FAddOp) +// Generates: FuseFSubConstantPattern. DEFINE_BINARY_OP_PATTERN(FSub, FSubOp) +// Generates: FuseFMulConstantPattern. DEFINE_BINARY_OP_PATTERN(FMul, FMulOp) // Special case for ICmp with cmp_type attribute. From 5e5e1b725cb9d760c908906003139945e4be8ebb Mon Sep 17 00:00:00 2001 From: tangyz <739245980@qq.com> Date: Sat, 25 Oct 2025 15:01:53 +0800 Subject: [PATCH 10/12] Refactor: use DEFINE_BINARY_OP_PATTERN macro for ICmp, FMax, FMin, and Store patterns --- .../HwAgnosticOpt/FoldConstantPass.cpp | 110 ++---------------- 1 file changed, 11 insertions(+), 99 deletions(-) diff --git a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp index 29d55df4..14f153ea 100644 --- a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp +++ b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp @@ -277,72 +277,17 @@ DEFINE_BINARY_OP_PATTERN(FAdd, FAddOp) DEFINE_BINARY_OP_PATTERN(FSub, FSubOp) // Generates: FuseFMulConstantPattern. DEFINE_BINARY_OP_PATTERN(FMul, FMulOp) - -// Special case for ICmp with cmp_type attribute. -struct FuseICmpConstantPattern - : public GenericFuseConstantPattern { - using GenericFuseConstantPattern::GenericFuseConstantPattern; - - Operation *createOpWithFoldedConstants( - neura::ICmpOp op, ArrayRef non_const_operands, - PatternRewriter &rewriter) const override { - // Use generic Operation create and copy attributes. 
- OperationState state(op.getLoc(), op.getOperationName()); - state.addOperands(non_const_operands); - state.addTypes(op->getResultTypes()); - // Copy attributes except operandSegmentSizes (will be auto-generated). - for (auto attr : op->getAttrs()) { - if (attr.getName() != "operandSegmentSizes") { - state.addAttribute(attr.getName(), attr.getValue()); - } - } - return rewriter.create(state); - } -}; - -// Special case for FMax with nan_semantic attribute. -struct FuseFMaxConstantPattern - : public GenericFuseConstantPattern { - using GenericFuseConstantPattern::GenericFuseConstantPattern; - - Operation *createOpWithFoldedConstants( - neura::FMaxOp op, ArrayRef non_const_operands, - PatternRewriter &rewriter) const override { - // Use generic Operation create and copy attributes. - OperationState state(op.getLoc(), op.getOperationName()); - state.addOperands(non_const_operands); - state.addTypes(op->getResultTypes()); - // Copy attributes except operandSegmentSizes (will be auto-generated). - for (auto attr : op->getAttrs()) { - if (attr.getName() != "operandSegmentSizes") { - state.addAttribute(attr.getName(), attr.getValue()); - } - } - return rewriter.create(state); - } -}; - -// Special case for FMin with nan_semantic attribute. -struct FuseFMinConstantPattern - : public GenericFuseConstantPattern { - using GenericFuseConstantPattern::GenericFuseConstantPattern; - - Operation *createOpWithFoldedConstants( - neura::FMinOp op, ArrayRef non_const_operands, - PatternRewriter &rewriter) const override { - // Use generic Operation create and copy attributes. - OperationState state(op.getLoc(), op.getOperationName()); - state.addOperands(non_const_operands); - state.addTypes(op->getResultTypes()); - // Copy attributes except operandSegmentSizes (will be auto-generated). 
- for (auto attr : op->getAttrs()) { - if (attr.getName() != "operandSegmentSizes") { - state.addAttribute(attr.getName(), attr.getValue()); - } - } - return rewriter.create(state); - } -}; +// Generates: FuseICmpConstantPattern. +// Note: ICmpOp has a cmp_type attribute that is automatically preserved. +DEFINE_BINARY_OP_PATTERN(ICmp, ICmpOp) +// Generates: FuseFMaxConstantPattern. +// Note: FMaxOp has a nan_semantic attribute that is automatically preserved. +DEFINE_BINARY_OP_PATTERN(FMax, FMaxOp) +// Generates: FuseFMinConstantPattern. +// Note: FMinOp has a nan_semantic attribute that is automatically preserved. +DEFINE_BINARY_OP_PATTERN(FMin, FMinOp) +// Generates: FuseStoreConstantPattern. +DEFINE_BINARY_OP_PATTERN(Store, StoreOp) // Pattern for GEP operation (base + indices). struct FuseGEPConstantPattern : public GenericFuseConstantPattern { @@ -404,39 +349,6 @@ struct FuseGEPConstantPattern : public GenericFuseConstantPattern { } }; -// Pattern for Store operation (value, addr). -struct FuseStoreConstantPattern - : public GenericFuseConstantPattern { - using GenericFuseConstantPattern::GenericFuseConstantPattern; - - // Store uses lhs_value for value (operand 0) and rhs_value for addr (operand 1). - std::string getAttributeName(size_t operand_idx, size_t total_operands) const override { - if (operand_idx == 0) { - return "lhs_value"; - } else if (operand_idx == 1) { - return "rhs_value"; - } else { - return "operand_" + std::to_string(operand_idx) + "_value"; - } - } - - Operation *createOpWithFoldedConstants( - neura::StoreOp op, ArrayRef non_const_operands, - PatternRewriter &rewriter) const override { - // Use generic Operation create and copy attributes. - OperationState state(op.getLoc(), op.getOperationName()); - state.addOperands(non_const_operands); - state.addTypes(op->getResultTypes()); - // Copy attributes except operandSegmentSizes (will be auto-generated). 
- for (auto attr : op->getAttrs()) { - if (attr.getName() != "operandSegmentSizes") { - state.addAttribute(attr.getName(), attr.getValue()); - } - } - return rewriter.create(state); - } -}; - // Pattern for LoadIndexed operation (base + indices). // Only folds the base, never folds indices (required by assemblyFormat). struct FuseLoadIndexedConstantPattern From bfd2ff3146f2e71b357d69df1687a51bf663f610 Mon Sep 17 00:00:00 2001 From: tangyz <739245980@qq.com> Date: Sat, 25 Oct 2025 15:48:42 +0800 Subject: [PATCH 11/12] Refactor: use third-person verb forms in comments --- .../HwAgnosticOpt/FoldConstantPass.cpp | 78 +++++++++---------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp index 14f153ea..aa051260 100644 --- a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp +++ b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp @@ -74,7 +74,7 @@ SmallVector analyzeOperandsForFolding(Operation *op) { return constants_to_fold; } - // First pass: identify which operands are constants. + // First pass: Identifies which operands are constants. SmallVector is_const(num_operands, false); bool has_non_const = false; @@ -86,7 +86,7 @@ SmallVector analyzeOperandsForFolding(Operation *op) { } } - // Second pass: collect constants to fold. + // Second pass: Collects constants to fold. for (size_t i = 0; i < num_operands; ++i) { if (is_const[i]) { // If this is operand 0 and there are no other non-const operands, @@ -141,10 +141,10 @@ struct GenericFuseConstantPattern : public OpRewritePattern { LogicalResult matchAndRewrite(OpType op, PatternRewriter &rewriter) const override { - // Get the original number of operands before folding. + // Gets the original number of operands before folding. 
size_t num_operands = op->getNumOperands(); - // Analyze operands to determine which can be folded. + // Analyzes operands to determine which can be folded. SmallVector constants_to_fold = analyzeOperandsForFolding(op); // If no constant operands found, nothing to do. @@ -152,8 +152,8 @@ struct GenericFuseConstantPattern : public OpRewritePattern { return failure(); } - // Check if any operands have already been folded. - // Look for any attribute ending with "_value" which indicates constant folding. + // Checks if any operands have already been folded. + // Looks for any attribute ending with "_value" which indicates constant folding. for (auto attr : op->getAttrs()) { StringRef attr_name = attr.getName().getValue(); if (attr_name.ends_with("_value")) { @@ -161,7 +161,7 @@ struct GenericFuseConstantPattern : public OpRewritePattern { } } - // Build list of non-constant operands. + // Builds list of non-constant operands. SmallVector non_const_operands; SmallVector is_folded(num_operands, false); for (const auto &const_info : constants_to_fold) { @@ -173,7 +173,7 @@ struct GenericFuseConstantPattern : public OpRewritePattern { } } - // Create the new operation with only non-constant operands. + // Creates the new operation with only non-constant operands. Operation *new_op = createOpWithFoldedConstants( op, non_const_operands, rewriter); @@ -181,16 +181,16 @@ struct GenericFuseConstantPattern : public OpRewritePattern { return failure(); } - // Add constant attributes for each folded operand. + // Adds constant attributes for each folded operand. for (const auto &const_info : constants_to_fold) { std::string attr_name = getAttributeName(const_info.index, num_operands); addConstantAttribute(new_op, attr_name, const_info.const_value); } - // Replace the old operation. + // Replaces the old operation. rewriter.replaceOp(op, new_op->getResults()); - // Clean up unused constant operations. + // Cleans up unused constant operations. 
for (const auto &const_info : constants_to_fold) { if (const_info.defining_op->use_empty()) { rewriter.eraseOp(const_info.defining_op); @@ -219,11 +219,11 @@ struct GenericFuseConstantPattern : public OpRewritePattern { Operation *createOpWithFoldedConstants( \ neura::OP_TYPE op, ArrayRef non_const_operands, \ PatternRewriter &rewriter) const override { \ - /* Use generic Operation create and copy attributes. */ \ + /* Uses generic Operation create and copy attributes. */ \ OperationState state(op.getLoc(), op.getOperationName()); \ state.addOperands(non_const_operands); \ state.addTypes(op->getResultTypes()); \ - /* Copy attributes except operandSegmentSizes (will be auto-generated). */ \ + /* Copies attributes except operandSegmentSizes (will be auto-generated). */ \ for (auto attr : op->getAttrs()) { \ if (attr.getName() != "operandSegmentSizes") { \ state.addAttribute(attr.getName(), attr.getValue()); \ @@ -233,7 +233,7 @@ struct GenericFuseConstantPattern : public OpRewritePattern { } \ }; -// Define patterns for all binary arithmetic operations. +// Defines patterns for all binary arithmetic operations. // // Note: The macro DEFINE_BINARY_OP_PATTERN expands to create a complete pattern class. // For example, DEFINE_BINARY_OP_PATTERN(Add, AddOp) expands to: @@ -306,13 +306,13 @@ struct FuseGEPConstantPattern : public GenericFuseConstantPattern { neura::GEP op, ArrayRef non_const_operands, PatternRewriter &rewriter) const override { // GEP: operand 0 is base, rest are indices. - // Determine which operands are kept by checking against original. + // Determines which operands are kept by checking against original. Value orig_base = op.getBase(); auto orig_indices = op.getIndices(); bool base_is_const = isOriginConstantOp(orig_base); - // Build operand list and calculate segment sizes. + // Builds operand list and calculates segment sizes. 
SmallVector operands; int32_t num_base = 0; int32_t num_indices = 0; @@ -329,19 +329,19 @@ struct FuseGEPConstantPattern : public GenericFuseConstantPattern { } } - // Create operation with proper operandSegmentSizes. + // Creates operation with proper operandSegmentSizes. OperationState state(op.getLoc(), op.getOperationName()); state.addOperands(operands); state.addTypes(op->getResultTypes()); - // Copy attributes except operandSegmentSizes. + // Copies attributes except operandSegmentSizes. for (auto attr : op->getAttrs()) { if (attr.getName() != "operandSegmentSizes") { state.addAttribute(attr.getName(), attr.getValue()); } } - // Set the correct operandSegmentSizes. + // Sets the correct operandSegmentSizes. state.addAttribute("operandSegmentSizes", rewriter.getDenseI32ArrayAttr({num_base, num_indices})); @@ -357,12 +357,12 @@ struct FuseLoadIndexedConstantPattern LogicalResult matchAndRewrite(neura::LoadIndexedOp op, PatternRewriter &rewriter) const override { - // Check if already folded. + // Checks if already folded. if (op->hasAttr("lhs_value")) { return failure(); } - // Only check if base is a constant. + // Only checks if base is a constant. Value base = op.getBase(); if (!base || !isOriginConstantOp(base)) { return failure(); @@ -371,35 +371,35 @@ struct FuseLoadIndexedConstantPattern auto constant_op = dyn_cast(base.getDefiningOp()); Attribute base_value = getOriginConstantValue(base); - // Keep all indices unchanged (never fold indices). + // Keeps all indices unchanged (never fold indices). SmallVector indices; for (Value idx : op.getIndices()) { indices.push_back(idx); } - // Create new LoadIndexed without base. + // Creates new LoadIndexed without base. OperationState state(op.getLoc(), op.getOperationName()); state.addOperands(indices); // Only indices, no base. state.addTypes(op->getResultTypes()); - // Copy all attributes except operandSegmentSizes. + // Copies all attributes except operandSegmentSizes. 
for (auto attr : op->getAttrs()) { if (attr.getName() != "operandSegmentSizes") { state.addAttribute(attr.getName(), attr.getValue()); } } - // Add the folded base value. + // Adds the folded base value. state.addAttribute("lhs_value", base_value); - // Set operandSegmentSizes: 0 base, N indices. + // Sets operandSegmentSizes: 0 base, N indices. state.addAttribute("operandSegmentSizes", rewriter.getDenseI32ArrayAttr({0, static_cast(indices.size())})); Operation *new_op = rewriter.create(state); rewriter.replaceOp(op, new_op->getResults()); - // Clean up constant if no longer used. + // Cleans up constant if no longer used. if (constant_op->use_empty()) { rewriter.eraseOp(constant_op); } @@ -416,12 +416,12 @@ struct FuseStoreIndexedConstantPattern LogicalResult matchAndRewrite(neura::StoreIndexedOp op, PatternRewriter &rewriter) const override { - // Check if already folded. + // Checks if already folded. if (op->hasAttr("lhs_value") || op->hasAttr("rhs_value")) { return failure(); } - // Check which of value/base are constants. + // Checks which of value/base are constants. Value value = op.getValue(); Value base = op.getBase(); @@ -433,13 +433,13 @@ struct FuseStoreIndexedConstantPattern return failure(); } - // Keep all indices unchanged (never fold indices). + // Keeps all indices unchanged (never fold indices). SmallVector indices; for (Value idx : op.getIndices()) { indices.push_back(idx); } - // Build the new operand list. + // Builds the new operand list. SmallVector operands; int32_t num_value = 0; int32_t num_base = 0; @@ -459,19 +459,19 @@ struct FuseStoreIndexedConstantPattern } int32_t num_indices = indices.size(); - // Create new StoreIndexed. + // Creates new StoreIndexed. OperationState state(op.getLoc(), op.getOperationName()); state.addOperands(operands); state.addTypes(op->getResultTypes()); - // Copy all attributes except operandSegmentSizes. + // Copies all attributes except operandSegmentSizes. 
for (auto attr : op->getAttrs()) { if (attr.getName() != "operandSegmentSizes") { state.addAttribute(attr.getName(), attr.getValue()); } } - // Add folded constant attributes. + // Adds folded constant attributes. if (value_is_const) { state.addAttribute("lhs_value", getOriginConstantValue(value)); } @@ -479,14 +479,14 @@ struct FuseStoreIndexedConstantPattern state.addAttribute("rhs_value", getOriginConstantValue(base)); } - // Set operandSegmentSizes: num_value, num_base, num_indices. + // Sets operandSegmentSizes: num_value, num_base, num_indices. state.addAttribute("operandSegmentSizes", rewriter.getDenseI32ArrayAttr({num_value, num_base, num_indices})); Operation *new_op = rewriter.create(state); rewriter.replaceOp(op, new_op->getResults()); - // Clean up unused constants. + // Cleans up unused constants. if (value_is_const) { auto const_op = value.getDefiningOp(); if (const_op->use_empty()) { @@ -566,7 +566,7 @@ struct FoldConstantPass ModuleOp module_op = getOperation(); RewritePatternSet patterns(&getContext()); - // Add generic constant folding patterns for all operations. + // Adds generic constant folding patterns for all operations. patterns.add(&getContext()); patterns.add(&getContext()); patterns.add(&getContext()); @@ -579,13 +579,13 @@ struct FoldConstantPass patterns.add(&getContext()); patterns.add(&getContext()); - // Add patterns for memory operations. + // Adds patterns for memory operations. patterns.add(&getContext()); patterns.add(&getContext()); patterns.add(&getContext()); patterns.add(&getContext()); - // Add pattern for grant operations (post-transform). + // Adds pattern for grant operations (post-transform). 
patterns.add(&getContext()); FrozenRewritePatternSet frozen(std::move(patterns)); From 967decaf21bb33775b9475beb00655a880d96f6f Mon Sep 17 00:00:00 2001 From: tangyz <739245980@qq.com> Date: Sun, 26 Oct 2025 01:04:04 +0800 Subject: [PATCH 12/12] Refactor: improve code comments and variable naming for better readability --- .../HwAgnosticOpt/FoldConstantPass.cpp | 4 --- .../TransformCtrlToDataFlowPass.cpp | 36 ++++++++++--------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp index aa051260..fb154974 100644 --- a/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp +++ b/lib/NeuraDialect/Transforms/Optimizations/HwAgnosticOpt/FoldConstantPass.cpp @@ -354,7 +354,6 @@ struct FuseGEPConstantPattern : public GenericFuseConstantPattern { struct FuseLoadIndexedConstantPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(neura::LoadIndexedOp op, PatternRewriter &rewriter) const override { // Checks if already folded. @@ -367,10 +366,8 @@ struct FuseLoadIndexedConstantPattern if (!base || !isOriginConstantOp(base)) { return failure(); } - auto constant_op = dyn_cast(base.getDefiningOp()); Attribute base_value = getOriginConstantValue(base); - // Keeps all indices unchanged (never fold indices). SmallVector indices; for (Value idx : op.getIndices()) { @@ -413,7 +410,6 @@ struct FuseLoadIndexedConstantPattern struct FuseStoreIndexedConstantPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(neura::StoreIndexedOp op, PatternRewriter &rewriter) const override { // Checks if already folded. 
diff --git a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp index 0486d2f5..d3a1481b 100644 --- a/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp +++ b/lib/NeuraDialect/Transforms/TransformCtrlToDataFlowPass.cpp @@ -477,7 +477,7 @@ void transformControlFlowToDataFlow(Region &region, ControlFlowInfo &ctrl_info, createReserveAndPhiOps(region, ctrl_info, arg_to_reserve, arg_to_phi_result, builder); - // Replaces blockarguments with phi results + // Replaces blockarguments with phi results. for (auto &arg_to_phi_pair : arg_to_phi_result) { BlockArgument arg = arg_to_phi_pair.first; Value phi_result = arg_to_phi_pair.second; @@ -485,39 +485,41 @@ void transformControlFlowToDataFlow(Region &region, ControlFlowInfo &ctrl_info, } // Flattens blocks into the entry block. - // Sort blocks by reverse post-order traversal to maintain SSA dominance + // Sorts blocks by reverse post-order traversal to maintain SSA dominance. Block *entry_block = &region.front(); SmallVector blocks_to_flatten; - // Use reverse post-order: visit successors before predecessors - // This ensures that when we move blocks, definitions come before uses + // Uses reverse post-order: visit successors before predecessors. + // This ensures that when we move blocks, definitions come before uses. llvm::SetVector visited; - SmallVector rpo_order; + // Post-order traversal result, used for sorting blocks. + SmallVector po_order; - std::function rpo_traverse = [&](Block *block) { + std::function po_traverse = [&](Block *block) { + // Records visited block and skips if already visited. if (!visited.insert(block)) { return; } - // Visit successors first (post-order) + // Visits successors first (post-order). 
Operation *terminator = block->getTerminator(); if (auto br = dyn_cast(terminator)) { - rpo_traverse(br.getDest()); + po_traverse(br.getDest()); } else if (auto cond_br = dyn_cast(terminator)) { - rpo_traverse(cond_br.getTrueDest()); - rpo_traverse(cond_br.getFalseDest()); + po_traverse(cond_br.getTrueDest()); + po_traverse(cond_br.getFalseDest()); } - // Add to reverse post-order - rpo_order.push_back(block); + // Adds to post-order. + po_order.push_back(block); }; - rpo_traverse(entry_block); + po_traverse(entry_block); - // Reverse to get correct order (forward traversal) - std::reverse(rpo_order.begin(), rpo_order.end()); + // Reverses post-order for forward traversal. + SmallVector rpo_order(po_order.rbegin(), po_order.rend()); - // Collects non-entry blocks in RPO order + // Collects non-entry blocks in RPO order. for (Block *block : rpo_order) { if (block != entry_block) { blocks_to_flatten.push_back(block); @@ -550,7 +552,7 @@ void transformControlFlowToDataFlow(Region &region, ControlFlowInfo &ctrl_info, } } - // Erases now-empty blocks + // Erases now-empty blocks. for (Block *block : blocks_to_flatten) { block->erase(); }