Skip to content

Add new live-in patterns#186

Merged
ShangkunLi merged 5 commits into coredac:main from
ShangkunLi:new-live-in-patterns
Nov 6, 2025
Merged

Add new live-in patterns#186
ShangkunLi merged 5 commits into coredac:main from
ShangkunLi:new-live-in-patterns

Conversation

@ShangkunLi
Copy link
Copy Markdown
Collaborator

In this PR, we add new patterns to detect additional live-in cases in canonicalize-live-in.

// Checks if there's a direct unconditional path from defining_block to
// using_block without crossing any conditional branches.
//
// Pattern Structure:
//    [ Defining Block A ]
//             |  (br)
//             v
//       [ Block B ]
//             |  (br)
//             v
//       [ Block C ]
//             |  (br)
//             v
//    [ Using Block D ]

@ShangkunLi ShangkunLi marked this pull request as ready for review November 6, 2025 02:55
@ShangkunLi ShangkunLi requested a review from tancheng November 6, 2025 02:55
@tancheng
Copy link
Copy Markdown
Contributor

tancheng commented Nov 6, 2025

Can you add variables x/y/z to show what is going to happen after this pattern is identified?

@ShangkunLi
Copy link
Copy Markdown
Collaborator Author

ShangkunLi commented Nov 6, 2025

Can you add variables x/y/z to show what is going to happen after this pattern is identified?

Sure~! Here is an example and its canonicalized version:

module attributes {dlti.dl_spec = #dlti.dl_spec<i32 = dense<32> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, f80 = dense<128> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, f128 = dense<128> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64>, llvm.ident = "clang version 20.1.7 (https://github.com/llvm/llvm-project.git 6146a88f60492b520a36f8f8f3231e15f3cc6082)"} {
  func.func @kernel(%arg0: i32 {llvm.noundef}, %arg1: i32 {llvm.noundef}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg3: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg4: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg5: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg6: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) -> !llvm.void attributes {CConv = #llvm.cconv<ccc>, accelerator = "neura", linkage = #llvm.linkage<external>, memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} {
    %0 = "neura.constant"() <{value = "%arg0"}> : () -> i32
    %1 = "neura.constant"() <{value = "%arg1"}> : () -> i32
    %2 = "neura.constant"() <{value = "%arg3"}> : () -> !llvm.ptr
    %3 = "neura.constant"() <{value = "%arg4"}> : () -> !llvm.ptr
    %4 = "neura.constant"() <{value = 3 : i64}> : () -> i64
    %5 = "neura.constant"() <{value = 0 : i8}> : () -> i8
    %6 = "neura.constant"() <{value = 0 : i64}> : () -> i64
    %7 = "neura.icmp"(%0) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
    neura.cond_br %7 : i1 then to ^bb1 else to ^bb2
  ^bb1:  // pred: ^bb0
    %8 = neura.zext %0 : i32 -> i64
    %9 = "neura.shl"(%8, %4) : (i64, i64) -> i64
    "neura.memset"(%2, %5, %9) <{is_volatile = false}> : (!llvm.ptr, i8, i64) -> ()
    %10 = "neura.icmp"(%1) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
    neura.cond_br %10 : i1 then to ^bb4 else to ^bb8
  ^bb2:  // pred: ^bb0
    %11 = "neura.icmp"(%1) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
    neura.cond_br %11 : i1 then to ^bb3 else to ^bb8
  ^bb3:  // pred: ^bb2
    %12 = neura.zext %1 : i32 -> i64
    %13 = "neura.shl"(%12, %4) : (i64, i64) -> i64
    "neura.memset"(%3, %5, %13) <{is_volatile = false}> : (!llvm.ptr, i8, i64) -> ()
    neura.br to ^bb8
  ^bb4:  // pred: ^bb1
    %14 = neura.zext %1 : i32 -> i64
    %15 = neura.zext %0 : i32 -> i64
    neura.br %6 : i64 to ^bb5
  ^bb5(%16: i64):  // 2 preds: ^bb4, ^bb7
    %17 = "neura.gep"(%16) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg4"} : (i64) -> !llvm.ptr
    "neura.store"(%17) {lhs_value = 0.000000e+00 : f64} : (!llvm.ptr) -> ()
    %18 = "neura.gep"(%16) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg6"} : (i64) -> !llvm.ptr
    neura.br %6 : i64 to ^bb6
  ^bb6(%19: i64):  // 2 preds: ^bb5, ^bb6
    %20 = "neura.gep"(%19) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg3"} : (i64) -> !llvm.ptr
    %21 = "neura.load"(%20) : (!llvm.ptr) -> f64
    %22 = "neura.load"(%18) : (!llvm.ptr) -> f64
    %23 = "neura.gep"(%16, %19) <{operandSegmentSizes = array<i32: 0, 2>}> {lhs_value = "%arg2"} : (i64, i64) -> !llvm.ptr
    %24 = "neura.load"(%23) : (!llvm.ptr) -> f64
    %25 = "neura.fmul_fadd"(%22, %24, %21) : (f64, f64, f64) -> f64
    "neura.store"(%25, %20) : (f64, !llvm.ptr) -> ()
    %26 = "neura.load"(%17) : (!llvm.ptr) -> f64
    %27 = "neura.load"(%23) : (!llvm.ptr) -> f64
    %28 = "neura.gep"(%19) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg5"} : (i64) -> !llvm.ptr
    %29 = "neura.load"(%28) : (!llvm.ptr) -> f64
    %30 = "neura.fmul_fadd"(%27, %29, %26) : (f64, f64, f64) -> f64
    "neura.store"(%30, %17) : (f64, !llvm.ptr) -> ()
    %31 = "neura.add"(%19) {rhs_value = 1 : i64} : (i64) -> i64
    %32 = "neura.icmp"(%31, %15) <{cmpType = "eq"}> : (i64, i64) -> i1
    neura.cond_br %32 : i1 then to ^bb7 else %31 : i64 to ^bb6
  ^bb7:  // pred: ^bb6
    %33 = "neura.add"(%16) {rhs_value = 1 : i64} : (i64) -> i64
    %34 = "neura.icmp"(%33, %14) <{cmpType = "eq"}> : (i64, i64) -> i1
    neura.cond_br %34 : i1 then to ^bb8 else %33 : i64 to ^bb5
  ^bb8:  // 4 preds: ^bb1, ^bb2, ^bb3, ^bb7
    "neura.return"() : () -> ()
  }
}

The CFG is:
Screenshot 2025-11-06 at 12 35 36

The live-ins that match this new pattern are:

[CanoLiveIn] Pattern 2 (Direct Unconditional): Found direct dominating live-in
  Value: %18 = "neura.gep"(%16) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg6"} : (i64) -> !llvm.ptr
  Defining block: ^bb5
  Using block: ^bb6
[CanoLiveIn] Pattern 2 (Direct Unconditional): Found direct dominating live-in
  Value: <block argument> of type 'i64' at index: 0
  Defining block: ^bb5
  Using block: ^bb6
[CanoLiveIn] Pattern 2 (Direct Unconditional): Found direct dominating live-in
  Value: %17 = "neura.gep"(%16) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg4"} : (i64) -> !llvm.ptr
  Defining block: ^bb5
  Using block: ^bb6
[CanoLiveIn] Pattern 2 (Direct Unconditional): Found direct dominating live-in
  Value: %15 = neura.zext %0 : i32 -> i64
  Defining block: ^bb4
  Using block: ^bb6

After canonicalization:

module attributes {dlti.dl_spec = #dlti.dl_spec<i32 = dense<32> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, f80 = dense<128> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, f128 = dense<128> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64>, llvm.ident = "clang version 20.1.7 (https://github.com/llvm/llvm-project.git 6146a88f60492b520a36f8f8f3231e15f3cc6082)"} {
  func.func @kernel(%arg0: i32 {llvm.noundef}, %arg1: i32 {llvm.noundef}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg3: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg4: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg5: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg6: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) -> !llvm.void attributes {CConv = #llvm.cconv<ccc>, accelerator = "neura", linkage = #llvm.linkage<external>, memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} {
    %0 = "neura.constant"() <{value = "%arg0"}> : () -> i32
    %1 = "neura.constant"() <{value = "%arg1"}> : () -> i32
    %2 = "neura.constant"() <{value = "%arg3"}> : () -> !llvm.ptr
    %3 = "neura.constant"() <{value = "%arg4"}> : () -> !llvm.ptr
    %4 = "neura.constant"() <{value = 3 : i64}> : () -> i64
    %5 = "neura.constant"() <{value = 0 : i8}> : () -> i8
    %6 = "neura.constant"() <{value = 0 : i64}> : () -> i64
    %7 = "neura.icmp"(%0) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
    neura.cond_br %7 : i1 then %0, %4, %2, %5, %1, %6 : i32, i64, !llvm.ptr, i8, i32, i64 to ^bb1 else %1, %4, %3, %5 : i32, i64, !llvm.ptr, i8 to ^bb2
  ^bb1(%8: i32, %9: i64, %10: !llvm.ptr, %11: i8, %12: i32, %13: i64):  // pred: ^bb0
    %14 = neura.zext %8 : i32 -> i64
    %15 = "neura.shl"(%14, %9) : (i64, i64) -> i64
    "neura.memset"(%10, %11, %15) <{is_volatile = false}> : (!llvm.ptr, i8, i64) -> ()
    %16 = "neura.icmp"(%12) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
    neura.cond_br %16 : i1 then %12, %8, %13 : i32, i32, i64 to ^bb4 else to ^bb8
  ^bb2(%17: i32, %18: i64, %19: !llvm.ptr, %20: i8):  // pred: ^bb0
    %21 = "neura.icmp"(%17) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
    neura.cond_br %21 : i1 then %17, %18, %19, %20 : i32, i64, !llvm.ptr, i8 to ^bb3 else to ^bb8
  ^bb3(%22: i32, %23: i64, %24: !llvm.ptr, %25: i8):  // pred: ^bb2
    %26 = neura.zext %22 : i32 -> i64
    %27 = "neura.shl"(%26, %23) : (i64, i64) -> i64
    "neura.memset"(%24, %25, %27) <{is_volatile = false}> : (!llvm.ptr, i8, i64) -> ()
    neura.br to ^bb8
  ^bb4(%28: i32, %29: i32, %30: i64):  // pred: ^bb1
    %31 = neura.zext %28 : i32 -> i64
    %32 = neura.zext %29 : i32 -> i64
    neura.br %30, %30, %31 : i64, i64, i64 to ^bb5
  ^bb5(%33: i64, %34: i64, %35: i64):  // 2 preds: ^bb4, ^bb7
    %36 = "neura.gep"(%33) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg4"} : (i64) -> !llvm.ptr
    "neura.store"(%36) {lhs_value = 0.000000e+00 : f64} : (!llvm.ptr) -> ()
    %37 = "neura.gep"(%33) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg6"} : (i64) -> !llvm.ptr
    neura.br %34, %33, %35, %34 : i64, i64, i64, i64 to ^bb6
  ^bb6(%38: i64, %39: i64, %40: i64, %41: i64):  // 2 preds: ^bb5, ^bb6
    %42 = "neura.gep"(%38) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg3"} : (i64) -> !llvm.ptr
    %43 = "neura.load"(%42) : (!llvm.ptr) -> f64
    %44 = "neura.load"(%37) : (!llvm.ptr) -> f64
    %45 = "neura.gep"(%39, %38) <{operandSegmentSizes = array<i32: 0, 2>}> {lhs_value = "%arg2"} : (i64, i64) -> !llvm.ptr
    %46 = "neura.load"(%45) : (!llvm.ptr) -> f64
    %47 = "neura.fmul_fadd"(%44, %46, %43) : (f64, f64, f64) -> f64
    "neura.store"(%47, %42) : (f64, !llvm.ptr) -> ()
    %48 = "neura.load"(%36) : (!llvm.ptr) -> f64
    %49 = "neura.load"(%45) : (!llvm.ptr) -> f64
    %50 = "neura.gep"(%38) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg5"} : (i64) -> !llvm.ptr
    %51 = "neura.load"(%50) : (!llvm.ptr) -> f64
    %52 = "neura.fmul_fadd"(%49, %51, %48) : (f64, f64, f64) -> f64
    "neura.store"(%52, %36) : (f64, !llvm.ptr) -> ()
    %53 = "neura.add"(%38) {rhs_value = 1 : i64} : (i64) -> i64
    %54 = "neura.icmp"(%53, %32) <{cmpType = "eq"}> : (i64, i64) -> i1
    neura.cond_br %54 : i1 then %39, %40, %41 : i64, i64, i64 to ^bb7 else %53, %39, %40, %41 : i64, i64, i64, i64 to ^bb6
  ^bb7(%55: i64, %56: i64, %57: i64):  // pred: ^bb6
    %58 = "neura.add"(%55) {rhs_value = 1 : i64} : (i64) -> i64
    %59 = "neura.icmp"(%58, %56) <{cmpType = "eq"}> : (i64, i64) -> i1
    neura.cond_br %59 : i1 then to ^bb8 else %58, %57, %56 : i64, i64, i64 to ^bb5
  ^bb8:  // 4 preds: ^bb1, ^bb2, ^bb3, ^bb7
    "neura.return"() : () -> ()
  }
}

@tancheng
Copy link
Copy Markdown
Contributor

tancheng commented Nov 6, 2025

Let's use %18 as an example — what benefit can we get?

@ShangkunLi
Copy link
Copy Markdown
Collaborator Author

Let's use %18 as an example — what benefit can we get?

For %18 = "neura.gep"(%16) in the CDFG-based Neura IR, it corresponds to %37 = "neura.gep"(%33) in the canonicalized IR.

%37 is directly used in %44 = "neura.load"(%37) : (!llvm.ptr) -> f64 and does not need to be passed through block arguments of bb6.

@ShangkunLi ShangkunLi merged commit 5258b34 into coredac:main Nov 6, 2025
1 check passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants