Skip to content

Add new live-in patterns#186

Merged
ShangkunLi merged 5 commits into coredac:main from
ShangkunLi:new-live-in-patterns
Nov 6, 2025
Merged

Add new live-in patterns#186
ShangkunLi merged 5 commits into coredac:main from
ShangkunLi:new-live-in-patterns

Conversation

@ShangkunLi
Copy link
Copy Markdown
Collaborator

In this PR, we add new patterns to detect additional live-in cases in canonicalize-live-in.

// Checks if there's a direct unconditional path from defining_block to
// using_block without crossing any conditional branches.
//
// Pattern Structure:
//    [ Defining Block A ]
//             |  (br)
//             v
//       [ Block B ]
//             |  (br)
//             v
//       [ Block C ]
//             |  (br)
//             v
//    [ Using Block D ]

@ShangkunLi ShangkunLi marked this pull request as ready for review November 6, 2025 02:55
@ShangkunLi ShangkunLi requested a review from tancheng November 6, 2025 02:55
@tancheng
Copy link
Copy Markdown
Contributor

tancheng commented Nov 6, 2025

Can you add variables x/y/z to show what is going to happen after this pattern is identified?

@ShangkunLi
Copy link
Copy Markdown
Collaborator Author

ShangkunLi commented Nov 6, 2025

Can you add variables x/y/z to show what is going to happen after this pattern is identified?

Sure~! Here is an example and its canonicalized version:

module attributes {dlti.dl_spec = #dlti.dl_spec<i32 = dense<32> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, f80 = dense<128> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, f128 = dense<128> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64>, llvm.ident = "clang version 20.1.7 (https://github.com/llvm/llvm-project.git 6146a88f60492b520a36f8f8f3231e15f3cc6082)"} {
  func.func @kernel(%arg0: i32 {llvm.noundef}, %arg1: i32 {llvm.noundef}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg3: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg4: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg5: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg6: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) -> !llvm.void attributes {CConv = #llvm.cconv<ccc>, accelerator = "neura", linkage = #llvm.linkage<external>, memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} {
    %0 = "neura.constant"() <{value = "%arg0"}> : () -> i32
    %1 = "neura.constant"() <{value = "%arg1"}> : () -> i32
    %2 = "neura.constant"() <{value = "%arg3"}> : () -> !llvm.ptr
    %3 = "neura.constant"() <{value = "%arg4"}> : () -> !llvm.ptr
    %4 = "neura.constant"() <{value = 3 : i64}> : () -> i64
    %5 = "neura.constant"() <{value = 0 : i8}> : () -> i8
    %6 = "neura.constant"() <{value = 0 : i64}> : () -> i64
    %7 = "neura.icmp"(%0) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
    neura.cond_br %7 : i1 then to ^bb1 else to ^bb2
  ^bb1:  // pred: ^bb0
    %8 = neura.zext %0 : i32 -> i64
    %9 = "neura.shl"(%8, %4) : (i64, i64) -> i64
    "neura.memset"(%2, %5, %9) <{is_volatile = false}> : (!llvm.ptr, i8, i64) -> ()
    %10 = "neura.icmp"(%1) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
    neura.cond_br %10 : i1 then to ^bb4 else to ^bb8
  ^bb2:  // pred: ^bb0
    %11 = "neura.icmp"(%1) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
    neura.cond_br %11 : i1 then to ^bb3 else to ^bb8
  ^bb3:  // pred: ^bb2
    %12 = neura.zext %1 : i32 -> i64
    %13 = "neura.shl"(%12, %4) : (i64, i64) -> i64
    "neura.memset"(%3, %5, %13) <{is_volatile = false}> : (!llvm.ptr, i8, i64) -> ()
    neura.br to ^bb8
  ^bb4:  // pred: ^bb1
    %14 = neura.zext %1 : i32 -> i64
    %15 = neura.zext %0 : i32 -> i64
    neura.br %6 : i64 to ^bb5
  ^bb5(%16: i64):  // 2 preds: ^bb4, ^bb7
    %17 = "neura.gep"(%16) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg4"} : (i64) -> !llvm.ptr
    "neura.store"(%17) {lhs_value = 0.000000e+00 : f64} : (!llvm.ptr) -> ()
    %18 = "neura.gep"(%16) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg6"} : (i64) -> !llvm.ptr
    neura.br %6 : i64 to ^bb6
  ^bb6(%19: i64):  // 2 preds: ^bb5, ^bb6
    %20 = "neura.gep"(%19) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg3"} : (i64) -> !llvm.ptr
    %21 = "neura.load"(%20) : (!llvm.ptr) -> f64
    %22 = "neura.load"(%18) : (!llvm.ptr) -> f64
    %23 = "neura.gep"(%16, %19) <{operandSegmentSizes = array<i32: 0, 2>}> {lhs_value = "%arg2"} : (i64, i64) -> !llvm.ptr
    %24 = "neura.load"(%23) : (!llvm.ptr) -> f64
    %25 = "neura.fmul_fadd"(%22, %24, %21) : (f64, f64, f64) -> f64
    "neura.store"(%25, %20) : (f64, !llvm.ptr) -> ()
    %26 = "neura.load"(%17) : (!llvm.ptr) -> f64
    %27 = "neura.load"(%23) : (!llvm.ptr) -> f64
    %28 = "neura.gep"(%19) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg5"} : (i64) -> !llvm.ptr
    %29 = "neura.load"(%28) : (!llvm.ptr) -> f64
    %30 = "neura.fmul_fadd"(%27, %29, %26) : (f64, f64, f64) -> f64
    "neura.store"(%30, %17) : (f64, !llvm.ptr) -> ()
    %31 = "neura.add"(%19) {rhs_value = 1 : i64} : (i64) -> i64
    %32 = "neura.icmp"(%31, %15) <{cmpType = "eq"}> : (i64, i64) -> i1
    neura.cond_br %32 : i1 then to ^bb7 else %31 : i64 to ^bb6
  ^bb7:  // pred: ^bb6
    %33 = "neura.add"(%16) {rhs_value = 1 : i64} : (i64) -> i64
    %34 = "neura.icmp"(%33, %14) <{cmpType = "eq"}> : (i64, i64) -> i1
    neura.cond_br %34 : i1 then to ^bb8 else %33 : i64 to ^bb5
  ^bb8:  // 4 preds: ^bb1, ^bb2, ^bb3, ^bb7
    "neura.return"() : () -> ()
  }
}

The CFG is:
Screenshot 2025-11-06 at 12 35 36

The live-ins that match this new pattern are:

[CanoLiveIn] Pattern 2 (Direct Unconditional): Found direct dominating live-in
  Value: %18 = "neura.gep"(%16) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg6"} : (i64) -> !llvm.ptr
  Defining block: ^bb5
  Using block: ^bb6
[CanoLiveIn] Pattern 2 (Direct Unconditional): Found direct dominating live-in
  Value: <block argument> of type 'i64' at index: 0
  Defining block: ^bb5
  Using block: ^bb6
[CanoLiveIn] Pattern 2 (Direct Unconditional): Found direct dominating live-in
  Value: %17 = "neura.gep"(%16) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg4"} : (i64) -> !llvm.ptr
  Defining block: ^bb5
  Using block: ^bb6
[CanoLiveIn] Pattern 2 (Direct Unconditional): Found direct dominating live-in
  Value: %15 = neura.zext %0 : i32 -> i64
  Defining block: ^bb4
  Using block: ^bb6

After canonicalization:

module attributes {dlti.dl_spec = #dlti.dl_spec<i32 = dense<32> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, f80 = dense<128> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, f128 = dense<128> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64>, llvm.ident = "clang version 20.1.7 (https://github.com/llvm/llvm-project.git 6146a88f60492b520a36f8f8f3231e15f3cc6082)"} {
  func.func @kernel(%arg0: i32 {llvm.noundef}, %arg1: i32 {llvm.noundef}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg3: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg4: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg5: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg6: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}) -> !llvm.void attributes {CConv = #llvm.cconv<ccc>, accelerator = "neura", linkage = #llvm.linkage<external>, memory_effects = #llvm.memory_effects<other = none, argMem = readwrite, inaccessibleMem = none>, no_unwind, passthrough = ["nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} {
    %0 = "neura.constant"() <{value = "%arg0"}> : () -> i32
    %1 = "neura.constant"() <{value = "%arg1"}> : () -> i32
    %2 = "neura.constant"() <{value = "%arg3"}> : () -> !llvm.ptr
    %3 = "neura.constant"() <{value = "%arg4"}> : () -> !llvm.ptr
    %4 = "neura.constant"() <{value = 3 : i64}> : () -> i64
    %5 = "neura.constant"() <{value = 0 : i8}> : () -> i8
    %6 = "neura.constant"() <{value = 0 : i64}> : () -> i64
    %7 = "neura.icmp"(%0) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
    neura.cond_br %7 : i1 then %0, %4, %2, %5, %1, %6 : i32, i64, !llvm.ptr, i8, i32, i64 to ^bb1 else %1, %4, %3, %5 : i32, i64, !llvm.ptr, i8 to ^bb2
  ^bb1(%8: i32, %9: i64, %10: !llvm.ptr, %11: i8, %12: i32, %13: i64):  // pred: ^bb0
    %14 = neura.zext %8 : i32 -> i64
    %15 = "neura.shl"(%14, %9) : (i64, i64) -> i64
    "neura.memset"(%10, %11, %15) <{is_volatile = false}> : (!llvm.ptr, i8, i64) -> ()
    %16 = "neura.icmp"(%12) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
    neura.cond_br %16 : i1 then %12, %8, %13 : i32, i32, i64 to ^bb4 else to ^bb8
  ^bb2(%17: i32, %18: i64, %19: !llvm.ptr, %20: i8):  // pred: ^bb0
    %21 = "neura.icmp"(%17) <{cmpType = "sgt"}> {rhs_value = 0 : i32} : (i32) -> i1
    neura.cond_br %21 : i1 then %17, %18, %19, %20 : i32, i64, !llvm.ptr, i8 to ^bb3 else to ^bb8
  ^bb3(%22: i32, %23: i64, %24: !llvm.ptr, %25: i8):  // pred: ^bb2
    %26 = neura.zext %22 : i32 -> i64
    %27 = "neura.shl"(%26, %23) : (i64, i64) -> i64
    "neura.memset"(%24, %25, %27) <{is_volatile = false}> : (!llvm.ptr, i8, i64) -> ()
    neura.br to ^bb8
  ^bb4(%28: i32, %29: i32, %30: i64):  // pred: ^bb1
    %31 = neura.zext %28 : i32 -> i64
    %32 = neura.zext %29 : i32 -> i64
    neura.br %30, %30, %31 : i64, i64, i64 to ^bb5
  ^bb5(%33: i64, %34: i64, %35: i64):  // 2 preds: ^bb4, ^bb7
    %36 = "neura.gep"(%33) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg4"} : (i64) -> !llvm.ptr
    "neura.store"(%36) {lhs_value = 0.000000e+00 : f64} : (!llvm.ptr) -> ()
    %37 = "neura.gep"(%33) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg6"} : (i64) -> !llvm.ptr
    neura.br %34, %33, %35, %34 : i64, i64, i64, i64 to ^bb6
  ^bb6(%38: i64, %39: i64, %40: i64, %41: i64):  // 2 preds: ^bb5, ^bb6
    %42 = "neura.gep"(%38) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg3"} : (i64) -> !llvm.ptr
    %43 = "neura.load"(%42) : (!llvm.ptr) -> f64
    %44 = "neura.load"(%37) : (!llvm.ptr) -> f64
    %45 = "neura.gep"(%39, %38) <{operandSegmentSizes = array<i32: 0, 2>}> {lhs_value = "%arg2"} : (i64, i64) -> !llvm.ptr
    %46 = "neura.load"(%45) : (!llvm.ptr) -> f64
    %47 = "neura.fmul_fadd"(%44, %46, %43) : (f64, f64, f64) -> f64
    "neura.store"(%47, %42) : (f64, !llvm.ptr) -> ()
    %48 = "neura.load"(%36) : (!llvm.ptr) -> f64
    %49 = "neura.load"(%45) : (!llvm.ptr) -> f64
    %50 = "neura.gep"(%38) <{operandSegmentSizes = array<i32: 0, 1>}> {lhs_value = "%arg5"} : (i64) -> !llvm.ptr
    %51 = "neura.load"(%50) : (!llvm.ptr) -> f64
    %52 = "neura.fmul_fadd"(%49, %51, %48) : (f64, f64, f64) -> f64
    "neura.store"(%52, %36) : (f64, !llvm.ptr) -> ()
    %53 = "neura.add"(%38) {rhs_value = 1 : i64} : (i64) -> i64
    %54 = "neura.icmp"(%53, %32) <{cmpType = "eq"}> : (i64, i64) -> i1
    neura.cond_br %54 : i1 then %39, %40, %41 : i64, i64, i64 to ^bb7 else %53, %39, %40, %41 : i64, i64, i64, i64 to ^bb6
  ^bb7(%55: i64, %56: i64, %57: i64):  // pred: ^bb6
    %58 = "neura.add"(%55) {rhs_value = 1 : i64} : (i64) -> i64
    %59 = "neura.icmp"(%58, %56) <{cmpType = "eq"}> : (i64, i64) -> i1
    neura.cond_br %59 : i1 then to ^bb8 else %58, %57, %56 : i64, i64, i64 to ^bb5
  ^bb8:  // 4 preds: ^bb1, ^bb2, ^bb3, ^bb7
    "neura.return"() : () -> ()
  }
}

@tancheng
Copy link
Copy Markdown
Contributor

tancheng commented Nov 6, 2025

Let's use %18 as an example — what benefit can we get?

@ShangkunLi
Copy link
Copy Markdown
Collaborator Author

Let's use %18 as an example — what benefit can we get?

For %18 = "neura.gep"(%16) in the CDFG-based Neura IR, it corresponds to %37 = "neura.gep"(%33) in the canonicalized IR.

%37 is directly used in %44 = "neura.load"(%37) : (!llvm.ptr) -> f64 and does not need to be passed through block arguments of bb6.

@ShangkunLi ShangkunLi merged commit 5258b34 into coredac:main Nov 6, 2025
1 check passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants