forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[regalloc][basic] Change spill weight for optsize funcs (llvm#112960)
Change the spill weight calculations for `optsize` functions to remove the block frequency multiplier. For those functions, we do not want to consider the runtime cost of spilling, only the code-size cost.

I built a large app with the basic and greedy (default) register allocators enabled.

| Regalloc Type | Uncompressed Size Delta | Compressed Size Delta |
| - | - | - |
| Basic | -303.8 KiB (-0.23%) | -232.0 KiB (-0.39%) |
| Greedy | 159.1 KiB (0.12%) | 130.1 KiB (0.22%) |

Since I only saw a size win with the basic register allocator, I decided to only change the behavior for that type.
- Loading branch information
Showing
6 changed files
with
205 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
168 changes: 168 additions & 0 deletions
168
llvm/test/CodeGen/AArch64/regalloc-spill-weight-basic.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
|
||
; RUN: llc < %s -mtriple=aarch64 -regalloc=basic | FileCheck %s | ||
|
||
; Test that the basic register allocator (-regalloc=basic) produces a different allocation for a minsize function than for the same function without a size attribute. | ||
|
||
; External callee with no definition in this file; both test functions call it
; from their %bb10 block.
declare void @foo(i32, ptr) | ||
|
||
; Size-optimized variant of the test function. Note that it carries the
; `minsize` attribute (despite its name, it is not marked `optsize`). Its IR
; body is the same as @optspeed's, so the attribute is the only input that
; differs between the two functions.
;
; Per the expected output below, the incoming arguments are copied into
; callee-saved registers as:
;   %arg1 (w1) -> w19, %arg2 (x2) -> x20, %arg3 (x3) -> x21,
;   %arg4 (x4) -> x22, %arg5 (w5) -> w23
; %arg, %arg6 and %phi do not feed any store, call or branch condition.
define void @optsize(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %arg5, i1 %arg6) minsize { | ||
; CHECK-LABEL: optsize: | ||
; CHECK: // %bb.0: // %bb | ||
; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill | ||
; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill | ||
; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill | ||
; CHECK-NEXT: .cfi_def_cfa_offset 48 | ||
; CHECK-NEXT: .cfi_offset w19, -8 | ||
; CHECK-NEXT: .cfi_offset w20, -16 | ||
; CHECK-NEXT: .cfi_offset w21, -24 | ||
; CHECK-NEXT: .cfi_offset w22, -32 | ||
; CHECK-NEXT: .cfi_offset w23, -40 | ||
; CHECK-NEXT: .cfi_offset w30, -48 | ||
; CHECK-NEXT: mov w23, w5 | ||
; CHECK-NEXT: mov x22, x4 | ||
; CHECK-NEXT: mov x21, x3 | ||
; CHECK-NEXT: mov x20, x2 | ||
; CHECK-NEXT: mov w19, w1 | ||
; CHECK-NEXT: .LBB0_1: // %bb8 | ||
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 | ||
; CHECK-NEXT: cbz w19, .LBB0_1 | ||
; CHECK-NEXT: // %bb.2: // %bb8 | ||
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 | ||
; CHECK-NEXT: cmp w19, #39 | ||
; CHECK-NEXT: b.eq .LBB0_6 | ||
; CHECK-NEXT: // %bb.3: // %bb8 | ||
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 | ||
; CHECK-NEXT: cmp w19, #34 | ||
; CHECK-NEXT: b.eq .LBB0_6 | ||
; CHECK-NEXT: // %bb.4: // %bb8 | ||
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 | ||
; CHECK-NEXT: cmp w19, #10 | ||
; CHECK-NEXT: b.ne .LBB0_1 | ||
; CHECK-NEXT: // %bb.5: // %bb9 | ||
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 | ||
; CHECK-NEXT: str wzr, [x20] | ||
; CHECK-NEXT: b .LBB0_1 | ||
; CHECK-NEXT: .LBB0_6: // %bb10 | ||
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 | ||
; CHECK-NEXT: mov w0, w23 | ||
; CHECK-NEXT: mov x1, x21 | ||
; CHECK-NEXT: str wzr, [x22] | ||
; CHECK-NEXT: bl foo | ||
; CHECK-NEXT: b .LBB0_1 | ||
bb: | ||
br label %bb7 | ||
|
||
bb7: ; preds = %bb13, %bb | ||
%phi = phi i32 [ 0, %bb ], [ %spec.select, %bb13 ] | ||
br label %bb8 | ||
|
||
; Dispatch on %arg1. Every successor except case 1 (-> %bb16, unreachable)
; eventually branches back to %bb8, so the loop has no exit; the expected
; output above accordingly ends with a branch back to the loop header and
; checks no return sequence.
bb8: ; preds = %bb10, %bb9, %bb8, %bb7 | ||
switch i32 %arg1, label %bb8 [ | ||
i32 10, label %bb9 | ||
i32 1, label %bb16 | ||
i32 0, label %bb13 | ||
i32 39, label %bb10 | ||
i32 34, label %bb10 | ||
] | ||
|
||
bb9: ; preds = %bb8 | ||
store i32 0, ptr %arg2, align 4 | ||
br label %bb8 | ||
|
||
; The only call in the function. Control returns to %bb8 afterwards, so
; %arg1-%arg5 are all still needed after the call.
bb10: ; preds = %bb8, %bb8 | ||
store i32 0, ptr %arg4, align 4 | ||
tail call void @foo(i32 %arg5, ptr %arg3) | ||
br label %bb8 | ||
|
||
bb13: ; preds = %bb8 | ||
%not.arg6 = xor i1 %arg6, true | ||
%spec.select = zext i1 %not.arg6 to i32 | ||
br label %bb7 | ||
|
||
bb16: ; preds = %bb8 | ||
unreachable | ||
} | ||
|
||
; Baseline variant of the test function: it has no size attribute, and its IR
; body is the same as @optsize's.
;
; Per the expected output below, the incoming arguments are copied into
; callee-saved registers as:
;   %arg1 (w1) -> w19, %arg2 (x2) -> x23, %arg3 (x3) -> x20,
;   %arg4 (x4) -> x21, %arg5 (w5) -> w22
; Compared with @optsize, the expected output differs in which register each
; of %arg2-%arg5 is assigned, and in how the switch in %bb8 is lowered and laid
; out (%bb10 is placed ahead of the loop header and the cases are split around
; a `cmp w19, #33`).
define void @optspeed(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %arg5, i1 %arg6) { | ||
; CHECK-LABEL: optspeed: | ||
; CHECK: // %bb.0: // %bb | ||
; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill | ||
; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill | ||
; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill | ||
; CHECK-NEXT: .cfi_def_cfa_offset 48 | ||
; CHECK-NEXT: .cfi_offset w19, -8 | ||
; CHECK-NEXT: .cfi_offset w20, -16 | ||
; CHECK-NEXT: .cfi_offset w21, -24 | ||
; CHECK-NEXT: .cfi_offset w22, -32 | ||
; CHECK-NEXT: .cfi_offset w23, -40 | ||
; CHECK-NEXT: .cfi_offset w30, -48 | ||
; CHECK-NEXT: mov w22, w5 | ||
; CHECK-NEXT: mov x21, x4 | ||
; CHECK-NEXT: mov x20, x3 | ||
; CHECK-NEXT: mov x23, x2 | ||
; CHECK-NEXT: mov w19, w1 | ||
; CHECK-NEXT: b .LBB1_2 | ||
; CHECK-NEXT: .LBB1_1: // %bb10 | ||
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 | ||
; CHECK-NEXT: mov w0, w22 | ||
; CHECK-NEXT: mov x1, x20 | ||
; CHECK-NEXT: str wzr, [x21] | ||
; CHECK-NEXT: bl foo | ||
; CHECK-NEXT: .LBB1_2: // %bb8 | ||
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 | ||
; CHECK-NEXT: cmp w19, #33 | ||
; CHECK-NEXT: b.gt .LBB1_6 | ||
; CHECK-NEXT: // %bb.3: // %bb8 | ||
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 | ||
; CHECK-NEXT: cbz w19, .LBB1_2 | ||
; CHECK-NEXT: // %bb.4: // %bb8 | ||
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 | ||
; CHECK-NEXT: cmp w19, #10 | ||
; CHECK-NEXT: b.ne .LBB1_2 | ||
; CHECK-NEXT: // %bb.5: // %bb9 | ||
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 | ||
; CHECK-NEXT: str wzr, [x23] | ||
; CHECK-NEXT: b .LBB1_2 | ||
; CHECK-NEXT: .LBB1_6: // %bb8 | ||
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 | ||
; CHECK-NEXT: cmp w19, #34 | ||
; CHECK-NEXT: b.eq .LBB1_1 | ||
; CHECK-NEXT: // %bb.7: // %bb8 | ||
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 | ||
; CHECK-NEXT: cmp w19, #39 | ||
; CHECK-NEXT: b.eq .LBB1_1 | ||
; CHECK-NEXT: b .LBB1_2 | ||
bb: | ||
br label %bb7 | ||
|
||
bb7: ; preds = %bb13, %bb | ||
%phi = phi i32 [ 0, %bb ], [ %spec.select, %bb13 ] | ||
br label %bb8 | ||
|
||
; Dispatch on %arg1. Every successor except case 1 (-> %bb16, unreachable)
; eventually branches back to %bb8, so the loop has no exit.
bb8: ; preds = %bb10, %bb9, %bb8, %bb7 | ||
switch i32 %arg1, label %bb8 [ | ||
i32 10, label %bb9 | ||
i32 1, label %bb16 | ||
i32 0, label %bb13 | ||
i32 39, label %bb10 | ||
i32 34, label %bb10 | ||
] | ||
|
||
bb9: ; preds = %bb8 | ||
store i32 0, ptr %arg2, align 4 | ||
br label %bb8 | ||
|
||
; The only call in the function. Control returns to %bb8 afterwards, so
; %arg1-%arg5 are all still needed after the call.
bb10: ; preds = %bb8, %bb8 | ||
store i32 0, ptr %arg4, align 4 | ||
tail call void @foo(i32 %arg5, ptr %arg3) | ||
br label %bb8 | ||
|
||
bb13: ; preds = %bb8 | ||
%not.arg6 = xor i1 %arg6, true | ||
%spec.select = zext i1 %not.arg6 to i32 | ||
br label %bb7 | ||
|
||
bb16: ; preds = %bb8 | ||
unreachable | ||
} |