Skip to content

Commit

Permalink
[regalloc][basic] Change spill weight for optsize funcs (llvm#112960)
Browse files Browse the repository at this point in the history
Change the spill weight calculations for `optsize` functions to remove
the block frequency multiplier. For those functions, we do not want to
consider the runtime cost of spilling, only the codesize cost.

I built a large app with each of the basic and greedy (default) register
allocators enabled and measured the size change.

| Regalloc Type | Uncompressed Size Delta | Compressed Size Delta |
| - | - | - |
| Basic | -303.8 KiB (-0.23%) | -232.0 KiB (-0.39%) |
| Greedy | +159.1 KiB (+0.12%) | +130.1 KiB (+0.22%) |

Since I saw a size win only with the basic register allocator, I decided
to change the behavior only for that allocator.
  • Loading branch information
ellishg authored Oct 21, 2024
1 parent f58ce11 commit e6ada71
Show file tree
Hide file tree
Showing 6 changed files with 205 additions and 12 deletions.
7 changes: 5 additions & 2 deletions llvm/include/llvm/CodeGen/CalcSpillWeights.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class LiveIntervals;
class MachineBlockFrequencyInfo;
class MachineFunction;
class MachineLoopInfo;
class ProfileSummaryInfo;
class VirtRegMap;

/// Normalize the spill weight of a live interval
Expand Down Expand Up @@ -47,6 +48,7 @@ class VirtRegMap;
LiveIntervals &LIS;
const VirtRegMap &VRM;
const MachineLoopInfo &Loops;
ProfileSummaryInfo *PSI;
const MachineBlockFrequencyInfo &MBFI;

/// Returns true if Reg of live interval LI is used in instruction with many
Expand All @@ -56,8 +58,9 @@ class VirtRegMap;
public:
VirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS,
const VirtRegMap &VRM, const MachineLoopInfo &Loops,
const MachineBlockFrequencyInfo &MBFI)
: MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), MBFI(MBFI) {}
const MachineBlockFrequencyInfo &MBFI,
ProfileSummaryInfo *PSI = nullptr)
: MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), PSI(PSI), MBFI(MBFI) {}

virtual ~VirtRegAuxInfo() = default;

Expand Down
9 changes: 7 additions & 2 deletions llvm/include/llvm/CodeGen/LiveIntervals.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class MachineDominatorTree;
class MachineFunction;
class MachineInstr;
class MachineRegisterInfo;
class ProfileSummaryInfo;
class raw_ostream;
class TargetInstrInfo;
class VirtRegMap;
Expand Down Expand Up @@ -113,14 +114,18 @@ class LiveIntervals {
~LiveIntervals();

/// Calculate the spill weight to assign to a single instruction.
/// If \p PSI is provided, the block frequency multiplier is omitted for
/// functions that are optimized for size.
static float getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineInstr &MI);
const MachineInstr &MI,
ProfileSummaryInfo *PSI = nullptr);

/// Calculate the spill weight to assign to a single instruction.
/// If \p PSI is provided, the block frequency multiplier is omitted for
/// functions that are optimized for size.
static float getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineBasicBlock *MBB);
const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI = nullptr);

LiveInterval &getInterval(Register Reg) {
if (hasInterval(Reg))
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/CodeGen/CalcSpillWeights.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,10 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// localLI = COPY other
// ...
// other = COPY localLI
TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, LocalMBB);
TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, LocalMBB);
TotalWeight +=
LiveIntervals::getSpillWeight(true, false, &MBFI, LocalMBB, PSI);
TotalWeight +=
LiveIntervals::getSpillWeight(false, true, &MBFI, LocalMBB, PSI);

NumInstr += 2;
}
Expand Down Expand Up @@ -272,7 +274,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// Calculate instr weight.
bool Reads, Writes;
std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
Weight = LiveIntervals::getSpillWeight(Writes, Reads, &MBFI, *MI);
Weight = LiveIntervals::getSpillWeight(Writes, Reads, &MBFI, *MI, PSI);

// Give extra weight to what looks like a loop induction variable update.
if (Writes && IsExiting && LIS.isLiveOutOfMBB(LI, MBB))
Expand Down
19 changes: 15 additions & 4 deletions llvm/lib/CodeGen/LiveIntervals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
Expand Down Expand Up @@ -875,14 +877,23 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {

float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineInstr &MI) {
return getSpillWeight(isDef, isUse, MBFI, MI.getParent());
const MachineInstr &MI,
ProfileSummaryInfo *PSI) {
return getSpillWeight(isDef, isUse, MBFI, MI.getParent(), PSI);
}

float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineBasicBlock *MBB) {
return (isDef + isUse) * MBFI->getBlockFreqRelativeToEntryBlock(MBB);
const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI) {
float Weight = isDef + isUse;
const auto *MF = MBB->getParent();
// When optimizing for size we only consider the codesize impact of spilling
// the register, not the runtime impact.
if (PSI && (MF->getFunction().hasOptSize() ||
llvm::shouldOptimizeForSize(MF, PSI, MBFI)))
return Weight;
return Weight * MBFI->getBlockFreqRelativeToEntryBlock(MBB);
}

LiveRange::Segment
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/CodeGen/RegAllocBasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "AllocationOrder.h"
#include "RegAllocBase.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
Expand Down Expand Up @@ -140,6 +141,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false,
false)

Expand Down Expand Up @@ -182,6 +184,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveDebugVariables>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequiredID(MachineDominatorsID);
Expand Down Expand Up @@ -312,7 +315,8 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
getAnalysis<LiveRegMatrix>());
VirtRegAuxInfo VRAI(
*MF, *LIS, *VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(),
getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(),
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
VRAI.calculateSpillWeightsAndHints();

SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, VRAI));
Expand Down
168 changes: 168 additions & 0 deletions llvm/test/CodeGen/AArch64/regalloc-spill-weight-basic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py

; RUN: llc < %s -mtriple=aarch64 -regalloc=basic | FileCheck %s

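; Test that the basic register allocator assigns registers differently for a
; minsize function (@optsize) than for an otherwise identical function without
; that attribute (@optspeed).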

declare void @foo(i32, ptr)

; Variant marked `minsize`. The expected assembly below copies w5, x4, x3, x2
; and w1 into w23, x22, x21, x20 and w19 before entering the loop.
define void @optsize(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %arg5, i1 %arg6) minsize {
; CHECK-LABEL: optsize:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w30, -48
; CHECK-NEXT: mov w23, w5
; CHECK-NEXT: mov x22, x4
; CHECK-NEXT: mov x21, x3
; CHECK-NEXT: mov x20, x2
; CHECK-NEXT: mov w19, w1
; CHECK-NEXT: .LBB0_1: // %bb8
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cbz w19, .LBB0_1
; CHECK-NEXT: // %bb.2: // %bb8
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: cmp w19, #39
; CHECK-NEXT: b.eq .LBB0_6
; CHECK-NEXT: // %bb.3: // %bb8
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: cmp w19, #34
; CHECK-NEXT: b.eq .LBB0_6
; CHECK-NEXT: // %bb.4: // %bb8
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: cmp w19, #10
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.5: // %bb9
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: str wzr, [x20]
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_6: // %bb10
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: mov w0, w23
; CHECK-NEXT: mov x1, x21
; CHECK-NEXT: str wzr, [x22]
; CHECK-NEXT: bl foo
; CHECK-NEXT: b .LBB0_1
bb:
br label %bb7

bb7: ; preds = %bb13, %bb
; %phi has no uses in this function.
%phi = phi i32 [ 0, %bb ], [ %spec.select, %bb13 ]
br label %bb8

bb8: ; preds = %bb10, %bb9, %bb8, %bb7
; Dispatch on %arg1; any value not listed below branches back to %bb8.
switch i32 %arg1, label %bb8 [
i32 10, label %bb9
i32 1, label %bb16
i32 0, label %bb13
i32 39, label %bb10
i32 34, label %bb10
]

bb9: ; preds = %bb8
store i32 0, ptr %arg2, align 4
br label %bb8

bb10: ; preds = %bb8, %bb8
; Stores 0 through %arg4, then calls @foo with %arg5 and %arg3.
store i32 0, ptr %arg4, align 4
tail call void @foo(i32 %arg5, ptr %arg3)
br label %bb8

bb13: ; preds = %bb8
%not.arg6 = xor i1 %arg6, true
%spec.select = zext i1 %not.arg6 to i32
br label %bb7

bb16: ; preds = %bb8
; Only reached from the switch case for %arg1 == 1.
unreachable
}

; Variant without a size attribute; the IR body is the same as in @optsize.
; The expected assembly below copies w5, x4, x3, x2 and w1 into w22, x21, x20,
; x23 and w19, and emits the %bb10 call block ahead of the loop header.
define void @optspeed(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %arg5, i1 %arg6) {
; CHECK-LABEL: optspeed:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w30, -48
; CHECK-NEXT: mov w22, w5
; CHECK-NEXT: mov x21, x4
; CHECK-NEXT: mov x20, x3
; CHECK-NEXT: mov x23, x2
; CHECK-NEXT: mov w19, w1
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_1: // %bb10
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: mov w0, w22
; CHECK-NEXT: mov x1, x20
; CHECK-NEXT: str wzr, [x21]
; CHECK-NEXT: bl foo
; CHECK-NEXT: .LBB1_2: // %bb8
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmp w19, #33
; CHECK-NEXT: b.gt .LBB1_6
; CHECK-NEXT: // %bb.3: // %bb8
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: cbz w19, .LBB1_2
; CHECK-NEXT: // %bb.4: // %bb8
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: cmp w19, #10
; CHECK-NEXT: b.ne .LBB1_2
; CHECK-NEXT: // %bb.5: // %bb9
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: str wzr, [x23]
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_6: // %bb8
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: cmp w19, #34
; CHECK-NEXT: b.eq .LBB1_1
; CHECK-NEXT: // %bb.7: // %bb8
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: cmp w19, #39
; CHECK-NEXT: b.eq .LBB1_1
; CHECK-NEXT: b .LBB1_2
bb:
br label %bb7

bb7: ; preds = %bb13, %bb
; %phi has no uses in this function.
%phi = phi i32 [ 0, %bb ], [ %spec.select, %bb13 ]
br label %bb8

bb8: ; preds = %bb10, %bb9, %bb8, %bb7
; Dispatch on %arg1; any value not listed below branches back to %bb8.
switch i32 %arg1, label %bb8 [
i32 10, label %bb9
i32 1, label %bb16
i32 0, label %bb13
i32 39, label %bb10
i32 34, label %bb10
]

bb9: ; preds = %bb8
store i32 0, ptr %arg2, align 4
br label %bb8

bb10: ; preds = %bb8, %bb8
; Stores 0 through %arg4, then calls @foo with %arg5 and %arg3.
store i32 0, ptr %arg4, align 4
tail call void @foo(i32 %arg5, ptr %arg3)
br label %bb8

bb13: ; preds = %bb8
%not.arg6 = xor i1 %arg6, true
%spec.select = zext i1 %not.arg6 to i32
br label %bb7

bb16: ; preds = %bb8
; Only reached from the switch case for %arg1 == 1.
unreachable
}

0 comments on commit e6ada71

Please sign in to comment.