Skip to content

Commit e6ac6be

Browse files
bcheng0127 authored and igcbot committed
local variable split update
local variable split update
1 parent 0e61ae3 commit e6ac6be

File tree

5 files changed

+74
-8
lines changed

5 files changed

+74
-8
lines changed

visa/G4_Kernel.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,13 @@ void G4_Kernel::calculateSimdSize() {
620620
computeChannelSlicing();
621621
}
622622

623+
bool G4_Kernel::canUpdateKernelToLargerGRF() {
624+
if (numRegTotal == grfMode.getMaxGRF())
625+
return false;
626+
627+
return true;
628+
}
629+
623630
//
624631
// Updates kernel's related structures to smaller GRF
625632
//

visa/G4_Kernel.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,7 @@ class G4_Kernel {
737737
const char *getName() const { return name; }
738738

739739
bool updateKernelToSmallerGRF();
740+
bool canUpdateKernelToLargerGRF();
740741
bool updateKernelToLargerGRF();
741742
void updateKernelByRegPressure(unsigned regPressure,
742743
bool forceGRFModeUp = false);

visa/GraphColor.cpp

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10078,7 +10078,7 @@ void VarSplit::globalSplit(IR_Builder &builder, G4_Kernel &kernel) {
1007810078
return;
1007910079
}
1008010080

10081-
void VarSplit::localSplit(IR_Builder &builder, G4_BB *bb) {
10081+
int VarSplit::localSplit(IR_Builder &builder, G4_BB *bb) {
1008210082
class CmpRegVarId {
1008310083
public:
1008410084
bool operator()(G4_RegVar *first, G4_RegVar *second) const {
@@ -10099,7 +10099,7 @@ void VarSplit::localSplit(IR_Builder &builder, G4_BB *bb) {
1009910099
bool hasSends = std::any_of(bb->begin(), bb->end(),
1010010100
[](G4_INST *inst) { return inst->isSend(); });
1010110101
if (!hasSends)
10102-
return;
10102+
return 0;
1010310103

1010410104
//
1010510105
// Iterate instruction in BB from back to front
@@ -10315,7 +10315,7 @@ void VarSplit::localSplit(IR_Builder &builder, G4_BB *bb) {
1031510315
toDelete.pop();
1031610316
}
1031710317

10318-
return;
10318+
return splitid;
1031910319
}
1032010320

1032110321
void GlobalRA::addrRegAlloc() {
@@ -11198,15 +11198,17 @@ bool GlobalRA::globalSplit(VarSplit& splitPass, GraphColor& coloring) {
1119811198
return false;
1119911199
}
1120011200

11201-
void GlobalRA::localSplit(bool fastCompile, VarSplit& splitPass) {
11201+
int GlobalRA::localSplit(bool fastCompile, VarSplit& splitPass) {
1120211202
// Do variable splitting in each iteration
1120311203
// Don't do when fast compile is required
11204+
int splitCount = 0;
1120411205
if (builder.getOption(vISA_LocalDeclareSplitInGlobalRA) && !fastCompile) {
1120511206
RA_TRACE(std::cout << "\t--split local send--\n");
1120611207
for (auto bb : kernel.fg) {
11207-
splitPass.localSplit(builder, bb);
11208+
splitCount += splitPass.localSplit(builder, bb);
1120811209
}
1120911210
}
11211+
return splitCount;
1121011212
}
1121111213

1121211214
std::pair<bool, bool> GlobalRA::bankConflict() {
@@ -11301,6 +11303,26 @@ bool GlobalRA::VRTIncreasedGRF(GraphColor &coloring) {
1130111303
return false;
1130211304
}
1130311305

11306+
bool GlobalRA::canVRTIncreasedGRF(GraphColor &coloring) {
11307+
if (kernel.useAutoGRFSelection()) {
11308+
bool infCostSpilled =
11309+
coloring.getSpilledLiveRanges().end() !=
11310+
std::find_if(coloring.getSpilledLiveRanges().begin(),
11311+
coloring.getSpilledLiveRanges().end(),
11312+
[](const LiveRange *spilledLR) {
11313+
return spilledLR->getSpillCost() == MAXSPILLCOST;
11314+
});
11315+
if ((infCostSpilled || kernel.grfMode.hasLargerGRFSameThreads() ||
11316+
computeSpillSize(coloring.getSpilledLiveRanges()) >
11317+
kernel.grfMode.getSpillThreshold())) {
11318+
if (kernel.canUpdateKernelToLargerGRF()) {
11319+
return true;
11320+
}
11321+
}
11322+
}
11323+
return false;
11324+
}
11325+
1130411326
void GlobalRA::splitOnSpill(bool fastCompile, GraphColor &coloring,
1130511327
LivenessAnalysis &liveAnalysis) {
1130611328
if (!kernel.getOption(vISA_Debug) && getIterNo() == 0 && !fastCompile &&
@@ -11733,6 +11755,7 @@ int GlobalRA::coloringRegAlloc() {
1173311755
}
1173411756

1173511757
bool rematDone = false, alignedScalarSplitDone = false;
11758+
bool loadSplitTryDone = false;
1173611759
bool reserveSpillReg = false;
1173711760
VarSplit splitPass(*this);
1173811761
DynPerfModel perfModel(kernel);
@@ -11764,7 +11787,15 @@ int GlobalRA::coloringRegAlloc() {
1176411787
spillAnalysis->Clear();
1176511788
}
1176611789

11767-
localSplit(fastCompile, splitPass);
11790+
// 1. For legacy, always do localSplit for old platforms.
11791+
// 2. For new platforms:
11792+
// a) Do localSplit when iteration 0 failed RA
11793+
// b) Always do localSplit for spill iterations, which may generate local
11794+
// split candidate.
11795+
if (iterationNo > 0 || loadSplitTryDone ||
11796+
!builder.onlyDoLocalVariableSplitWhenSpill()) {
11797+
localSplit(fastCompile, splitPass);
11798+
}
1176811799

1176911800
const auto [doBankConflictReduction, highInternalConflict] = bankConflict();
1177011801

@@ -11823,6 +11854,19 @@ int GlobalRA::coloringRegAlloc() {
1182311854
bool isColoringGood =
1182411855
coloring.regAlloc(doBankConflictReduction, highInternalConflict, &rpe);
1182511856
if (!isColoringGood) {
11857+
// Retry with local variable splitting if there is potential chance for
11858+
// VRT bump up.
11859+
if (builder.onlyDoLocalVariableSplitWhenSpill()) {
11860+
if (!loadSplitTryDone && canVRTIncreasedGRF(coloring)) {
11861+
if (localSplit(fastCompile, splitPass) > 0) {
11862+
loadSplitTryDone = true;
11863+
// Run one more iteration 0 with local split to avoid unnecessary
11864+
// bump up
11865+
continue;
11866+
}
11867+
}
11868+
}
11869+
1182611870
// When there are spills and -abortonspill is set, vISA will bump up the
1182711871
// number of GRFs first and try to compile without spills under one of
1182811872
// the following conditions:
@@ -11849,6 +11893,15 @@ int GlobalRA::coloringRegAlloc() {
1184911893
if (rerunGRAIter(rerunGRA1 || rerunGRA2 || rerunGRA3))
1185011894
continue;
1185111895

11896+
// For new platforms, check if there is local split space
11897+
if (!loadSplitTryDone && builder.onlyDoLocalVariableSplitWhenSpill()) {
11898+
if (localSplit(fastCompile, splitPass) > 0) {
11899+
loadSplitTryDone = true;
11900+
// Run one more iteration 0 for local split
11901+
continue;
11902+
}
11903+
}
11904+
1185211905
splitOnSpill(fastCompile, coloring, liveAnalysis);
1185311906

1185411907
reserveSpillReg = convertToFailSafe(reserveSpillReg, coloring, liveAnalysis,

visa/GraphColor.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2178,7 +2178,7 @@ class GlobalRA {
21782178
bool alignedScalarSplitDone,
21792179
GraphColor &coloring);
21802180
bool globalSplit(VarSplit &splitPass, GraphColor &coloring);
2181-
void localSplit(bool fastCompile, VarSplit &splitPass);
2181+
int localSplit(bool fastCompile, VarSplit &splitPass);
21822182
// return <doBCReduction, highInternalConflict>
21832183
std::pair<bool, bool> bankConflict();
21842184
// return reserveSpillReg
@@ -2190,6 +2190,7 @@ class GlobalRA {
21902190
FINALIZER_INFO *jitInfo);
21912191
void writeVerboseRPEStats(RPE &rpe);
21922192
bool VRTIncreasedGRF(GraphColor &coloring);
2193+
bool canVRTIncreasedGRF(GraphColor &coloring);
21932194
void splitOnSpill(bool fastCompile, GraphColor &coloring,
21942195
LivenessAnalysis &livenessAnalysis);
21952196
bool convertToFailSafe(bool reserveSpillReg, GraphColor &coloring,
@@ -2259,7 +2260,7 @@ class VarSplit {
22592260
bool didLocalSplit = false;
22602261
bool didGlobalSplit = false;
22612262

2262-
void localSplit(IR_Builder &builder, G4_BB *bb);
2263+
int localSplit(IR_Builder &builder, G4_BB *bb);
22632264
void globalSplit(IR_Builder &builder, G4_Kernel &kernel);
22642265
bool canDoGlobalSplit(IR_Builder &builder, G4_Kernel &kernel,
22652266
uint32_t sendSpillRefCount);

visa/HWCaps.inc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,10 @@ bool supports4GRFAlign() const {
918918
return false;
919919
}
920920

921+
bool onlyDoLocalVariableSplitWhenSpill() const {
922+
return false;
923+
}
924+
921925
bool needA0WAR() const {
922926
return (getPlatform() >= Xe2);
923927
}

0 commit comments

Comments
 (0)