Skip to content

Commit bb6902d

Browse files
committed
Change SPIRV debugger control flow to emulate maximal reconvergence
1 parent 8d65131 commit bb6902d

File tree

3 files changed

+125
-10
lines changed

3 files changed

+125
-10
lines changed

renderdoc/driver/shaders/spirv/spirv_debug.cpp

+5
Original file line number | Diff line number | Diff line change
@@ -617,6 +617,9 @@ void ThreadState::JumpToLabel(Id target)
617617

618618
frame->lastBlock = frame->curBlock;
619619
frame->curBlock = target;
620+
enteredBlocks.push_back(frame->curBlock.value());
621+
622+
diverged = true;
620623

621624
nextInstruction = debugger.GetInstructionForLabel(target) + 1;
622625

@@ -734,6 +737,8 @@ void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState>
734737

735738
Iter it = debugger.GetIterForInstruction(nextInstruction);
736739
nextInstruction++;
740+
diverged = false;
741+
enteredBlocks.clear();
737742

738743
OpDecoder opdata(it);
739744

renderdoc/driver/shaders/spirv/spirv_debug.h

+8
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
#include "api/replay/rdcarray.h"
2828
#include "maths/vec.h"
2929
#include "spirv_common.h"
30+
#include "spirv_controlflow.h"
3031
#include "spirv_processor.h"
3132

3233
struct SPIRVInterfaceAccess;
@@ -231,6 +232,11 @@ struct ThreadState
231232
// the list of IDs that are currently valid and live
232233
rdcarray<Id> live;
233234

235+
// true if executed an operation which could trigger divergence
236+
bool diverged;
237+
// list of blocks that were entered in a single step (used for tracking thread convergence)
238+
rdcarray<uint32_t> enteredBlocks;
239+
234240
std::map<Id, uint32_t> lastWrite;
235241

236242
// quad ID (arbitrary, just used to find neighbours for derivatives)
@@ -528,6 +534,8 @@ class Debugger : public Processor, public ShaderDebugger
528534
rdcarray<LocalMapping> activeLocalMappings;
529535
} m_DebugInfo;
530536

537+
rdcspv::ControlFlow controlFlow;
538+
531539
const ScopeData *GetScope(size_t offset) const;
532540
};
533541

renderdoc/driver/shaders/spirv/spirv_debug_setup.cpp

+112-10
Original file line number | Diff line number | Diff line change
@@ -1561,6 +1561,7 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
15611561

15621562
std::sort(liveGlobals.begin(), liveGlobals.end());
15631563

1564+
rdcarray<rdcspv::ThreadIndex> threadIds;
15641565
for(uint32_t i = 0; i < threadsInWorkgroup; i++)
15651566
{
15661567
ThreadState &lane = workgroup[i];
@@ -1589,8 +1590,14 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
15891590
// now that the globals are allocated and their storage won't move, we can take pointers to them
15901591
for(const PointerId &p : pointerIDs)
15911592
p.Set(*this, global, lane);
1593+
1594+
// Only add active lanes to control flow
1595+
if(!lane.dead)
1596+
threadIds.push_back(i);
15921597
}
15931598

1599+
controlFlow.Construct(threadIds);
1600+
15941601
// find quad neighbours
15951602
{
15961603
rdcarray<uint32_t> processedQuads;
@@ -2454,6 +2461,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
24542461
if(steps == 0)
24552462
{
24562463
ShaderDebugState initial;
2464+
uint32_t startBlock = INVALID_BLOCK_INDEX;
24572465

24582466
// we should be sitting at the entry point function prologue, step forward into the first block
24592467
// and past any function-local variable declarations
@@ -2466,6 +2474,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
24662474
thread.EnterEntryPoint(&initial);
24672475
FillCallstack(thread, initial);
24682476
initial.nextInstruction = thread.nextInstruction;
2477+
startBlock = thread.callstack.back()->curBlock.value();
24692478
}
24702479
else
24712480
{
@@ -2487,6 +2496,18 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
24872496

24882497
ret.push_back(std::move(initial));
24892498

2499+
// Set the initial block for the threads in the root tangle
2500+
ThreadBlockStates threadBlockStates;
2501+
TangleGroup &tangles = controlFlow.GetTangles();
2502+
RDCASSERTEQUAL(tangles.size(), 1);
2503+
RDCASSERTNOTEQUAL(startBlock, INVALID_BLOCK_INDEX);
2504+
for(Tangle &tangle : tangles)
2505+
{
2506+
RDCASSERT(tangle.IsAliveActive());
2507+
for(uint32_t threadIdx = 0; threadIdx < workgroup.size(); ++threadIdx)
2508+
threadBlockStates[threadIdx].push_back(startBlock);
2509+
}
2510+
controlFlow.UpdateState(threadBlockStates);
24902511
steps++;
24912512
}
24922513

@@ -2505,29 +2526,66 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
25052526
if(active.Finished())
25062527
break;
25072528

2508-
// calculate the current mask of which threads are active
2509-
CalcActiveMask(activeMask);
2529+
// Execute the threads in each active tangle
2530+
ThreadBlockStates threadBlockStates;
2531+
TangleGroup &tangles = controlFlow.GetTangles();
25102532

2511-
// step all active members of the workgroup
2512-
for(size_t lane = 0; lane < workgroup.size(); lane++)
2533+
for(Tangle &tangle : tangles)
25132534
{
2514-
ThreadState &thread = workgroup[lane];
2535+
if(!tangle.IsAliveActive())
2536+
continue;
2537+
2538+
rdcarray<rdcspv::ThreadReference> threadRefs = tangle.GetThreadRefs();
2539+
// calculate the current active thread mask from the threads in the tangle
2540+
{
2541+
// one bool per workgroup thread
2542+
activeMask.resize(workgroup.size());
25152543

2516-
if(activeMask[lane])
2544+
// start with all threads as inactive
2545+
for(size_t i = 0; i < workgroup.size(); i++)
2546+
activeMask[i] = false;
2547+
2548+
// activate the threads in the tangle
2549+
for(const rdcspv::ThreadReference &ref : threadRefs)
2550+
{
2551+
uint32_t idx = ref.id;
2552+
RDCASSERT(idx < workgroup.size(), idx, workgroup.size());
2553+
RDCASSERT(!workgroup[idx].Finished());
2554+
activeMask[idx] = true;
2555+
}
2556+
}
2557+
2558+
BlockIndex newMergeBlock = INVALID_BLOCK_INDEX;
2559+
uint32_t countActiveThreads = 0;
2560+
uint32_t countDivergedThreads = 0;
2561+
uint32_t countMergeBlockThreads = 0;
2562+
2563+
// step all active members of the workgroup
2564+
for(size_t lane = 0; lane < workgroup.size(); lane++)
25172565
{
2518-
if(thread.nextInstruction >= instructionOffsets.size())
2566+
if(!activeMask[lane])
2567+
continue;
2568+
++countActiveThreads;
2569+
2570+
ThreadState &thread = workgroup[lane];
2571+
const uint32_t currentPC = thread.nextInstruction;
2572+
const uint32_t threadId = lane;
2573+
RDCASSERTEQUAL(thread.callstack.back()->curBlock.value(), tangle.GetBlockIndex());
2574+
if(currentPC >= instructionOffsets.size())
25192575
{
25202576
if(lane == activeLaneIndex)
25212577
ret.emplace_back();
25222578

2579+
tangle.SetThreadDead(threadId);
25232580
continue;
25242581
}
2582+
Id prevConvergeBlock = thread.mergeBlock;
25252583

25262584
if(lane == activeLaneIndex)
25272585
{
25282586
ShaderDebugState state;
25292587

2530-
size_t instOffs = instructionOffsets[thread.nextInstruction];
2588+
size_t instOffs = instructionOffsets[currentPC];
25312589

25322590
// see if we're retiring any IDs at this state
25332591
for(size_t l = 0; l < thread.live.size();)
@@ -2566,7 +2624,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
25662624

25672625
if(m_DebugInfo.valid)
25682626
{
2569-
size_t endOffs = instructionOffsets[thread.nextInstruction - 1];
2627+
size_t endOffs = instructionOffsets[currentPC - 1];
25702628

25712629
// append any inlined functions to the top of the stack
25722630
InlineData *inlined = m_DebugInfo.lineInline[endOffs];
@@ -2614,8 +2672,53 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
26142672
{
26152673
thread.StepNext(NULL, workgroup, activeMask);
26162674
}
2675+
threadBlockStates[threadId] = thread.enteredBlocks;
2676+
if(!thread.enteredBlocks.empty())
2677+
{
2678+
const uint32_t nextBlock = thread.callstack.back()->curBlock.value();
2679+
// The last block entered should be the current block
2680+
RDCASSERTEQUAL(nextBlock, thread.enteredBlocks.back());
2681+
}
2682+
2683+
Id newConvergeBlock = thread.mergeBlock;
2684+
// the thread executed a merge block
2685+
if(newConvergeBlock != prevConvergeBlock)
2686+
{
2687+
if(newMergeBlock == INVALID_BLOCK_INDEX)
2688+
{
2689+
newMergeBlock = newConvergeBlock.value();
2690+
RDCASSERTNOTEQUAL(newMergeBlock, INVALID_BLOCK_INDEX);
2691+
}
2692+
else
2693+
{
2694+
// All the threads in the tangle should converge to the same block
2695+
RDCASSERTEQUAL(newConvergeBlock.value(), newMergeBlock);
2696+
}
2697+
++countMergeBlockThreads;
2698+
}
2699+
// JAKE TODO: function exit is a convergence point
2700+
2701+
if(thread.Finished())
2702+
tangle.SetThreadDead(threadId);
2703+
2704+
if(thread.diverged)
2705+
++countDivergedThreads;
2706+
}
2707+
if(countMergeBlockThreads)
2708+
{
2709+
// all the active threads should have a merge block if any have a merge block
2710+
RDCASSERTEQUAL(countMergeBlockThreads, countActiveThreads);
2711+
tangle.AddMergePoint(newMergeBlock);
2712+
}
2713+
2714+
if(countDivergedThreads)
2715+
{
2716+
// all the active threads should have diverged if any diverges
2717+
RDCASSERTEQUAL(countDivergedThreads, countActiveThreads);
2718+
tangle.SetDiverged(true);
26172719
}
26182720
}
2721+
controlFlow.UpdateState(threadBlockStates);
26192722
}
26202723

26212724
return ret;
@@ -4324,7 +4427,6 @@ void Debugger::RegisterOp(Iter it)
43244427
curFunction = NULL;
43254428
}
43264429
}
4327-
43284430
}; // namespace rdcspv
43294431

43304432
#if ENABLED(ENABLE_UNIT_TESTS)

0 commit comments

Comments
 (0)