Skip to content

Commit 7c3f793

Browse files
committed
Change SPIRV debugger control flow to emulate maximal reconvergence
1 parent f9c3364 commit 7c3f793

File tree

3 files changed

+166
-11
lines changed

3 files changed

+166
-11
lines changed

renderdoc/driver/shaders/spirv/spirv_debug.cpp

+21-1
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,11 @@ ThreadState::~ThreadState()
173173
callstack.clear();
174174
}
175175

176+
void ThreadState::SetConvergencePoint(Id block)
177+
{
178+
convergenceInstruction = debugger.GetInstructionForLabel(block);
179+
}
180+
176181
bool ThreadState::Finished() const
177182
{
178183
return dead || callstack.empty();
@@ -618,7 +623,11 @@ void ThreadState::JumpToLabel(Id target)
618623
frame->lastBlock = frame->curBlock;
619624
frame->curBlock = target;
620625

621-
nextInstruction = debugger.GetInstructionForLabel(target) + 1;
626+
diverged = true;
627+
628+
uint32_t labelInstruction = debugger.GetInstructionForLabel(target);
629+
enteredPoints.push_back(labelInstruction);
630+
nextInstruction = labelInstruction + 1;
622631

623632
// if jumping to an empty unconditional loop header, continue to the loop block
624633
Iter it = debugger.GetIterForInstruction(nextInstruction);
@@ -627,6 +636,7 @@ void ThreadState::JumpToLabel(Id target)
627636
OpLoopMerge merge(it);
628637

629638
mergeBlock = merge.mergeBlock;
639+
SetConvergencePoint(merge.mergeBlock);
630640

631641
it++;
632642
if(it.opcode() == Op::Branch)
@@ -699,6 +709,7 @@ void ThreadState::SkipIgnoredInstructions()
699709
OpSelectionMerge merge(it);
700710

701711
mergeBlock = merge.mergeBlock;
712+
SetConvergencePoint(merge.mergeBlock);
702713

703714
nextInstruction++;
704715
continue;
@@ -709,6 +720,7 @@ void ThreadState::SkipIgnoredInstructions()
709720
OpLoopMerge merge(it);
710721

711722
mergeBlock = merge.mergeBlock;
723+
SetConvergencePoint(merge.mergeBlock);
712724

713725
nextInstruction++;
714726
continue;
@@ -734,6 +746,10 @@ void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState>
734746

735747
Iter it = debugger.GetIterForInstruction(nextInstruction);
736748
nextInstruction++;
749+
diverged = false;
750+
enteredPoints.clear();
751+
convergenceInstruction = INVALID_EXECUTION_POINT;
752+
functionReturnPoint = INVALID_EXECUTION_POINT;
737753

738754
OpDecoder opdata(it);
739755

@@ -3879,6 +3895,8 @@ void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState>
38793895
// function. The second time we do have a return value so we process it and continue
38803896
if(returnValue.name.empty())
38813897
{
3898+
// The instruction after a function call is defined to be a convergence point
3899+
functionReturnPoint = nextInstruction;
38823900
uint32_t returnInstruction = nextInstruction - 1;
38833901
nextInstruction = debugger.GetInstructionForFunction(call.function);
38843902

@@ -3891,6 +3909,8 @@ void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState>
38913909
{
38923910
SetDst(call.result, returnValue);
38933911
returnValue.name.clear();
3912+
// The instruction after a function call is defined to be a convergence point, mark that we entered it
3913+
enteredPoints.push_back(nextInstruction);
38943914
}
38953915
break;
38963916
}

renderdoc/driver/shaders/spirv/spirv_debug.h

+11
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "api/replay/rdcarray.h"
2828
#include "maths/vec.h"
2929
#include "spirv_common.h"
30+
#include "spirv_controlflow.h"
3031
#include "spirv_processor.h"
3132

3233
struct SPIRVInterfaceAccess;
@@ -225,12 +226,19 @@ struct ThreadState
225226

226227
// the id of the merge block that the last branch targeted
227228
Id mergeBlock;
229+
uint32_t convergenceInstruction;
230+
uint32_t functionReturnPoint;
228231
ShaderVariable returnValue;
229232
rdcarray<StackFrame *> callstack;
230233

231234
// the list of IDs that are currently valid and live
232235
rdcarray<Id> live;
233236

237+
// true if executed an operation which could trigger divergence
238+
bool diverged;
239+
// list of potential convergence points that were entered in a single step (used for tracking thread convergence)
240+
rdcarray<uint32_t> enteredPoints;
241+
234242
std::map<Id, uint32_t> lastWrite;
235243

236244
// quad ID (arbitrary, just used to find neighbours for derivatives)
@@ -259,6 +267,7 @@ struct ThreadState
259267
bool ReferencePointer(Id id);
260268

261269
void SkipIgnoredInstructions();
270+
void SetConvergencePoint(Id block);
262271

263272
ShaderDebugState *m_State = NULL;
264273
};
@@ -528,6 +537,8 @@ class Debugger : public Processor, public ShaderDebugger
528537
rdcarray<LocalMapping> activeLocalMappings;
529538
} m_DebugInfo;
530539

540+
rdcspv::ControlFlow controlFlow;
541+
531542
const ScopeData *GetScope(size_t offset) const;
532543
};
533544

renderdoc/driver/shaders/spirv/spirv_debug_setup.cpp

+134-10
Original file line numberDiff line numberDiff line change
@@ -1561,6 +1561,7 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
15611561

15621562
std::sort(liveGlobals.begin(), liveGlobals.end());
15631563

1564+
rdcarray<rdcspv::ThreadIndex> threadIds;
15641565
for(uint32_t i = 0; i < threadsInWorkgroup; i++)
15651566
{
15661567
ThreadState &lane = workgroup[i];
@@ -1589,8 +1590,14 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
15891590
// now that the globals are allocated and their storage won't move, we can take pointers to them
15901591
for(const PointerId &p : pointerIDs)
15911592
p.Set(*this, global, lane);
1593+
1594+
// Only add active lanes to control flow
1595+
if(!lane.dead)
1596+
threadIds.push_back(i);
15921597
}
15931598

1599+
controlFlow.Construct(threadIds);
1600+
15941601
// find quad neighbours
15951602
{
15961603
rdcarray<uint32_t> processedQuads;
@@ -2462,6 +2469,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
24622469
if(steps == 0)
24632470
{
24642471
ShaderDebugState initial;
2472+
uint32_t startBlock = INVALID_EXECUTION_POINT;
24652473

24662474
// we should be sitting at the entry point function prologue, step forward into the first block
24672475
// and past any function-local variable declarations
@@ -2474,6 +2482,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
24742482
thread.EnterEntryPoint(&initial);
24752483
FillCallstack(thread, initial);
24762484
initial.nextInstruction = thread.nextInstruction;
2485+
startBlock = thread.callstack.back()->curBlock.value();
24772486
}
24782487
else
24792488
{
@@ -2495,6 +2504,18 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
24952504

24962505
ret.push_back(std::move(initial));
24972506

2507+
// Set the initial block for the threads in the root tangle
2508+
ThreadExecutionStates threadExecutionStates;
2509+
TangleGroup &tangles = controlFlow.GetTangles();
2510+
RDCASSERTEQUAL(tangles.size(), 1);
2511+
RDCASSERTNOTEQUAL(startBlock, INVALID_EXECUTION_POINT);
2512+
for(Tangle &tangle : tangles)
2513+
{
2514+
RDCASSERT(tangle.IsAliveActive());
2515+
for(uint32_t threadIdx = 0; threadIdx < workgroup.size(); ++threadIdx)
2516+
threadExecutionStates[threadIdx].push_back(startBlock);
2517+
}
2518+
controlFlow.UpdateState(threadExecutionStates);
24982519
steps++;
24992520
}
25002521

@@ -2513,29 +2534,67 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
25132534
if(active.Finished())
25142535
break;
25152536

2516-
// calculate the current mask of which threads are active
2517-
CalcActiveMask(activeMask);
2537+
// Execute the threads in each active tangle
2538+
ThreadExecutionStates threadExecutionStates;
2539+
TangleGroup &tangles = controlFlow.GetTangles();
25182540

2519-
// step all active members of the workgroup
2520-
for(size_t lane = 0; lane < workgroup.size(); lane++)
2541+
bool anyActiveThreads = false;
2542+
for(Tangle &tangle : tangles)
25212543
{
2522-
ThreadState &thread = workgroup[lane];
2544+
if(!tangle.IsAliveActive())
2545+
continue;
2546+
2547+
rdcarray<rdcspv::ThreadReference> threadRefs = tangle.GetThreadRefs();
2548+
// calculate the current active thread mask from the threads in the tangle
2549+
{
2550+
// one bool per workgroup thread
2551+
activeMask.resize(workgroup.size());
2552+
2553+
// start with all threads as inactive
2554+
for(size_t i = 0; i < workgroup.size(); i++)
2555+
activeMask[i] = false;
2556+
2557+
// activate the threads in the tangle
2558+
for(const rdcspv::ThreadReference &ref : threadRefs)
2559+
{
2560+
uint32_t idx = ref.id;
2561+
RDCASSERT(idx < workgroup.size(), idx, workgroup.size());
2562+
RDCASSERT(!workgroup[idx].Finished());
2563+
activeMask[idx] = true;
2564+
anyActiveThreads = true;
2565+
}
2566+
}
25232567

2524-
if(activeMask[lane])
2568+
ExecutionPoint newConvergeInstruction = INVALID_EXECUTION_POINT;
2569+
ExecutionPoint newFunctionReturnPoint = INVALID_EXECUTION_POINT;
2570+
uint32_t countActiveThreads = 0;
2571+
uint32_t countDivergedThreads = 0;
2572+
uint32_t countConvergePointThreads = 0;
2573+
uint32_t countFunctionReturnThreads = 0;
2574+
2575+
// step all active members of the workgroup
2576+
for(size_t lane = 0; lane < workgroup.size(); lane++)
25252577
{
2526-
if(thread.nextInstruction >= instructionOffsets.size())
2578+
if(!activeMask[lane])
2579+
continue;
2580+
++countActiveThreads;
2581+
2582+
ThreadState &thread = workgroup[lane];
2583+
const uint32_t currentPC = thread.nextInstruction;
2584+
const uint32_t threadId = lane;
2585+
if(currentPC >= instructionOffsets.size())
25272586
{
25282587
if(lane == activeLaneIndex)
25292588
ret.emplace_back();
25302589

2590+
tangle.SetThreadDead(threadId);
25312591
continue;
25322592
}
2533-
25342593
if(lane == activeLaneIndex)
25352594
{
25362595
ShaderDebugState state;
25372596

2538-
size_t instOffs = instructionOffsets[thread.nextInstruction];
2597+
size_t instOffs = instructionOffsets[currentPC];
25392598

25402599
// see if we're retiring any IDs at this state
25412600
for(size_t l = 0; l < thread.live.size();)
@@ -2574,7 +2633,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
25742633

25752634
if(m_DebugInfo.valid)
25762635
{
2577-
size_t endOffs = instructionOffsets[thread.nextInstruction - 1];
2636+
size_t endOffs = instructionOffsets[currentPC - 1];
25782637

25792638
// append any inlined functions to the top of the stack
25802639
InlineData *inlined = m_DebugInfo.lineInline[endOffs];
@@ -2622,8 +2681,73 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
26222681
{
26232682
thread.StepNext(NULL, workgroup, activeMask);
26242683
}
2684+
threadExecutionStates[threadId] = thread.enteredPoints;
2685+
2686+
uint32_t threadConvergeInstruction = thread.convergenceInstruction;
2687+
// the thread activated a new convergence point
2688+
if(threadConvergeInstruction != INVALID_EXECUTION_POINT)
2689+
{
2690+
if(newConvergeInstruction == INVALID_EXECUTION_POINT)
2691+
{
2692+
newConvergeInstruction = threadConvergeInstruction;
2693+
RDCASSERTNOTEQUAL(newConvergeInstruction, INVALID_EXECUTION_POINT);
2694+
}
2695+
else
2696+
{
2697+
// All the threads in the tangle should set the same convergence point
2698+
RDCASSERTEQUAL(threadConvergeInstruction, newConvergeInstruction);
2699+
}
2700+
++countConvergePointThreads;
2701+
}
2702+
uint32_t threadFunctionReturnPoint = thread.functionReturnPoint;
2703+
// the thread activated a new function return point
2704+
if(threadFunctionReturnPoint != INVALID_EXECUTION_POINT)
2705+
{
2706+
if(newFunctionReturnPoint == INVALID_EXECUTION_POINT)
2707+
{
2708+
newFunctionReturnPoint = threadFunctionReturnPoint;
2709+
RDCASSERTNOTEQUAL(newFunctionReturnPoint, INVALID_EXECUTION_POINT);
2710+
}
2711+
else
2712+
{
2713+
// All the threads in the tangle should set the same function return point
2714+
RDCASSERTEQUAL(threadFunctionReturnPoint, newFunctionReturnPoint);
2715+
}
2716+
++countFunctionReturnThreads;
2717+
}
2718+
2719+
if(thread.Finished())
2720+
tangle.SetThreadDead(threadId);
2721+
2722+
if(thread.diverged)
2723+
++countDivergedThreads;
2724+
}
2725+
if(countConvergePointThreads)
2726+
{
2727+
// all the active threads should have a convergence point if any have one
2728+
RDCASSERTEQUAL(countConvergePointThreads, countActiveThreads);
2729+
tangle.AddMergePoint(newConvergeInstruction);
2730+
}
2731+
if(countFunctionReturnThreads)
2732+
{
2733+
// all the active threads should have a function return point if any have one
2734+
RDCASSERTEQUAL(countFunctionReturnThreads, countActiveThreads);
2735+
tangle.AddFunctionReturnPoint(newFunctionReturnPoint);
2736+
}
2737+
if(countDivergedThreads)
2738+
{
2739+
// all the active threads should have diverged if any diverges
2740+
RDCASSERTEQUAL(countDivergedThreads, countActiveThreads);
2741+
tangle.SetDiverged(true);
26252742
}
26262743
}
2744+
if(!anyActiveThreads)
2745+
{
2746+
active.dead = true;
2747+
controlFlow.UpdateState(threadExecutionStates);
2748+
RDCERR("No active threads in any tangle, killing active thread to terminate the debugger");
2749+
}
2750+
controlFlow.UpdateState(threadExecutionStates);
26272751
}
26282752

26292753
return ret;

0 commit comments

Comments
 (0)