@@ -1561,6 +1561,7 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
1561
1561
1562
1562
std::sort (liveGlobals.begin (), liveGlobals.end ());
1563
1563
1564
+ rdcarray<rdcspv::ThreadIndex> threadIds;
1564
1565
for (uint32_t i = 0 ; i < threadsInWorkgroup; i++)
1565
1566
{
1566
1567
ThreadState &lane = workgroup[i];
@@ -1589,8 +1590,14 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
1589
1590
// now that the globals are allocated and their storage won't move, we can take pointers to them
1590
1591
for (const PointerId &p : pointerIDs)
1591
1592
p.Set (*this , global, lane);
1593
+
1594
+ // Only add active lanes to control flow
1595
+ if (!lane.dead )
1596
+ threadIds.push_back (i);
1592
1597
}
1593
1598
1599
+ controlFlow.Construct (threadIds);
1600
+
1594
1601
// find quad neighbours
1595
1602
{
1596
1603
rdcarray<uint32_t > processedQuads;
@@ -2462,6 +2469,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
2462
2469
if (steps == 0 )
2463
2470
{
2464
2471
ShaderDebugState initial;
2472
+ uint32_t startBlock = INVALID_EXECUTION_POINT;
2465
2473
2466
2474
// we should be sitting at the entry point function prologue, step forward into the first block
2467
2475
// and past any function-local variable declarations
@@ -2474,6 +2482,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
2474
2482
thread.EnterEntryPoint (&initial);
2475
2483
FillCallstack (thread, initial);
2476
2484
initial.nextInstruction = thread.nextInstruction ;
2485
+ startBlock = thread.callstack .back ()->curBlock .value ();
2477
2486
}
2478
2487
else
2479
2488
{
@@ -2495,6 +2504,18 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
2495
2504
2496
2505
ret.push_back (std::move (initial));
2497
2506
2507
+ // Set the initial block for the threads in the root tangle
2508
+ ThreadExecutionStates threadExecutionStates;
2509
+ TangleGroup &tangles = controlFlow.GetTangles ();
2510
+ RDCASSERTEQUAL (tangles.size (), 1 );
2511
+ RDCASSERTNOTEQUAL (startBlock, INVALID_EXECUTION_POINT);
2512
+ for (Tangle &tangle : tangles)
2513
+ {
2514
+ RDCASSERT (tangle.IsAliveActive ());
2515
+ for (uint32_t threadIdx = 0 ; threadIdx < workgroup.size (); ++threadIdx)
2516
+ threadExecutionStates[threadIdx].push_back (startBlock);
2517
+ }
2518
+ controlFlow.UpdateState (threadExecutionStates);
2498
2519
steps++;
2499
2520
}
2500
2521
@@ -2513,29 +2534,67 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
2513
2534
if (active.Finished ())
2514
2535
break ;
2515
2536
2516
- // calculate the current mask of which threads are active
2517
- CalcActiveMask (activeMask);
2537
+ // Execute the threads in each active tangle
2538
+ ThreadExecutionStates threadExecutionStates;
2539
+ TangleGroup &tangles = controlFlow.GetTangles ();
2518
2540
2519
- // step all active members of the workgroup
2520
- for (size_t lane = 0 ; lane < workgroup. size (); lane++ )
2541
+ bool anyActiveThreads = false ;
2542
+ for (Tangle &tangle : tangles )
2521
2543
{
2522
- ThreadState &thread = workgroup[lane];
2544
+ if (!tangle.IsAliveActive ())
2545
+ continue ;
2546
+
2547
+ rdcarray<rdcspv::ThreadReference> threadRefs = tangle.GetThreadRefs ();
2548
+ // calculate the current active thread mask from the threads in the tangle
2549
+ {
2550
+ // one bool per workgroup thread
2551
+ activeMask.resize (workgroup.size ());
2552
+
2553
+ // start with all threads as inactive
2554
+ for (size_t i = 0 ; i < workgroup.size (); i++)
2555
+ activeMask[i] = false ;
2556
+
2557
+ // activate the threads in the tangle
2558
+ for (const rdcspv::ThreadReference &ref : threadRefs)
2559
+ {
2560
+ uint32_t idx = ref.id ;
2561
+ RDCASSERT (idx < workgroup.size (), idx, workgroup.size ());
2562
+ RDCASSERT (!workgroup[idx].Finished ());
2563
+ activeMask[idx] = true ;
2564
+ anyActiveThreads = true ;
2565
+ }
2566
+ }
2523
2567
2524
- if (activeMask[lane])
2568
+ ExecutionPoint newConvergeInstruction = INVALID_EXECUTION_POINT;
2569
+ ExecutionPoint newFunctionReturnPoint = INVALID_EXECUTION_POINT;
2570
+ uint32_t countActiveThreads = 0 ;
2571
+ uint32_t countDivergedThreads = 0 ;
2572
+ uint32_t countConvergePointThreads = 0 ;
2573
+ uint32_t countFunctionReturnThreads = 0 ;
2574
+
2575
+ // step all active members of the workgroup
2576
+ for (size_t lane = 0 ; lane < workgroup.size (); lane++)
2525
2577
{
2526
- if (thread.nextInstruction >= instructionOffsets.size ())
2578
+ if (!activeMask[lane])
2579
+ continue ;
2580
+ ++countActiveThreads;
2581
+
2582
+ ThreadState &thread = workgroup[lane];
2583
+ const uint32_t currentPC = thread.nextInstruction ;
2584
+ const uint32_t threadId = lane;
2585
+ if (currentPC >= instructionOffsets.size ())
2527
2586
{
2528
2587
if (lane == activeLaneIndex)
2529
2588
ret.emplace_back ();
2530
2589
2590
+ tangle.SetThreadDead (threadId);
2531
2591
continue ;
2532
2592
}
2533
-
2534
2593
if (lane == activeLaneIndex)
2535
2594
{
2536
2595
ShaderDebugState state;
2537
2596
2538
- size_t instOffs = instructionOffsets[thread. nextInstruction ];
2597
+ size_t instOffs = instructionOffsets[currentPC ];
2539
2598
2540
2599
// see if we're retiring any IDs at this state
2541
2600
for (size_t l = 0 ; l < thread.live .size ();)
@@ -2574,7 +2633,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
2574
2633
2575
2634
if (m_DebugInfo.valid )
2576
2635
{
2577
- size_t endOffs = instructionOffsets[thread. nextInstruction - 1 ];
2636
+ size_t endOffs = instructionOffsets[currentPC - 1 ];
2578
2637
2579
2638
// append any inlined functions to the top of the stack
2580
2639
InlineData *inlined = m_DebugInfo.lineInline [endOffs];
@@ -2622,8 +2681,73 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
2622
2681
{
2623
2682
thread.StepNext (NULL , workgroup, activeMask);
2624
2683
}
2684
+ threadExecutionStates[threadId] = thread.enteredPoints ;
2685
+
2686
+ uint32_t threadConvergeInstruction = thread.convergenceInstruction ;
2687
+ // the thread activated a new convergence point
2688
+ if (threadConvergeInstruction != INVALID_EXECUTION_POINT)
2689
+ {
2690
+ if (newConvergeInstruction == INVALID_EXECUTION_POINT)
2691
+ {
2692
+ newConvergeInstruction = threadConvergeInstruction;
2693
+ RDCASSERTNOTEQUAL (newConvergeInstruction, INVALID_EXECUTION_POINT);
2694
+ }
2695
+ else
2696
+ {
2697
+ // All the threads in the tangle should set the same convergence point
2698
+ RDCASSERTEQUAL (threadConvergeInstruction, newConvergeInstruction);
2699
+ }
2700
+ ++countConvergePointThreads;
2701
+ }
2702
+ uint32_t threadFunctionReturnPoint = thread.functionReturnPoint ;
2703
+ // the thread activated a new function return point
2704
+ if (threadFunctionReturnPoint != INVALID_EXECUTION_POINT)
2705
+ {
2706
+ if (newFunctionReturnPoint == INVALID_EXECUTION_POINT)
2707
+ {
2708
+ newFunctionReturnPoint = threadFunctionReturnPoint;
2709
+ RDCASSERTNOTEQUAL (newFunctionReturnPoint, INVALID_EXECUTION_POINT);
2710
+ }
2711
+ else
2712
+ {
2713
+ // All the threads in the tangle should set the same function return point
2714
+ RDCASSERTEQUAL (threadFunctionReturnPoint, newFunctionReturnPoint);
2715
+ }
2716
+ ++countFunctionReturnThreads;
2717
+ }
2718
+
2719
+ if (thread.Finished ())
2720
+ tangle.SetThreadDead (threadId);
2721
+
2722
+ if (thread.diverged )
2723
+ ++countDivergedThreads;
2724
+ }
2725
+ if (countConvergePointThreads)
2726
+ {
2727
+ // all the active threads should have a convergence point if any have one
2728
+ RDCASSERTEQUAL (countConvergePointThreads, countActiveThreads);
2729
+ tangle.AddMergePoint (newConvergeInstruction);
2730
+ }
2731
+ if (countFunctionReturnThreads)
2732
+ {
2733
+ // all the active threads should have a function return point if any have one
2734
+ RDCASSERTEQUAL (countFunctionReturnThreads, countActiveThreads);
2735
+ tangle.AddFunctionReturnPoint (newFunctionReturnPoint);
2736
+ }
2737
+ if (countDivergedThreads)
2738
+ {
2739
+ // all the active threads should have diverged if any diverges
2740
+ RDCASSERTEQUAL (countDivergedThreads, countActiveThreads);
2741
+ tangle.SetDiverged (true );
2625
2742
}
2626
2743
}
2744
+ if (!anyActiveThreads)
2745
+ {
2746
+ active.dead = true ;
2747
+ controlFlow.UpdateState (threadExecutionStates);
2748
+ RDCERR (" No active threads in any tangle, killing active thread to terminate the debugger" );
2749
+ }
2750
+ controlFlow.UpdateState (threadExecutionStates);
2627
2751
}
2628
2752
2629
2753
return ret;
0 commit comments