@@ -1561,6 +1561,7 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
1561
1561
1562
1562
std::sort (liveGlobals.begin (), liveGlobals.end ());
1563
1563
1564
+ rdcarray<rdcspv::ThreadIndex> threadIds;
1564
1565
for (uint32_t i = 0 ; i < threadsInWorkgroup; i++)
1565
1566
{
1566
1567
ThreadState &lane = workgroup[i];
@@ -1589,8 +1590,14 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
1589
1590
// now that the globals are allocated and their storage won't move, we can take pointers to them
1590
1591
for (const PointerId &p : pointerIDs)
1591
1592
p.Set (*this , global, lane);
1593
+
1594
+ // Only add active lanes to control flow
1595
+ if (!lane.dead )
1596
+ threadIds.push_back (i);
1592
1597
}
1593
1598
1599
+ controlFlow.Construct (threadIds);
1600
+
1594
1601
// find quad neighbours
1595
1602
{
1596
1603
rdcarray<uint32_t > processedQuads;
@@ -2454,6 +2461,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
2454
2461
if (steps == 0 )
2455
2462
{
2456
2463
ShaderDebugState initial;
2464
+ uint32_t startBlock = INVALID_BLOCK_INDEX;
2457
2465
2458
2466
// we should be sitting at the entry point function prologue, step forward into the first block
2459
2467
// and past any function-local variable declarations
@@ -2466,6 +2474,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
2466
2474
thread.EnterEntryPoint (&initial);
2467
2475
FillCallstack (thread, initial);
2468
2476
initial.nextInstruction = thread.nextInstruction ;
2477
+ startBlock = thread.callstack .back ()->curBlock .value ();
2469
2478
}
2470
2479
else
2471
2480
{
@@ -2487,6 +2496,18 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
2487
2496
2488
2497
ret.push_back (std::move (initial));
2489
2498
2499
+ // Set the initial block for the threads in the root tangle
2500
+ ThreadBlockStates threadBlockStates;
2501
+ TangleGroup &tangles = controlFlow.GetTangles ();
2502
+ RDCASSERTEQUAL (tangles.size (), 1 );
2503
+ RDCASSERTNOTEQUAL (startBlock, INVALID_BLOCK_INDEX);
2504
+ for (Tangle &tangle : tangles)
2505
+ {
2506
+ RDCASSERT (tangle.IsAliveActive ());
2507
+ for (uint32_t threadIdx = 0 ; threadIdx < workgroup.size (); ++threadIdx)
2508
+ threadBlockStates[threadIdx].push_back (startBlock);
2509
+ }
2510
+ controlFlow.UpdateState (threadBlockStates);
2490
2511
steps++;
2491
2512
}
2492
2513
@@ -2505,29 +2526,66 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
2505
2526
if (active.Finished ())
2506
2527
break ;
2507
2528
2508
- // calculate the current mask of which threads are active
2509
- CalcActiveMask (activeMask);
2529
+ // Execute the threads in each active tangle
2530
+ ThreadBlockStates threadBlockStates;
2531
+ TangleGroup &tangles = controlFlow.GetTangles ();
2510
2532
2511
- // step all active members of the workgroup
2512
- for (size_t lane = 0 ; lane < workgroup.size (); lane++)
2533
+ for (Tangle &tangle : tangles)
2513
2534
{
2514
- ThreadState &thread = workgroup[lane];
2535
+ if (!tangle.IsAliveActive ())
2536
+ continue ;
2537
+
2538
+ rdcarray<rdcspv::ThreadReference> threadRefs = tangle.GetThreadRefs ();
2539
+ // calculate the current active thread mask from the threads in the tangle
2540
+ {
2541
+ // one bool per workgroup thread
2542
+ activeMask.resize (workgroup.size ());
2515
2543
2516
- if (activeMask[lane])
2544
+ // start with all threads as inactive
2545
+ for (size_t i = 0 ; i < workgroup.size (); i++)
2546
+ activeMask[i] = false ;
2547
+
2548
+ // activate the threads in the tangle
2549
+ for (const rdcspv::ThreadReference &ref : threadRefs)
2550
+ {
2551
+ uint32_t idx = ref.id ;
2552
+ RDCASSERT (idx < workgroup.size (), idx, workgroup.size ());
2553
+ RDCASSERT (!workgroup[idx].Finished ());
2554
+ activeMask[idx] = true ;
2555
+ }
2556
+ }
2557
+
2558
+ BlockIndex newMergeBlock = INVALID_BLOCK_INDEX;
2559
+ uint32_t countActiveThreads = 0 ;
2560
+ uint32_t countDivergedThreads = 0 ;
2561
+ uint32_t countMergeBlockThreads = 0 ;
2562
+
2563
+ // step all active members of the workgroup
2564
+ for (size_t lane = 0 ; lane < workgroup.size (); lane++)
2517
2565
{
2518
- if (thread.nextInstruction >= instructionOffsets.size ())
2566
+ if (!activeMask[lane])
2567
+ continue ;
2568
+ ++countActiveThreads;
2569
+
2570
+ ThreadState &thread = workgroup[lane];
2571
+ const uint32_t currentPC = thread.nextInstruction ;
2572
+ const uint32_t threadId = lane;
2573
+ RDCASSERTEQUAL (thread.callstack .back ()->curBlock .value (), tangle.GetBlockIndex ());
2574
+ if (currentPC >= instructionOffsets.size ())
2519
2575
{
2520
2576
if (lane == activeLaneIndex)
2521
2577
ret.emplace_back ();
2522
2578
2579
+ tangle.SetThreadDead (threadId);
2523
2580
continue ;
2524
2581
}
2582
+ Id prevConvergeBlock = thread.mergeBlock ;
2525
2583
2526
2584
if (lane == activeLaneIndex)
2527
2585
{
2528
2586
ShaderDebugState state;
2529
2587
2530
- size_t instOffs = instructionOffsets[thread. nextInstruction ];
2588
+ size_t instOffs = instructionOffsets[currentPC ];
2531
2589
2532
2590
// see if we're retiring any IDs at this state
2533
2591
for (size_t l = 0 ; l < thread.live .size ();)
@@ -2566,7 +2624,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
2566
2624
2567
2625
if (m_DebugInfo.valid )
2568
2626
{
2569
- size_t endOffs = instructionOffsets[thread. nextInstruction - 1 ];
2627
+ size_t endOffs = instructionOffsets[currentPC - 1 ];
2570
2628
2571
2629
// append any inlined functions to the top of the stack
2572
2630
InlineData *inlined = m_DebugInfo.lineInline [endOffs];
@@ -2614,8 +2672,53 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
2614
2672
{
2615
2673
thread.StepNext (NULL , workgroup, activeMask);
2616
2674
}
2675
+ threadBlockStates[threadId] = thread.enteredBlocks ;
2676
+ if (!thread.enteredBlocks .empty ())
2677
+ {
2678
+ const uint32_t nextBlock = thread.callstack .back ()->curBlock .value ();
2679
+ // The last block entered should be the current block
2680
+ RDCASSERTEQUAL (nextBlock, thread.enteredBlocks .back ());
2681
+ }
2682
+
2683
+ Id newConvergeBlock = thread.mergeBlock ;
2684
+ // the thread executed a merge block
2685
+ if (newConvergeBlock != prevConvergeBlock)
2686
+ {
2687
+ if (newMergeBlock == INVALID_BLOCK_INDEX)
2688
+ {
2689
+ newMergeBlock = newConvergeBlock.value ();
2690
+ RDCASSERTNOTEQUAL (newMergeBlock, INVALID_BLOCK_INDEX);
2691
+ }
2692
+ else
2693
+ {
2694
+ // All the threads in the tangle should converge to the same block
2695
+ RDCASSERTEQUAL (newConvergeBlock.value (), newMergeBlock);
2696
+ }
2697
+ ++countMergeBlockThreads;
2698
+ }
2699
+ // JAKE TODO: function exit is a convergence point
2700
+
2701
+ if (thread.Finished ())
2702
+ tangle.SetThreadDead (threadId);
2703
+
2704
+ if (thread.diverged )
2705
+ ++countDivergedThreads;
2706
+ }
2707
+ if (countMergeBlockThreads)
2708
+ {
2709
+ // all the active threads should have a merge block if any have a merge block
2710
+ RDCASSERTEQUAL (countMergeBlockThreads, countActiveThreads);
2711
+ tangle.AddMergePoint (newMergeBlock);
2712
+ }
2713
+
2714
+ if (countDivergedThreads)
2715
+ {
2716
+ // all the active threads should have diverged if any diverges
2717
+ RDCASSERTEQUAL (countDivergedThreads, countActiveThreads);
2718
+ tangle.SetDiverged (true );
2617
2719
}
2618
2720
}
2721
+ controlFlow.UpdateState (threadBlockStates);
2619
2722
}
2620
2723
2621
2724
return ret;
@@ -4324,7 +4427,6 @@ void Debugger::RegisterOp(Iter it)
4324
4427
curFunction = NULL ;
4325
4428
}
4326
4429
}
4327
-
4328
4430
}; // namespace rdcspv
4329
4431
4330
4432
#if ENABLED(ENABLE_UNIT_TESTS)
0 commit comments