diff --git a/cub/agent/agent_scan.cuh b/cub/agent/agent_scan.cuh index 253453bda..82b29912c 100644 --- a/cub/agent/agent_scan.cuh +++ b/cub/agent/agent_scan.cuh @@ -358,6 +358,7 @@ struct AgentScan // Wait for all threads in the cluster to finish loading / dsmem initialization cooperative_groups::cluster_group::barrier_wait(std::move(token)); + __threadfence(); // Perform tile scan if (tile_idx == 0)