Skip to content

Commit

Permalink
One idea
Browse files Browse the repository at this point in the history
  • Loading branch information
gevtushenko committed May 9, 2023
1 parent 5333190 commit 1e05a4d
Showing 1 changed file with 32 additions and 2 deletions.
34 changes: 32 additions & 2 deletions cub/agent/single_pass_scan_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,28 @@ struct ClusterTilePrefixCallbackOp
}
}

__device__ __forceinline__ void
BroadcastInclusiveAggregate(T block_aggregate, ScanTileStatus status)
{
const unsigned int cta_rank = cooperative_groups::cluster_group::block_rank();
const unsigned int dst_cta = cta_rank + 1 + threadIdx.x;

// Notify last CTA first
for (int dst_cta = CUB_DETAIL_CLUSTER_SIZE - 1; dst_cta > 0; dst_cta--)
{
TxnWord * dsmem = cooperative_groups::cluster_group::map_shared_rank(temp_storage.dsmem, dst_cta);

TileDescriptor tile_descriptor;
tile_descriptor.status = status;
tile_descriptor.value = block_aggregate;

TxnWord alias;
*reinterpret_cast<TileDescriptor *>(&alias) = tile_descriptor;

dsmem_st_relaxed(dsmem + cta_rank, alias);
}
}

__device__ __forceinline__
T Reduce(unsigned int cta_rank, unsigned int src_cta, T value)
{
Expand Down Expand Up @@ -543,8 +565,16 @@ struct ClusterTilePrefixCallbackOp
exclusive_prefix = scan_op(window_aggregate, exclusive_prefix);
}

T inclusive_prefix = scan_op(exclusive_prefix, block_aggregate);
BroadcastBlockAggregate(inclusive_prefix, SCAN_TILE_INCLUSIVE);
// T inclusive_prefix = scan_op(exclusive_prefix, block_aggregate);
// TODO Different values!!!
// inclusive_prefix = __shfl_sync(CUB_DETAIL_CLUSTER_WARP_MASK, inclusive_prefix, 0, CUB_DETAIL_CLUSTER_SIZE);
// BroadcastBlockAggregate(inclusive_prefix, SCAN_TILE_INCLUSIVE);

if (threadIdx.x == 0)
{
T inclusive_prefix = scan_op(exclusive_prefix, block_aggregate);
BroadcastInclusiveAggregate(inclusive_prefix, SCAN_TILE_INCLUSIVE);
}
}
else
{
Expand Down

0 comments on commit 1e05a4d

Please sign in to comment.