Skip to content

Commit

Permalink
Merge pull request NVIDIA#715 from senior-zero/fix-main/github/pascal…
Browse files Browse the repository at this point in the history
…_reduce_by_key_tile_state

Fix reduce by key tile state for Pascal
  • Loading branch information
gevtushenko committed Jun 13, 2023
2 parents 01f8003 + 7040b5f commit f76fbda
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions cub/agent/single_pass_scan_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,12 @@ struct no_delay_constructor_t
{
struct delay_t
{
- __device__ __forceinline__ void operator()() {}
+ __device__ __forceinline__ void operator()()
+ {
+ NV_IF_TARGET(NV_PROVIDES_SM_70,
+ (),
+ (__threadfence_block();));
+ }
};

__device__ __forceinline__ no_delay_constructor_t(unsigned int /* seed */)
Expand Down Expand Up @@ -997,7 +1002,7 @@ struct ReduceByKeyScanTileState<ValueT, KeyT, true>
/**
* Wait for the corresponding tile to become non-invalid
*/
- template <class DelayT = detail::fixed_delay_constructor_t<350, 450>>
+ template <class DelayT = detail::fixed_delay_constructor_t<350, 450>::delay_t>
__device__ __forceinline__ void WaitForValid(
int tile_idx,
StatusWord &status,
Expand Down

0 comments on commit f76fbda

Please sign in to comment.