From 7040b5f8e8a0839e692401e39446571609870dfa Mon Sep 17 00:00:00 2001
From: Georgy Evtushenko
Date: Tue, 13 Jun 2023 11:40:47 +0400
Subject: [PATCH] Fix reduce by key tile state for Pascal

---
 cub/agent/single_pass_scan_operators.cuh | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/cub/agent/single_pass_scan_operators.cuh b/cub/agent/single_pass_scan_operators.cuh
index 66fce8ae1..351cea2dd 100644
--- a/cub/agent/single_pass_scan_operators.cuh
+++ b/cub/agent/single_pass_scan_operators.cuh
@@ -194,7 +194,12 @@ struct no_delay_constructor_t
 {
   struct delay_t
   {
-    __device__ __forceinline__ void operator()() {}
+    __device__ __forceinline__ void operator()()
+    {
+      NV_IF_TARGET(NV_PROVIDES_SM_70,
+                   (),
+                   (__threadfence_block();));
+    }
   };
 
   __device__ __forceinline__ no_delay_constructor_t(unsigned int /* seed */)
@@ -997,7 +1002,7 @@ struct ReduceByKeyScanTileState
     /**
      * Wait for the corresponding tile to become non-invalid
      */
-    template >
+    template ::delay_t>
     __device__ __forceinline__ void WaitForValid(
         int tile_idx,
         StatusWord &status,