Skip to content

Commit

Permalink
Fix dsmem stores
Browse files Browse the repository at this point in the history
  • Loading branch information
gevtushenko committed May 9, 2023
1 parent 16195e3 commit 9376f3f
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions cub/agent/single_pass_scan_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -366,31 +366,31 @@ static __device__ __forceinline__ unsigned int dsmem_ld_relaxed(unsigned int con

/**
 * Relaxed, cluster-scope store of four 32-bit words to distributed shared
 * memory (PTX `.shared::cluster` state space).
 *
 * The `::cluster` sub-qualifier is spelled out on purpose: a bare `.shared`
 * state space defaults to `.shared::cta`, i.e. the shared memory of the
 * executing CTA only, which is not what a distributed-shared-memory store
 * needs. Exactly one store is issued per call.
 *
 * Requires a target with thread-block cluster support (sm_90+ per the PTX ISA).
 *
 * @param ptr Destination address, passed to the asm through `_CUB_ASM_PTR_`
 *            (macro defined elsewhere; presumably selects the pointer-width
 *            register constraint — confirm). PTX vector stores require the
 *            address to be aligned to the full 16-byte access size.
 * @param val The four words to store; written as {x, y, z, w}.
 *
 * NOTE(review): the PTX ISA grammar lists the scope before the state space
 * (`st.relaxed.cluster.shared::cluster`); the qualifier order is kept as
 * committed — confirm ptxas accepts both spellings.
 */
static __device__ __forceinline__ void dsmem_st_relaxed(uint4 *ptr, uint4 val)
{
  // "memory" clobber: keep the compiler from caching or reordering memory
  // accesses across this store.
  asm volatile("st.relaxed.shared::cluster.cluster.v4.u32 [%0], {%1, %2, %3, %4};"
               :
               : _CUB_ASM_PTR_(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w)
               : "memory");
}

/**
 * Relaxed, cluster-scope store of two 64-bit words to distributed shared
 * memory (PTX `.shared::cluster` state space).
 *
 * The `::cluster` sub-qualifier is spelled out on purpose: a bare `.shared`
 * state space defaults to `.shared::cta`, i.e. the shared memory of the
 * executing CTA only, which is not what a distributed-shared-memory store
 * needs. Exactly one store is issued per call.
 *
 * Requires a target with thread-block cluster support (sm_90+ per the PTX ISA).
 *
 * @param ptr Destination address, passed to the asm through `_CUB_ASM_PTR_`
 *            (macro defined elsewhere; presumably selects the pointer-width
 *            register constraint — confirm). PTX vector stores require the
 *            address to be aligned to the full 16-byte access size.
 * @param val The two words to store; written as {x, y}.
 *
 * NOTE(review): the PTX ISA grammar lists the scope before the state space
 * (`st.relaxed.cluster.shared::cluster`); the qualifier order is kept as
 * committed — confirm ptxas accepts both spellings.
 */
static __device__ __forceinline__ void dsmem_st_relaxed(ulonglong2 *ptr, ulonglong2 val)
{
  // "memory" clobber: keep the compiler from caching or reordering memory
  // accesses across this store.
  asm volatile("st.relaxed.shared::cluster.cluster.v2.u64 [%0], {%1, %2};"
               :
               : _CUB_ASM_PTR_(ptr), "l"(val.x), "l"(val.y)
               : "memory");
}

/**
 * Relaxed, cluster-scope store of four 16-bit words to distributed shared
 * memory (PTX `.shared::cluster` state space).
 *
 * The `::cluster` sub-qualifier is spelled out on purpose: a bare `.shared`
 * state space defaults to `.shared::cta`, i.e. the shared memory of the
 * executing CTA only, which is not what a distributed-shared-memory store
 * needs. Exactly one store is issued per call.
 *
 * Requires a target with thread-block cluster support (sm_90+ per the PTX ISA).
 *
 * @param ptr Destination address, passed to the asm through `_CUB_ASM_PTR_`
 *            (macro defined elsewhere; presumably selects the pointer-width
 *            register constraint — confirm). PTX vector stores require the
 *            address to be aligned to the full 8-byte access size.
 * @param val The four halfwords to store; written as {x, y, z, w} through
 *            16-bit ("h") register operands.
 *
 * NOTE(review): the PTX ISA grammar lists the scope before the state space
 * (`st.relaxed.cluster.shared::cluster`); the qualifier order is kept as
 * committed — confirm ptxas accepts both spellings.
 */
static __device__ __forceinline__ void dsmem_st_relaxed(ushort4 *ptr, ushort4 val)
{
  // "memory" clobber: keep the compiler from caching or reordering memory
  // accesses across this store.
  asm volatile("st.relaxed.shared::cluster.cluster.v4.u16 [%0], {%1, %2, %3, %4};"
               :
               : _CUB_ASM_PTR_(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w)
               : "memory");
}

static __device__ __forceinline__ void dsmem_st_relaxed(uint2 *ptr, uint2 val)
{
asm volatile("st.relaxed.shared.cluster.v2.u32 [%0], {%1, %2};"
asm volatile("st.relaxed.shared::cluster.cluster.v2.u32 [%0], {%1, %2};"
:
: _CUB_ASM_PTR_(ptr), "r"(val.x), "r"(val.y)
: "memory");
Expand All @@ -399,15 +399,15 @@ static __device__ __forceinline__ void dsmem_st_relaxed(uint2 *ptr, uint2 val)
/**
 * Relaxed, cluster-scope store of a single 64-bit word to distributed shared
 * memory (PTX `.shared::cluster` state space).
 *
 * The `::cluster` sub-qualifier is spelled out on purpose: a bare `.shared`
 * state space defaults to `.shared::cta`, i.e. the shared memory of the
 * executing CTA only, which is not what a distributed-shared-memory store
 * needs. Exactly one store is issued per call.
 *
 * Requires a target with thread-block cluster support (sm_90+ per the PTX ISA).
 *
 * @param ptr Destination address, passed to the asm through `_CUB_ASM_PTR_`
 *            (macro defined elsewhere; presumably selects the pointer-width
 *            register constraint — confirm).
 * @param val The 64-bit value to store.
 *
 * NOTE(review): the PTX ISA grammar lists the scope before the state space
 * (`st.relaxed.cluster.shared::cluster`); the qualifier order is kept as
 * committed — confirm ptxas accepts both spellings.
 */
static __device__ __forceinline__ void dsmem_st_relaxed(unsigned long long *ptr,
                                                        unsigned long long val)
{
  // "memory" clobber: keep the compiler from caching or reordering memory
  // accesses across this store.
  asm volatile("st.relaxed.shared::cluster.cluster.u64 [%0], %1;"
               :
               : _CUB_ASM_PTR_(ptr), "l"(val)
               : "memory");
}

static __device__ __forceinline__ void dsmem_st_relaxed(unsigned int *ptr, unsigned int val)
{
asm volatile("st.relaxed.shared.cluster.u32 [%0], %1;"
asm volatile("st.relaxed.shared::cluster.cluster.u32 [%0], %1;"
:
: _CUB_ASM_PTR_(ptr), "r"(val)
: "memory");
Expand Down

0 comments on commit 9376f3f

Please sign in to comment.