From 121faaf3737ced55ab0d7344c4c2a2edaa970e09 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Sat, 27 Jun 2020 09:08:45 -0700 Subject: [PATCH 1/2] whitespace changes Signed-off-by: Jeff Hammond --- src/AtomicMacro.hh | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/AtomicMacro.hh b/src/AtomicMacro.hh index 4c31c853..2785c64a 100644 --- a/src/AtomicMacro.hh +++ b/src/AtomicMacro.hh @@ -16,18 +16,20 @@ //Currently not atomic here. But its only used when it does not necissarially need to be atomic. #define ATOMIC_WRITE( x, v ) \ - x = v; + x = v; #define ATOMIC_ADD( x, v ) \ atomicAdd( &x, v ); - + #define ATOMIC_UPDATE( x ) \ atomicAdd( &x, 1 ); #define ATOMIC_CAPTURE( x, v, p ) \ p = atomicAdd( &x, v ); + //If in a CPU OpenMP section use the OpenMP atomics #elif defined (USE_OPENMP_ATOMICS) + #define ATOMIC_WRITE( x, v ) \ _Pragma("omp atomic write") \ x = v; @@ -46,6 +48,7 @@ //If in a serial section, no need to use atomics #else + #define ATOMIC_WRITE( x, v ) \ x = v; @@ -62,6 +65,7 @@ //If in a OpenMP section use the OpenMP atomics #elif defined (USE_OPENMP_ATOMICS) + #define ATOMIC_WRITE( x, v ) \ _Pragma("omp atomic write") \ x = v; @@ -74,12 +78,13 @@ _Pragma("omp atomic update") \ x++; - #define ATOMIC_CAPTURE( x, v, p ) \ - _Pragma("omp atomic capture") \ - {p = x; x = x + v;} + #define ATOMIC_CAPTURE( x, v, p ) \ + _Pragma("omp atomic capture") \ + {p = x; x = x + v;} //If in a serial section, no need to use atomics #else + #define ATOMIC_WRITE( x, v ) \ x = v; @@ -91,4 +96,5 @@ #define ATOMIC_CAPTURE( x, v, p ) \ {p = x; x = x + v;} + #endif From 5bf8b84d5403f853eefef4c63ff932aad2be2eae Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Sat, 27 Jun 2020 09:12:01 -0700 Subject: [PATCH 2/2] new implementation of atomics New version uses functions not macros. The use of template functions allows for enforcement of type-safety, which is implemented using static_assert. The old implementation is preserved for posterity. A header guard was added. I found the old macro names confusing, so I used new names, but I map the old names in the source onto them so the application source does not change. Signed-off-by: Jeff Hammond --- src/AtomicMacro.hh | 158 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 1 deletion(-) diff --git a/src/AtomicMacro.hh b/src/AtomicMacro.hh index 2785c64a..b438665e 100644 --- a/src/AtomicMacro.hh +++ b/src/AtomicMacro.hh @@ -1,3 +1,8 @@ +#ifndef AtomicMacro_HH_ +#define AtomicMacro_HH_ + +#define USE_MACRO_FUNCTIONS 1 + //Determine which atomics to use based on platform being compiled for // //If compiling with CUDA @@ -8,6 +13,153 @@ #define USE_OPENMP_ATOMICS #endif +// -------------------------------------------------- +// Original Names -> Inline function names +// -------------------------------------------------- +// ATOMIC_WRITE( x, v ) -> ATOMIC_WRITE +// ATOMIC_UPDATE( x ) -> ATOMIC_INCREMENT +// ATOMIC_ADD( x, v ) -> ATOMIC_ADD +// ATOMIC_CAPTURE( x, v, p ) -> ATOMIC_FETCH_ADD +// -------------------------------------------------- + +#if defined (USE_MACRO_FUNCTIONS) + +#define ATOMIC_CAPTURE( x, v, p ) ATOMIC_FETCH_ADD((x),(v),(p)) +#define ATOMIC_UPDATE( x ) ATOMIC_INCREMENT((x)) + +#if defined(HAVE_CUDA) && defined(__CUDA_ARCH__) + +template +inline void ATOMIC_WRITE(T & x, T v) { + x = v; +} + +template +inline void ATOMIC_INCREMENT(T& x) { + atomicAdd( &x, 1 ); +} + +template +inline void ATOMIC_ADD(T& x, T v) { + atomicAdd( &x, v ); +} + +template +inline void ATOMIC_ADD(T1& x, T2 v) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + atomicAdd( &x, v ); +} + +template +inline void ATOMIC_FETCH_ADD(T& x, T v, T& p) { + p = atomicAdd( &x, v ); +} + +template +inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + p = atomicAdd( &x, v ); +} + +template +inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large"); + p = atomicAdd( &x, v ); +} + +#elif defined(USE_OPENMP_ATOMICS) + +template +inline void ATOMIC_WRITE(T & x, T v) { + _Pragma("omp atomic write") + x = v; +} + +template +inline void ATOMIC_INCREMENT(T& x) { + _Pragma("omp atomic update") + x++; +} + +template +inline void ATOMIC_ADD(T& x, T v) { + _Pragma("omp atomic") + x += v; +} + +template +inline void ATOMIC_ADD(T1& x, T2 v) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + _Pragma("omp atomic") + x += v; +} + +template +inline void ATOMIC_FETCH_ADD(T& x, T v, T& p) { + _Pragma("omp atomic capture") + {p = x; x = x + v;} +} + +template +inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + _Pragma("omp atomic capture") + {p = x; x = x + v;} +} + +template +inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large"); + _Pragma("omp atomic capture") + {p = x; x = x + v;} +} + +#else // SEQUENTIAL + +template +inline void ATOMIC_WRITE(T & x, T v) { + x = v; +} + +template +inline void ATOMIC_INCREMENT(T& x) { + x++; +} + +template +inline void ATOMIC_ADD(T& x, T v) { + x += v; +} + +template +inline void ATOMIC_ADD(T1& x, T2 v) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + x += v; +} + +template +inline void ATOMIC_FETCH_ADD(T& x, T v, T& p) { + {p = x; x = x + v;} +} + +template +inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + {p = x; x = x + v;} +} + +template +inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) { + static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large"); + static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large"); + {p = x; x = x + v;} +} + +#endif // BACKENDS + +#else // ! USE_MACRO_FUNCTIONS #if defined (HAVE_CUDA) @@ -97,4 +249,8 @@ #define ATOMIC_CAPTURE( x, v, p ) \ {p = x; x = x + v;} -#endif +#endif // BACKENDS + +#endif // USE_MACRO_FUNCTIONS + +#endif // AtomicMacro_HH_