2424#if !defined(GPUCA_GPUCODE_DEVICE)
2525#include <cmath>
2626#include <algorithm>
27+ #include <atomic>
2728#endif
2829
2930#if !defined(__OPENCL__) || defined(__OPENCLCPP__)
@@ -86,69 +87,49 @@ class GPUCommonMath
8687 template <class T >
8788 GPUdi () static T AtomicExch (GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
8889 {
89- return GPUCommonMath::AtomicExchInt (addr, val);
90+ return GPUCommonMath::AtomicExchInternal (addr, val);
9091 }
9192
9293 template <class T >
93- GPUdi () static T AtomicCAS (GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T cmp, T val)
94+ GPUdi () static bool AtomicCAS (GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T cmp, T val)
9495 {
95- return GPUCommonMath::AtomicCASInt (addr, cmp, val);
96+ return GPUCommonMath::AtomicCASInternal (addr, cmp, val);
9697 }
9798
9899 template <class T >
99100 GPUdi () static T AtomicAdd (GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
100101 {
101- return GPUCommonMath::AtomicAddInt (addr, val);
102+ return GPUCommonMath::AtomicAddInternal (addr, val);
102103 }
103104 template <class T >
104105 GPUdi () static void AtomicMax (GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
105106 {
106- GPUCommonMath::AtomicMaxInt (addr, val);
107+ GPUCommonMath::AtomicMaxInternal (addr, val);
107108 }
108109 template <class T >
109110 GPUdi () static void AtomicMin (GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
110111 {
111- GPUCommonMath::AtomicMinInt (addr, val);
112+ GPUCommonMath::AtomicMinInternal (addr, val);
112113 }
113114 template <class T >
114115 GPUdi () static T AtomicExchShared (GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
115116 {
116- #ifdef GPUCA_GPUCODE_DEVICE
117- return GPUCommonMath::AtomicExchInt (addr, val);
118- #else
119- T retVal = *addr;
120- *addr = val;
121- return retVal;
122- #endif
117+ return GPUCommonMath::AtomicExchInternal (addr, val);
123118 }
124119 template <class T >
125120 GPUdi () static T AtomicAddShared (GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
126121 {
127- #ifdef GPUCA_GPUCODE_DEVICE
128- return GPUCommonMath::AtomicAddInt (addr, val);
129- #else
130- T retVal = *addr;
131- *addr += val;
132- return retVal;
133- #endif
122+ return GPUCommonMath::AtomicAddInternal (addr, val);
134123 }
135124 template <class T >
136125 GPUdi () static void AtomicMaxShared (GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
137126 {
138- #ifdef GPUCA_GPUCODE_DEVICE
139- GPUCommonMath::AtomicMaxInt (addr, val);
140- #else
141- *addr = std::max (*addr, val);
142- #endif
127+ GPUCommonMath::AtomicMaxInternal (addr, val);
143128 }
144129 template <class T >
145130 GPUdi () static void AtomicMinShared (GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
146131 {
147- #ifdef GPUCA_GPUCODE_DEVICE
148- GPUCommonMath::AtomicMinInt (addr, val);
149- #else
150- *addr = std::min (*addr, val);
151- #endif
132+ GPUCommonMath::AtomicMinInternal (addr, val);
152133 }
153134 GPUd () static int Mul24 (int a, int b);
154135 GPUd () static float FMulRZ (float a, float b);
@@ -176,15 +157,15 @@ class GPUCommonMath
176157
177158 private:
178159 template <class S , class T >
179- GPUd () static unsigned int AtomicExchInt (S* addr, T val);
160+ GPUd () static unsigned int AtomicExchInternal (S* addr, T val);
180161 template <class S , class T >
181- GPUd () static T AtomicCASInt (S* addr, T cmp, T val);
162+ GPUd () static bool AtomicCASInternal (S* addr, T cmp, T val);
182163 template <class S , class T >
183- GPUd () static unsigned int AtomicAddInt (S* addr, T val);
164+ GPUd () static unsigned int AtomicAddInternal (S* addr, T val);
184165 template <class S , class T >
185- GPUd () static void AtomicMaxInt (S* addr, T val);
166+ GPUd () static void AtomicMaxInternal (S* addr, T val);
186167 template <class S , class T >
187- GPUd () static void AtomicMinInt (S* addr, T val);
168+ GPUd () static void AtomicMinInternal (S* addr, T val);
188169};
189170
190171typedef GPUCommonMath CAMath;
@@ -431,7 +412,7 @@ GPUhdi() float GPUCommonMath::Copysign(float x, float y)
431412}
432413
433414template <class S , class T >
434- GPUdi () unsigned int GPUCommonMath::AtomicExchInt (S* addr, T val)
415+ GPUdi () unsigned int GPUCommonMath::AtomicExchInternal (S* addr, T val)
435416{
436417#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
437418 return ::atomic_exchange (addr, val);
@@ -444,33 +425,28 @@ GPUdi() unsigned int GPUCommonMath::AtomicExchInt(S* addr, T val)
444425 __atomic_exchange (addr, &val, &old, __ATOMIC_SEQ_CST);
445426 return old;
446427#else
447- unsigned int old = *addr;
448- *addr = val;
449- return old;
428+ return reinterpret_cast<std::atomic<T>*>(addr)->exchange(val);
450429#endif
451430}
452431
453432template <class S , class T >
454- GPUdi () T GPUCommonMath::AtomicCASInt (S* addr, T cmp, T val)
433+ GPUdi () bool GPUCommonMath::AtomicCASInternal (S* addr, T cmp, T val)
455434{
456435#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
457- return ::atomic_compare_exchange (addr, cmp, val);
436+ return ::atomic_compare_exchange(addr, cmp, val) == cmp;
458437#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
459- return ::atomic_cmpxchg (addr, cmp, val);
438+ return ::atomic_cmpxchg(addr, cmp, val) == cmp;
460439#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
461- return ::atomicCAS (addr, cmp, val);
440+ return ::atomicCAS(addr, cmp, val) == cmp;
462441#elif defined(WITH_OPENMP)
463- __atomic_compare_exchange (addr, &cmp, &val, false , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
464- return cmp;
442+ return __atomic_compare_exchange(addr, &cmp, &val, true, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
465443#else
466- T old = *addr;
467- *addr = (old == cmp) ? val : old;
468- return old;
444+ return reinterpret_cast<std::atomic<T>*>(addr)->compare_exchange_strong(cmp, val);
469445#endif
470446}
471447
472448template <class S , class T >
473- GPUdi () unsigned int GPUCommonMath::AtomicAddInt (S* addr, T val)
449+ GPUdi () unsigned int GPUCommonMath::AtomicAddInternal (S* addr, T val)
474450{
475451#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
476452 return ::atomic_fetch_add (addr, val);
@@ -481,76 +457,66 @@ GPUdi() unsigned int GPUCommonMath::AtomicAddInt(S* addr, T val)
481457#elif defined(WITH_OPENMP)
482458 return __atomic_add_fetch (addr, val, __ATOMIC_SEQ_CST) - val;
483459#else
484- unsigned int old = *addr;
485- *addr += val;
486- return old;
460+ return reinterpret_cast<std::atomic<T>*>(addr)->fetch_add(val);
487461#endif
488462}
489463
490464template <class S , class T >
491- GPUdi () void GPUCommonMath::AtomicMaxInt (S* addr, T val)
465+ GPUdi () void GPUCommonMath::AtomicMaxInternal (S* addr, T val)
492466{
493467#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
494468 ::atomic_fetch_max (addr, val);
495469#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
496470 ::atomic_max (addr, val);
497471#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
498472 ::atomicMax (addr, val);
499- #elif defined(WITH_OPENMP)
500- while (*addr < val) {
501- AtomicExch (addr, val);
502- }
503473#else
504- if (*addr < val) {
505- *addr = val;
474+ S current;
475+ while ((current = *(volatile S*)addr) < val && !AtomicCASInternal(addr, current, val)) {
506476 }
507477#endif // GPUCA_GPUCODE
508478}
509479
510480template <class S , class T >
511- GPUdi () void GPUCommonMath::AtomicMinInt (S* addr, T val)
481+ GPUdi () void GPUCommonMath::AtomicMinInternal (S* addr, T val)
512482{
513483#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
514484 ::atomic_fetch_min (addr, val);
515485#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
516486 ::atomic_min (addr, val);
517487#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
518488 ::atomicMin (addr, val);
519- #elif defined(WITH_OPENMP)
520- while (*addr > val) {
521- AtomicExch (addr, val);
522- }
523489#else
524- if (*addr > val) {
525- *addr = val;
490+ S current;
491+ while ((current = *(volatile S*)addr) > val && !AtomicCASInternal(addr, current, val)) {
526492 }
527493#endif // GPUCA_GPUCODE
528494}
529495
530496#if (defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__ROOTCINT__) && !defined(G__ROOT)
531497#define GPUCA_HAVE_ATOMIC_MINMAX_FLOAT
532498template <>
533- GPUdii () void GPUCommonMath::AtomicMaxInt (GPUglobalref() GPUgeneric() GPUAtomic(float ) * addr, float val)
499+ GPUdii () void GPUCommonMath::AtomicMaxInternal (GPUglobalref() GPUgeneric() GPUAtomic(float ) * addr, float val)
534500{
535501 if (val == -0.f) {
536502 val = 0.f;
537503 }
538504 if (val >= 0 ) {
539- AtomicMaxInt ((GPUAtomic (int )*)addr, __float_as_int (val));
505+ AtomicMaxInternal ((GPUAtomic (int )*)addr, __float_as_int (val));
540506 } else {
541- AtomicMinInt ((GPUAtomic (unsigned int )*)addr, __float_as_uint (val));
507+ AtomicMinInternal ((GPUAtomic (unsigned int )*)addr, __float_as_uint (val));
542508 }
543509}
544510template <>
545- GPUdii () void GPUCommonMath::AtomicMinInt (GPUglobalref() GPUgeneric() GPUAtomic(float ) * addr, float val)
511+ GPUdii () void GPUCommonMath::AtomicMinInternal (GPUglobalref() GPUgeneric() GPUAtomic(float ) * addr, float val)
546512{
547513 if (val == -0.f) {
548514 val = 0.f;
549515 }
550516 if (val >= 0 ) {
551- AtomicMinInt ((GPUAtomic (int )*)addr, __float_as_int (val));
517+ AtomicMinInternal ((GPUAtomic (int )*)addr, __float_as_int (val));
552518 } else {
553- AtomicMaxInt ((GPUAtomic (unsigned int )*)addr, __float_as_uint (val));
519+ AtomicMaxInternal ((GPUAtomic (unsigned int )*)addr, __float_as_uint (val));
554520 }
555521}
556522#endif
0 commit comments