Skip to content

Commit 7042068

Browse files
davidrohr authored and noferini committed
GPU: Fix atomics on the host
1 parent 1e9fa78 commit 7042068

File tree

2 files changed

+38
-73
lines changed

2 files changed

+38
-73
lines changed

GPU/Common/GPUCommonMath.h

Lines changed: 38 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#if !defined(GPUCA_GPUCODE_DEVICE)
2525
#include <cmath>
2626
#include <algorithm>
27+
#include <atomic>
2728
#endif
2829

2930
#if !defined(__OPENCL__) || defined(__OPENCLCPP__)
@@ -86,69 +87,49 @@ class GPUCommonMath
8687
template <class T>
8788
GPUdi() static T AtomicExch(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
8889
{
89-
return GPUCommonMath::AtomicExchInt(addr, val);
90+
return GPUCommonMath::AtomicExchInternal(addr, val);
9091
}
9192

9293
template <class T>
93-
GPUdi() static T AtomicCAS(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T cmp, T val)
94+
GPUdi() static bool AtomicCAS(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T cmp, T val)
9495
{
95-
return GPUCommonMath::AtomicCASInt(addr, cmp, val);
96+
return GPUCommonMath::AtomicCASInternal(addr, cmp, val);
9697
}
9798

9899
template <class T>
99100
GPUdi() static T AtomicAdd(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
100101
{
101-
return GPUCommonMath::AtomicAddInt(addr, val);
102+
return GPUCommonMath::AtomicAddInternal(addr, val);
102103
}
103104
template <class T>
104105
GPUdi() static void AtomicMax(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
105106
{
106-
GPUCommonMath::AtomicMaxInt(addr, val);
107+
GPUCommonMath::AtomicMaxInternal(addr, val);
107108
}
108109
template <class T>
109110
GPUdi() static void AtomicMin(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val)
110111
{
111-
GPUCommonMath::AtomicMinInt(addr, val);
112+
GPUCommonMath::AtomicMinInternal(addr, val);
112113
}
113114
template <class T>
114115
GPUdi() static T AtomicExchShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
115116
{
116-
#ifdef GPUCA_GPUCODE_DEVICE
117-
return GPUCommonMath::AtomicExchInt(addr, val);
118-
#else
119-
T retVal = *addr;
120-
*addr = val;
121-
return retVal;
122-
#endif
117+
return GPUCommonMath::AtomicExchInternal(addr, val);
123118
}
124119
template <class T>
125120
GPUdi() static T AtomicAddShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
126121
{
127-
#ifdef GPUCA_GPUCODE_DEVICE
128-
return GPUCommonMath::AtomicAddInt(addr, val);
129-
#else
130-
T retVal = *addr;
131-
*addr += val;
132-
return retVal;
133-
#endif
122+
return GPUCommonMath::AtomicAddInternal(addr, val);
134123
}
135124
template <class T>
136125
GPUdi() static void AtomicMaxShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
137126
{
138-
#ifdef GPUCA_GPUCODE_DEVICE
139-
GPUCommonMath::AtomicMaxInt(addr, val);
140-
#else
141-
*addr = std::max(*addr, val);
142-
#endif
127+
GPUCommonMath::AtomicMaxInternal(addr, val);
143128
}
144129
template <class T>
145130
GPUdi() static void AtomicMinShared(GPUsharedref() GPUgeneric() GPUAtomic(T) * addr, T val)
146131
{
147-
#ifdef GPUCA_GPUCODE_DEVICE
148-
GPUCommonMath::AtomicMinInt(addr, val);
149-
#else
150-
*addr = std::min(*addr, val);
151-
#endif
132+
GPUCommonMath::AtomicMinInternal(addr, val);
152133
}
153134
GPUd() static int Mul24(int a, int b);
154135
GPUd() static float FMulRZ(float a, float b);
@@ -176,15 +157,15 @@ class GPUCommonMath
176157

177158
private:
178159
template <class S, class T>
179-
GPUd() static unsigned int AtomicExchInt(S* addr, T val);
160+
GPUd() static unsigned int AtomicExchInternal(S* addr, T val);
180161
template <class S, class T>
181-
GPUd() static T AtomicCASInt(S* addr, T cmp, T val);
162+
GPUd() static bool AtomicCASInternal(S* addr, T cmp, T val);
182163
template <class S, class T>
183-
GPUd() static unsigned int AtomicAddInt(S* addr, T val);
164+
GPUd() static unsigned int AtomicAddInternal(S* addr, T val);
184165
template <class S, class T>
185-
GPUd() static void AtomicMaxInt(S* addr, T val);
166+
GPUd() static void AtomicMaxInternal(S* addr, T val);
186167
template <class S, class T>
187-
GPUd() static void AtomicMinInt(S* addr, T val);
168+
GPUd() static void AtomicMinInternal(S* addr, T val);
188169
};
189170

190171
typedef GPUCommonMath CAMath;
@@ -431,7 +412,7 @@ GPUhdi() float GPUCommonMath::Copysign(float x, float y)
431412
}
432413

433414
template <class S, class T>
434-
GPUdi() unsigned int GPUCommonMath::AtomicExchInt(S* addr, T val)
415+
GPUdi() unsigned int GPUCommonMath::AtomicExchInternal(S* addr, T val)
435416
{
436417
#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
437418
return ::atomic_exchange(addr, val);
@@ -444,33 +425,28 @@ GPUdi() unsigned int GPUCommonMath::AtomicExchInt(S* addr, T val)
444425
__atomic_exchange(addr, &val, &old, __ATOMIC_SEQ_CST);
445426
return old;
446427
#else
447-
unsigned int old = *addr;
448-
*addr = val;
449-
return old;
428+
return reinterpret_cast<std::atomic<T>*>(addr)->exchange(val);
450429
#endif
451430
}
452431

453432
template <class S, class T>
454-
GPUdi() T GPUCommonMath::AtomicCASInt(S* addr, T cmp, T val)
433+
GPUdi() bool GPUCommonMath::AtomicCASInternal(S* addr, T cmp, T val)
455434
{
456435
#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
457-
return ::atomic_compare_exchange(addr, cmp, val);
436+
return ::atomic_compare_exchange(addr, cmp, val) == cmp;
458437
#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
459-
return ::atomic_cmpxchg(addr, cmp, val);
438+
return ::atomic_cmpxchg(addr, cmp, val) == cmp;
460439
#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
461-
return ::atomicCAS(addr, cmp, val);
440+
return ::atomicCAS(addr, cmp, val) == cmp;
462441
#elif defined(WITH_OPENMP)
463-
__atomic_compare_exchange(addr, &cmp, &val, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
464-
return cmp;
442+
return __atomic_compare_exchange(addr, &cmp, &val, true, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
465443
#else
466-
T old = *addr;
467-
*addr = (old == cmp) ? val : old;
468-
return old;
444+
return reinterpret_cast<std::atomic<T>*>(addr)->compare_exchange_strong(cmp, val);
469445
#endif
470446
}
471447

472448
template <class S, class T>
473-
GPUdi() unsigned int GPUCommonMath::AtomicAddInt(S* addr, T val)
449+
GPUdi() unsigned int GPUCommonMath::AtomicAddInternal(S* addr, T val)
474450
{
475451
#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
476452
return ::atomic_fetch_add(addr, val);
@@ -481,76 +457,66 @@ GPUdi() unsigned int GPUCommonMath::AtomicAddInt(S* addr, T val)
481457
#elif defined(WITH_OPENMP)
482458
return __atomic_add_fetch(addr, val, __ATOMIC_SEQ_CST) - val;
483459
#else
484-
unsigned int old = *addr;
485-
*addr += val;
486-
return old;
460+
return reinterpret_cast<std::atomic<T>*>(addr)->fetch_add(val);
487461
#endif
488462
}
489463

490464
template <class S, class T>
491-
GPUdi() void GPUCommonMath::AtomicMaxInt(S* addr, T val)
465+
GPUdi() void GPUCommonMath::AtomicMaxInternal(S* addr, T val)
492466
{
493467
#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
494468
::atomic_fetch_max(addr, val);
495469
#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
496470
::atomic_max(addr, val);
497471
#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
498472
::atomicMax(addr, val);
499-
#elif defined(WITH_OPENMP)
500-
while (*addr < val) {
501-
AtomicExch(addr, val);
502-
}
503473
#else
504-
if (*addr < val) {
505-
*addr = val;
474+
S current;
475+
while ((current = *(volatile S*)addr) < val && !AtomicCASInternal(addr, current, val)) {
506476
}
507477
#endif // GPUCA_GPUCODE
508478
}
509479

510480
template <class S, class T>
511-
GPUdi() void GPUCommonMath::AtomicMinInt(S* addr, T val)
481+
GPUdi() void GPUCommonMath::AtomicMinInternal(S* addr, T val)
512482
{
513483
#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
514484
::atomic_fetch_min(addr, val);
515485
#elif defined(GPUCA_GPUCODE) && defined(__OPENCL__)
516486
::atomic_min(addr, val);
517487
#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
518488
::atomicMin(addr, val);
519-
#elif defined(WITH_OPENMP)
520-
while (*addr > val) {
521-
AtomicExch(addr, val);
522-
}
523489
#else
524-
if (*addr > val) {
525-
*addr = val;
490+
S current;
491+
while ((current = *(volatile S*)addr) > val && !AtomicCASInternal(addr, current, val)) {
526492
}
527493
#endif // GPUCA_GPUCODE
528494
}
529495

530496
#if (defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__ROOTCINT__) && !defined(G__ROOT)
531497
#define GPUCA_HAVE_ATOMIC_MINMAX_FLOAT
532498
template <>
533-
GPUdii() void GPUCommonMath::AtomicMaxInt(GPUglobalref() GPUgeneric() GPUAtomic(float) * addr, float val)
499+
GPUdii() void GPUCommonMath::AtomicMaxInternal(GPUglobalref() GPUgeneric() GPUAtomic(float) * addr, float val)
534500
{
535501
if (val == -0.f) {
536502
val = 0.f;
537503
}
538504
if (val >= 0) {
539-
AtomicMaxInt((GPUAtomic(int)*)addr, __float_as_int(val));
505+
AtomicMaxInternal((GPUAtomic(int)*)addr, __float_as_int(val));
540506
} else {
541-
AtomicMinInt((GPUAtomic(unsigned int)*)addr, __float_as_uint(val));
507+
AtomicMinInternal((GPUAtomic(unsigned int)*)addr, __float_as_uint(val));
542508
}
543509
}
544510
template <>
545-
GPUdii() void GPUCommonMath::AtomicMinInt(GPUglobalref() GPUgeneric() GPUAtomic(float) * addr, float val)
511+
GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAtomic(float) * addr, float val)
546512
{
547513
if (val == -0.f) {
548514
val = 0.f;
549515
}
550516
if (val >= 0) {
551-
AtomicMinInt((GPUAtomic(int)*)addr, __float_as_int(val));
517+
AtomicMinInternal((GPUAtomic(int)*)addr, __float_as_int(val));
552518
} else {
553-
AtomicMaxInt((GPUAtomic(unsigned int)*)addr, __float_as_uint(val));
519+
AtomicMaxInternal((GPUAtomic(unsigned int)*)addr, __float_as_uint(val));
554520
}
555521
}
556522
#endif

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -325,7 +325,6 @@ static std::atomic_flag timerFlag = ATOMIC_FLAG_INIT; // TODO: Should be a class
325325
GPUReconstructionCPU::timerMeta* GPUReconstructionCPU::insertTimer(unsigned int id, std::string&& name, int J, int num, int type, RecoStep step)
326326
{
327327
while (timerFlag.test_and_set()) {
328-
;
329328
}
330329
if (mTimers.size() <= id) {
331330
mTimers.resize(id + 1);

0 commit comments

Comments
 (0)