alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
atomicBuiltIn.hpp
Go to the documentation of this file.
1/* Copyright 2022 René Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
10#include "alpaka/utility.hpp"
11
12#include <type_traits>
13
14#if ALPAKA_LANG_CUDA || ALPAKA_LANG_HIP
15
16namespace alpaka::onAcc
17{
18 //! The GPU CUDA/HIP accelerator atomic ops.
19 //
20 // Atomics can be used in the hierarchy level grids, blocks and threads.
21 // Atomics are not guaranteed to be safe between devices.
22 class AtomicUniformCudaHipBuiltIn
23 {
24 };
25} // namespace alpaka::onAcc
26
27//! These types must be in the global namespace for checking existence of respective functions in global namespace via
28//! SFINAE, so we use inline namespace.
29inline namespace alpakaGlobal
30{
31 //! Provide an interface to builtin atomic functions.
32 //
33 // To check for the existence of builtin functions located in the global namespace :: directly.
34 // This would not be possible without having these types in global namespace.
35 // If the functor is inheriting from std::false_type an signature is explicitly not available. This can be used to
36 // explicitly disable builtin function in case the builtin is broken.
37 // If the functor is inheriting from std::true_type a specialization must implement one of the following
38 // interfaces.
39 // \code{.cpp}
40 // // interface for all atomics except atomicCas
41 // __device__ static T atomic( T* add, T value);
42 // // interface for atomicCas only
43 // __device__ static T atomic( T* add, T compare, T value);
44 // \endcode
45 template<typename TOp, typename T, typename T_Scope, typename TSfinae = void>
46 struct AlpakaBuiltInAtomic : std::false_type
47 {
48 };
49
50 // Cas.
51 template<typename T, typename T_Scope>
52 struct AlpakaBuiltInAtomic<
53 alpaka::operation::Cas,
54 T,
55 T_Scope,
56 typename std::void_t<
57 decltype(atomicCAS(alpaka::core::declval<T*>(), alpaka::core::declval<T>(), alpaka::core::declval<T>()))>>
58 : std::true_type
59 {
60 static __device__ T atomic(T* add, T compare, T value)
61 {
62 return atomicCAS(add, compare, value);
63 }
64 };
65
66 template<typename T>
67 struct AlpakaBuiltInAtomic<
68 alpaka::operation::Cas,
69 T,
70 alpaka::onAcc::scope::Block,
71 typename std::void_t<decltype(atomicCAS_block(
72 alpaka::core::declval<T*>(),
73 alpaka::core::declval<T>(),
74 alpaka::core::declval<T>()))>> : std::true_type
75 {
76 static __device__ T atomic(T* add, T compare, T value)
77 {
78 return atomicCAS_block(add, compare, value);
79 }
80 };
81
82 // Add.
83 template<typename T, typename T_Scope>
84 struct AlpakaBuiltInAtomic<
85 alpaka::operation::Add,
86 T,
87 T_Scope,
88 typename std::void_t<decltype(atomicAdd(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
89 : std::true_type
90 {
91 static __device__ T atomic(T* add, T value)
92 {
93 return atomicAdd(add, value);
94 }
95 };
96
97 template<typename T>
98 struct AlpakaBuiltInAtomic<
99 alpaka::operation::Add,
100 T,
101 alpaka::onAcc::scope::Block,
102 typename std::void_t<decltype(atomicAdd_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
103 : std::true_type
104 {
105 static __device__ T atomic(T* add, T value)
106 {
107 return atomicAdd_block(add, value);
108 }
109 };
110
111# if (ALPAKA_LANG_HIP)
112 // HIP shows bad performance with builtin atomicAdd(float*,float) for the hierarchy threads therefore we do not
113 // call the buildin method and instead use the atomicCAS emulation. For details see:
114 // https://github.com/alpaka-group/alpaka/issues/1657
115 template<>
116 struct AlpakaBuiltInAtomic<alpaka::operation::Add, float, alpaka::onAcc::scope::Block> : std::false_type
117 {
118 };
119# endif
120
121 // Sub.
122
123 template<typename T, typename T_Scope>
124 struct AlpakaBuiltInAtomic<
125 alpaka::operation::Sub,
126 T,
127 T_Scope,
128 typename std::void_t<decltype(atomicSub(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
129 : std::true_type
130 {
131 static __device__ T atomic(T* add, T value)
132 {
133 return atomicSub(add, value);
134 }
135 };
136
137 template<typename T>
138 struct AlpakaBuiltInAtomic<
139 alpaka::operation::Sub,
140 T,
141 alpaka::onAcc::scope::Block,
142 typename std::void_t<decltype(atomicSub_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
143 : std::true_type
144 {
145 static __device__ T atomic(T* add, T value)
146 {
147 return atomicSub_block(add, value);
148 }
149 };
150
151 // Min.
152 template<typename T, typename T_Scope>
153 struct AlpakaBuiltInAtomic<
154 alpaka::operation::Min,
155 T,
156 T_Scope,
157 typename std::void_t<decltype(atomicMin(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
158 : std::true_type
159 {
160 static __device__ T atomic(T* add, T value)
161 {
162 return atomicMin(add, value);
163 }
164 };
165
166 template<typename T>
167 struct AlpakaBuiltInAtomic<
168 alpaka::operation::Min,
169 T,
170 alpaka::onAcc::scope::Block,
171 typename std::void_t<decltype(atomicMin_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
172 : std::true_type
173 {
174 static __device__ T atomic(T* add, T value)
175 {
176 return atomicMin_block(add, value);
177 }
178 };
179
180// disable HIP atomicMin: see https://github.com/ROCm-Developer-Tools/hipamd/pull/40
181# if (ALPAKA_LANG_HIP)
182 template<typename T_Scope>
183 struct AlpakaBuiltInAtomic<alpaka::operation::Min, float, T_Scope> : std::false_type
184 {
185 };
186
187 template<>
188 struct AlpakaBuiltInAtomic<alpaka::operation::Min, float, alpaka::onAcc::scope::Block> : std::false_type
189 {
190 };
191
192 template<typename T_Scope>
193 struct AlpakaBuiltInAtomic<alpaka::operation::Min, double, T_Scope> : std::false_type
194 {
195 };
196
197 template<>
198 struct AlpakaBuiltInAtomic<alpaka::operation::Min, double, alpaka::onAcc::scope::Block> : std::false_type
199 {
200 };
201
202# if !__has_builtin(__hip_atomic_compare_exchange_strong)
203 template<typename T_Scope>
204 struct AlpakaBuiltInAtomic<alpaka::operation::Min, unsigned long long, T_Scope> : std::false_type
205 {
206 };
207
208 template<>
209 struct AlpakaBuiltInAtomic<alpaka::operation::Min, unsigned long long, alpaka::onAcc::scope::Block>
210 : std::false_type
211 {
212 };
213# endif
214# endif
215
216 // Max.
217
218 template<typename T, typename T_Scope>
219 struct AlpakaBuiltInAtomic<
220 alpaka::operation::Max,
221 T,
222 T_Scope,
223 typename std::void_t<decltype(atomicMax(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
224 : std::true_type
225 {
226 static __device__ T atomic(T* add, T value)
227 {
228 return atomicMax(add, value);
229 }
230 };
231
232 template<typename T>
233 struct AlpakaBuiltInAtomic<
234 alpaka::operation::Max,
235 T,
236 alpaka::onAcc::scope::Block,
237 typename std::void_t<decltype(atomicMax_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
238 : std::true_type
239 {
240 static __device__ T atomic(T* add, T value)
241 {
242 return atomicMax_block(add, value);
243 }
244 };
245
246 // disable HIP atomicMax: see https://github.com/ROCm-Developer-Tools/hipamd/pull/40
247# if (ALPAKA_LANG_HIP)
248 template<typename T_Scope>
249 struct AlpakaBuiltInAtomic<alpaka::operation::Max, float, T_Scope> : std::false_type
250 {
251 };
252
253 template<>
254 struct AlpakaBuiltInAtomic<alpaka::operation::Max, float, alpaka::onAcc::scope::Block> : std::false_type
255 {
256 };
257
258 template<typename T_Scope>
259 struct AlpakaBuiltInAtomic<alpaka::operation::Max, double, T_Scope> : std::false_type
260 {
261 };
262
263 template<>
264 struct AlpakaBuiltInAtomic<alpaka::operation::Max, double, alpaka::onAcc::scope::Block> : std::false_type
265 {
266 };
267
268# if !__has_builtin(__hip_atomic_compare_exchange_strong)
269 template<typename T_Scope>
270 struct AlpakaBuiltInAtomic<alpaka::operation::Max, unsigned long long, T_Scope> : std::false_type
271 {
272 };
273
274 template<>
275 struct AlpakaBuiltInAtomic<alpaka::operation::Max, unsigned long long, alpaka::onAcc::scope::Block>
276 : std::false_type
277 {
278 };
279# endif
280# endif
281
282
283 // Exch.
284
285 template<typename T, typename T_Scope>
286 struct AlpakaBuiltInAtomic<
287 alpaka::operation::Exch,
288 T,
289 T_Scope,
290 typename std::void_t<decltype(atomicExch(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
291 : std::true_type
292 {
293 static __device__ T atomic(T* add, T value)
294 {
295 return atomicExch(add, value);
296 }
297 };
298
299 template<typename T>
300 struct AlpakaBuiltInAtomic<
301 alpaka::operation::Exch,
302 T,
303 alpaka::onAcc::scope::Block,
304 typename std::void_t<decltype(atomicExch_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
305 : std::true_type
306 {
307 static __device__ T atomic(T* add, T value)
308 {
309 return atomicExch_block(add, value);
310 }
311 };
312
313 // Inc.
314
315 template<typename T, typename T_Scope>
316 struct AlpakaBuiltInAtomic<
317 alpaka::operation::Inc,
318 T,
319 T_Scope,
320 typename std::void_t<decltype(atomicInc(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
321 : std::true_type
322 {
323 static __device__ T atomic(T* add, T value)
324 {
325 return atomicInc(add, value);
326 }
327 };
328
329 template<typename T>
330 struct AlpakaBuiltInAtomic<
331 alpaka::operation::Inc,
332 T,
333 alpaka::onAcc::scope::Block,
334 typename std::void_t<decltype(atomicInc_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
335 : std::true_type
336 {
337 static __device__ T atomic(T* add, T value)
338 {
339 return atomicInc_block(add, value);
340 }
341 };
342
343 // Dec.
344
345 template<typename T, typename T_Scope>
346 struct AlpakaBuiltInAtomic<
347 alpaka::operation::Dec,
348 T,
349 T_Scope,
350 typename std::void_t<decltype(atomicDec(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
351 : std::true_type
352 {
353 static __device__ T atomic(T* add, T value)
354 {
355 return atomicDec(add, value);
356 }
357 };
358
359 template<typename T>
360 struct AlpakaBuiltInAtomic<
361 alpaka::operation::Dec,
362 T,
363 alpaka::onAcc::scope::Block,
364 typename std::void_t<decltype(atomicDec_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
365 : std::true_type
366 {
367 static __device__ T atomic(T* add, T value)
368 {
369 return atomicDec_block(add, value);
370 }
371 };
372
373 // And.
374
375 template<typename T, typename T_Scope>
376 struct AlpakaBuiltInAtomic<
377 alpaka::operation::And,
378 T,
379 T_Scope,
380 typename std::void_t<decltype(atomicAnd(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
381 : std::true_type
382 {
383 static __device__ T atomic(T* add, T value)
384 {
385 return atomicAnd(add, value);
386 }
387 };
388
389 template<typename T>
390 struct AlpakaBuiltInAtomic<
391 alpaka::operation::And,
392 T,
393 alpaka::onAcc::scope::Block,
394 typename std::void_t<decltype(atomicAnd_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
395 : std::true_type
396 {
397 static __device__ T atomic(T* add, T value)
398 {
399 return atomicAnd_block(add, value);
400 }
401 };
402
403 // Or.
404
405 template<typename T, typename T_Scope>
406 struct AlpakaBuiltInAtomic<
407 alpaka::operation::Or,
408 T,
409 T_Scope,
410 typename std::void_t<decltype(atomicOr(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
411 : std::true_type
412 {
413 static __device__ T atomic(T* add, T value)
414 {
415 return atomicOr(add, value);
416 }
417 };
418
419 template<typename T>
420 struct AlpakaBuiltInAtomic<
421 alpaka::operation::Or,
422 T,
423 alpaka::onAcc::scope::Block,
424 typename std::void_t<decltype(atomicOr_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
425 : std::true_type
426 {
427 static __device__ T atomic(T* add, T value)
428 {
429 return atomicOr_block(add, value);
430 }
431 };
432
433 // Xor.
434
435 template<typename T, typename T_Scope>
436 struct AlpakaBuiltInAtomic<
437 alpaka::operation::Xor,
438 T,
439 T_Scope,
440 typename std::void_t<decltype(atomicXor(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
441 : std::true_type
442 {
443 static __device__ T atomic(T* add, T value)
444 {
445 return atomicXor(add, value);
446 }
447 };
448
449 template<typename T>
450 struct AlpakaBuiltInAtomic<
451 alpaka::operation::Xor,
452 T,
453 alpaka::onAcc::scope::Block,
454 typename std::void_t<decltype(atomicXor_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
455 : std::true_type
456 {
457 static __device__ T atomic(T* add, T value)
458 {
459 return atomicXor_block(add, value);
460 }
461 };
462
463} // namespace alpakaGlobal
464
465#endif
constexpr auto alpaka
Definition fn.hpp:66
functionality which is usable on the accelerator compute device from within a kernel.
Definition executor.hpp:38
constexpr auto atomicAnd(auto const &acc, T *const addr, T const &value, T_Scope const hier=T_Scope()) -> T
Executes an atomic and operation.
Definition atomic.hpp:143
constexpr auto atomicXor(auto const &acc, T *const addr, T const &value, T_Scope const hier=T_Scope()) -> T
Executes an atomic xor operation.
Definition atomic.hpp:165
constexpr auto atomicAdd(auto const &acc, T *const addr, T const &value, T_Scope const hier=T_Scope()) -> T
Executes an atomic add operation.
Definition atomic.hpp:66
constexpr auto atomicSub(auto const &acc, T *const addr, T const &value, T_Scope const hier=T_Scope()) -> T
Executes an atomic sub operation.
Definition atomic.hpp:77
constexpr auto atomicExch(auto const &acc, T *const addr, T const &value, T_Scope const hier=T_Scope()) -> T
Executes an atomic exchange operation.
Definition atomic.hpp:110
constexpr auto atomicDec(auto const &acc, T *const addr, T const &value, T_Scope const hier=T_Scope()) -> T
Executes an atomic decrement operation.
Definition atomic.hpp:132
constexpr auto atomicInc(auto const &acc, T *const addr, T const &value, T_Scope const hier=T_Scope()) -> T
Executes an atomic increment operation.
Definition atomic.hpp:121
constexpr auto atomicOr(auto const &acc, T *const addr, T const &value, T_Scope const hier=T_Scope()) -> T
Executes an atomic or operation.
Definition atomic.hpp:154
constexpr auto atomicMin(auto const &acc, T *const addr, T const &value, T_Scope const hier=T_Scope()) -> T
Executes an atomic min operation.
Definition atomic.hpp:88
constexpr auto atomicMax(auto const &acc, T *const addr, T const &value, T_Scope const hier=T_Scope()) -> T
Executes an atomic max operation.
Definition atomic.hpp:99