alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
atomic.hpp
Go to the documentation of this file.
1/* Copyright 2022 Benjamin Worpitz, René Widera, Jan Stephan, Andrea Bocci, Bernhard Manfred Gruber, Antonio Di Pilato
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
13#include "alpaka/operation.hpp"
14
15#include <limits>
16#include <type_traits>
17
18#if ALPAKA_LANG_CUDA || ALPAKA_LANG_HIP
19
20namespace alpaka::onAcc::internalCompute
21{
22 namespace detail
23 {
24 struct EmulationBase
25 {
26 //! reinterprets an address as an 32bit value for atomicCas emulation usage
27 template<typename TAddressType>
28 static __device__ auto reinterpretAddress(TAddressType* address)
29 -> std::enable_if_t<sizeof(TAddressType) == 4u, unsigned int*>
30 {
31 return reinterpret_cast<unsigned int*>(address);
32 }
33
34 //! reinterprets a address as an 64bit value for atomicCas emulation usage
35 template<typename TAddressType>
36 static __device__ auto reinterpretAddress(TAddressType* address)
37 -> std::enable_if_t<sizeof(TAddressType) == 8u, unsigned long long int*>
38 {
39 return reinterpret_cast<unsigned long long int*>(address);
40 }
41
42 //! reinterprets a value to be usable for the atomicCAS emulation
43 template<typename T_Type>
44 static __device__ auto reinterpretValue(T_Type value)
45 {
46 return *reinterpretAddress(&value);
47 }
48 };
49
50 //! Emulate atomic
51 //
52 // The default implementation will emulate all atomic functions with atomicCAS.
53 template<
54 typename TOp,
55 typename TAtomic,
56 typename T,
57 typename T_Scope,
58 typename TSfinae = void,
59 typename TDefer = void>
60 struct EmulateAtomic : private EmulationBase
61 {
62 public:
63 static __device__ auto atomic(internal::CudaHipAtomic const ctx, T* const addr, T const& value) -> T
64 {
65 auto* const addressAsIntegralType = reinterpretAddress(addr);
66 using EmulatedType = std::decay_t<decltype(*addressAsIntegralType)>;
67
68 // Emulating atomics with atomicCAS is mentioned in the programming guide too.
69 // http://docs.nvidia.com/cuda/cuda-c-programming-guide/#atomic-functions
70# if ALPAKA_LANG_HIP
71# if __has_builtin(__hip_atomic_load)
72 EmulatedType old{__hip_atomic_load(addressAsIntegralType, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT)};
73# else
74 EmulatedType old{__atomic_load_n(addressAsIntegralType, __ATOMIC_RELAXED)};
75# endif
76# else
77 EmulatedType old{*addressAsIntegralType};
78# endif
79 EmulatedType assumed;
80 do
81 {
82 assumed = old;
83 T v = *(reinterpret_cast<T*>(&assumed));
84 TOp{}(&v, value);
85 using Cas = Atomic::Op<alpaka::operation::Cas, internal::CudaHipAtomic, EmulatedType, T_Scope>;
86 old = Cas::atomicOp(ctx, addressAsIntegralType, assumed, reinterpretValue(v));
87 // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
88 } while(assumed != old);
89 return *(reinterpret_cast<T*>(&old));
90 }
91 };
92
93 //! Emulate operation::Cas with equivalent unisigned integral type
94 template<typename T, typename T_Scope>
95 struct EmulateAtomic<alpaka::operation::Cas, internal::CudaHipAtomic, T, T_Scope> : private EmulationBase
96 {
97 static __device__ auto atomic(
98 internal::CudaHipAtomic const ctx,
99 T* const addr,
100 T const& compare,
101 T const& value) -> T
102 {
103 auto* const addressAsIntegralType = reinterpretAddress(addr);
104 using EmulatedType = std::decay_t<decltype(*addressAsIntegralType)>;
105 EmulatedType reinterpretedCompare = reinterpretValue(compare);
106 EmulatedType reinterpretedValue = reinterpretValue(value);
107
108 auto old
109 = Atomic::Op<alpaka::operation::Cas, internal::CudaHipAtomic, EmulatedType, T_Scope>::atomicOp(
110 ctx,
111 addressAsIntegralType,
112 reinterpretedCompare,
113 reinterpretedValue);
114
115 return *(reinterpret_cast<T*>(&old));
116 }
117 };
118
119 //! Emulate operation::Sub with atomicAdd
120 template<typename T, typename T_Scope>
121 struct EmulateAtomic<alpaka::operation::Sub, internal::CudaHipAtomic, T, T_Scope>
122 {
123 static __device__ auto atomic(internal::CudaHipAtomic const ctx, T* const addr, T const& value) -> T
124 {
125 return Atomic::Op<alpaka::operation::Add, internal::CudaHipAtomic, T, T_Scope>::atomicOp(
126 ctx,
127 addr,
128 -value);
129 }
130 };
131
132 //! operation::Dec can not be implemented for floating point types!
133 template<typename T, typename T_Scope>
134 struct EmulateAtomic<
135 operation::Dec,
136 internal::CudaHipAtomic,
137 T,
138 T_Scope,
139 std::enable_if_t<std::is_floating_point_v<T>>>
140 {
141 static __device__ auto atomic(internal::CudaHipAtomic const&, T* const, T const&) -> T
142 {
143 static_assert(
144 !sizeof(T),
145 "EmulateAtomic<alpaka::operation::Dec> is not supported for floating point data types!");
146 return T{};
147 }
148 };
149
150 //! operation::Inc can not be implemented for floating point types!
151 template<typename T, typename T_Scope>
152 struct EmulateAtomic<
153 operation::Inc,
154 internal::CudaHipAtomic,
155 T,
156 T_Scope,
157 std::enable_if_t<std::is_floating_point_v<T>>>
158 {
159 static __device__ auto atomic(internal::CudaHipAtomic const&, T* const, T const&) -> T
160 {
161 static_assert(
162 !sizeof(T),
163 "EmulateAtomic<alpaka::operation::Inc> is not supported for floating point data types!");
164 return T{};
165 }
166 };
167
168 //! operation::And can not be implemented for floating point types!
169 template<typename T, typename T_Scope>
170 struct EmulateAtomic<
171 operation::And,
172 internal::CudaHipAtomic,
173 T,
174 T_Scope,
175 std::enable_if_t<std::is_floating_point_v<T>>>
176 {
177 static __device__ auto atomic(internal::CudaHipAtomic const&, T* const, T const&) -> T
178 {
179 static_assert(
180 !sizeof(T),
181 "EmulateAtomic<alpaka::operation::And> is not supported for floating point data types!");
182 return T{};
183 }
184 };
185
186 //! operation::Or can not be implemented for floating point types!
187 template<typename T, typename T_Scope>
188 struct EmulateAtomic<
189 operation::Or,
190 internal::CudaHipAtomic,
191 T,
192 T_Scope,
193 std::enable_if_t<std::is_floating_point_v<T>>>
194 {
195 static __device__ auto atomic(internal::CudaHipAtomic const&, T* const, T const&) -> T
196 {
197 static_assert(
198 !sizeof(T),
199 "EmulateAtomic<alpaka::operation::Or> is not supported for floating point data types!");
200 return T{};
201 }
202 };
203
204 //! operation::Xor can not be implemented for floating point types!
205 template<typename T, typename T_Scope>
206 struct EmulateAtomic<
207 operation::Xor,
208 internal::CudaHipAtomic,
209 T,
210 T_Scope,
211 std::enable_if_t<std::is_floating_point_v<T>>>
212 {
213 static __device__ auto atomic(internal::CudaHipAtomic const&, T* const, T const&) -> T
214 {
215 static_assert(
216 !sizeof(T),
217 "EmulateAtomic<alpaka::operation::Xor> is not supported for floating point data types!");
218 return T{};
219 }
220 };
221
222 } // namespace detail
223
224 //! Generic atomic implementation
225 //
226 // - unsigned long int will be redirected to unsigned long long int or unsigned int implementation depending if
227 // unsigned long int is a 64 or 32bit data type.
228 // - Atomics which are not available as builtin atomic will be emulated.
229 template<typename TOp, typename T, typename T_Scope>
230 struct Atomic::Op<TOp, internal::CudaHipAtomic, T, T_Scope>
231 {
232 static __device__ auto atomicOp(
233 internal::CudaHipAtomic const ctx,
234 [[maybe_unused]] T* const addr,
235 [[maybe_unused]] T const& value) -> T
236 {
237 static_assert(
238 sizeof(T) == 4u || sizeof(T) == 8u,
239 "atomicOp<TOp, internal::CudaHipAtomic, T>(atomic, addr, value) is not supported! Only 64 and "
240 "32bit atomics are supported.");
241
242 if constexpr(::AlpakaBuiltInAtomic<TOp, T, T_Scope>::value)
243 return ::AlpakaBuiltInAtomic<TOp, T, T_Scope>::atomic(addr, value);
244
245 else if constexpr(std::is_same_v<unsigned long int, T>)
246 {
247 if constexpr(sizeof(T) == 4u && ::AlpakaBuiltInAtomic<TOp, unsigned int, T_Scope>::value)
248 return ::AlpakaBuiltInAtomic<TOp, unsigned int, T_Scope>::atomic(
249 reinterpret_cast<unsigned int*>(addr),
250 static_cast<unsigned int>(value));
251 else if constexpr(
252 sizeof(T) == 8u && ::AlpakaBuiltInAtomic<TOp, unsigned long long int, T_Scope>::value) // LP64
253 {
254 return ::AlpakaBuiltInAtomic<TOp, unsigned long long int, T_Scope>::atomic(
255 reinterpret_cast<unsigned long long int*>(addr),
256 static_cast<unsigned long long int>(value));
257 }
258 }
259
260 return detail::EmulateAtomic<TOp, internal::CudaHipAtomic, T, T_Scope>::atomic(ctx, addr, value);
261 }
262 };
263
264 template<typename T, typename T_Scope>
265 struct Atomic::Op<alpaka::operation::Cas, internal::CudaHipAtomic, T, T_Scope>
266 {
267 static __device__ auto atomicOp(
268 [[maybe_unused]] internal::CudaHipAtomic const ctx,
269 [[maybe_unused]] T* const addr,
270 [[maybe_unused]] T const& compare,
271 [[maybe_unused]] T const& value) -> T
272 {
273 static_assert(
274 sizeof(T) == 4u || sizeof(T) == 8u,
275 "atomicOp<alpaka::operation::Cas, internal::CudaHipAtomic, T>(atomic, addr, compare, value) is not "
276 "supported! Only 64 and "
277 "32bit atomics are supported.");
278
279 if constexpr(::AlpakaBuiltInAtomic<alpaka::operation::Cas, T, T_Scope>::value)
280 return ::AlpakaBuiltInAtomic<alpaka::operation::Cas, T, T_Scope>::atomic(addr, compare, value);
281
282 else if constexpr(std::is_same_v<unsigned long int, T>)
283 {
284 if constexpr(
285 sizeof(T) == 4u && ::AlpakaBuiltInAtomic<alpaka::operation::Cas, unsigned int, T_Scope>::value)
286 return ::AlpakaBuiltInAtomic<alpaka::operation::Cas, unsigned int, T_Scope>::atomic(
287 reinterpret_cast<unsigned int*>(addr),
288 static_cast<unsigned int>(compare),
289 static_cast<unsigned int>(value));
290 else if constexpr(
291 sizeof(T) == 8u
292 && ::AlpakaBuiltInAtomic<alpaka::operation::Cas, unsigned long long int, T_Scope>::value) // LP64
293 {
294 return ::AlpakaBuiltInAtomic<alpaka::operation::Cas, unsigned long long int, T_Scope>::atomic(
295 reinterpret_cast<unsigned long long int*>(addr),
296 static_cast<unsigned long long int>(compare),
297 static_cast<unsigned long long int>(value));
298 }
299 }
300
301 return detail::EmulateAtomic<alpaka::operation::Cas, internal::CudaHipAtomic, T, T_Scope>::atomic(
302 ctx,
303 addr,
304 compare,
305 value);
306 }
307 };
308} // namespace alpaka::onAcc::internalCompute
309#endif
constexpr auto alpaka
Definition fn.hpp:66
constexpr auto atomicOp(auto const &acc, T *const addr, T const &value, T_Scope const scope=T_Scope()) -> T
Executes the given operation atomically.
Definition atomic.hpp:26