18#if ALPAKA_LANG_CUDA || ALPAKA_LANG_HIP
27 template<
typename TAddressType>
28 static __device__
auto reinterpretAddress(TAddressType* address)
29 -> std::enable_if_t<
sizeof(TAddressType) == 4u,
unsigned int*>
31 return reinterpret_cast<unsigned int*
>(address);
35 template<
typename TAddressType>
36 static __device__
auto reinterpretAddress(TAddressType* address)
37 -> std::enable_if_t<
sizeof(TAddressType) == 8u,
unsigned long long int*>
39 return reinterpret_cast<unsigned long long int*
>(address);
43 template<
typename T_Type>
44 static __device__
auto reinterpretValue(T_Type value)
46 return *reinterpretAddress(&value);
58 typename TSfinae = void,
59 typename TDefer =
void>
60 struct EmulateAtomic :
private EmulationBase
63 static __device__
auto atomic(internal::CudaHipAtomic
const ctx, T*
const addr, T
const& value) -> T
65 auto*
const addressAsIntegralType = reinterpretAddress(addr);
66 using EmulatedType = std::decay_t<
decltype(*addressAsIntegralType)>;
71# if __has_builtin(__hip_atomic_load)
72 EmulatedType old{__hip_atomic_load(addressAsIntegralType, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT)};
74 EmulatedType old{__atomic_load_n(addressAsIntegralType, __ATOMIC_RELAXED)};
77 EmulatedType old{*addressAsIntegralType};
83 T v = *(
reinterpret_cast<T*
>(&assumed));
85 using Cas = Atomic::Op<alpaka::operation::Cas, internal::CudaHipAtomic, EmulatedType, T_Scope>;
86 old = Cas::atomicOp(ctx, addressAsIntegralType, assumed, reinterpretValue(v));
88 }
while(assumed != old);
89 return *(
reinterpret_cast<T*
>(&old));
94 template<
typename T,
typename T_Scope>
95 struct EmulateAtomic<
alpaka::operation::Cas, internal::CudaHipAtomic, T, T_Scope> :
private EmulationBase
97 static __device__
auto atomic(
98 internal::CudaHipAtomic
const ctx,
103 auto*
const addressAsIntegralType = reinterpretAddress(addr);
104 using EmulatedType = std::decay_t<
decltype(*addressAsIntegralType)>;
105 EmulatedType reinterpretedCompare = reinterpretValue(compare);
106 EmulatedType reinterpretedValue = reinterpretValue(value);
109 = Atomic::Op<alpaka::operation::Cas, internal::CudaHipAtomic, EmulatedType, T_Scope>::atomicOp(
111 addressAsIntegralType,
112 reinterpretedCompare,
115 return *(
reinterpret_cast<T*
>(&old));
120 template<
typename T,
typename T_Scope>
121 struct EmulateAtomic<
alpaka::operation::Sub, internal::CudaHipAtomic, T, T_Scope>
123 static __device__
auto atomic(internal::CudaHipAtomic
const ctx, T*
const addr, T
const& value) -> T
125 return Atomic::Op<alpaka::operation::Add, internal::CudaHipAtomic, T, T_Scope>::atomicOp(
133 template<
typename T,
typename T_Scope>
134 struct EmulateAtomic<
136 internal::CudaHipAtomic,
139 std::enable_if_t<std::is_floating_point_v<T>>>
141 static __device__
auto atomic(internal::CudaHipAtomic
const&, T*
const, T
const&) -> T
145 "EmulateAtomic<alpaka::operation::Dec> is not supported for floating point data types!");
151 template<
typename T,
typename T_Scope>
152 struct EmulateAtomic<
154 internal::CudaHipAtomic,
157 std::enable_if_t<std::is_floating_point_v<T>>>
159 static __device__
auto atomic(internal::CudaHipAtomic
const&, T*
const, T
const&) -> T
163 "EmulateAtomic<alpaka::operation::Inc> is not supported for floating point data types!");
169 template<
typename T,
typename T_Scope>
170 struct EmulateAtomic<
172 internal::CudaHipAtomic,
175 std::enable_if_t<std::is_floating_point_v<T>>>
177 static __device__
auto atomic(internal::CudaHipAtomic
const&, T*
const, T
const&) -> T
181 "EmulateAtomic<alpaka::operation::And> is not supported for floating point data types!");
187 template<
typename T,
typename T_Scope>
188 struct EmulateAtomic<
190 internal::CudaHipAtomic,
193 std::enable_if_t<std::is_floating_point_v<T>>>
195 static __device__
auto atomic(internal::CudaHipAtomic
const&, T*
const, T
const&) -> T
199 "EmulateAtomic<alpaka::operation::Or> is not supported for floating point data types!");
205 template<
typename T,
typename T_Scope>
206 struct EmulateAtomic<
208 internal::CudaHipAtomic,
211 std::enable_if_t<std::is_floating_point_v<T>>>
213 static __device__
auto atomic(internal::CudaHipAtomic
const&, T*
const, T
const&) -> T
217 "EmulateAtomic<alpaka::operation::Xor> is not supported for floating point data types!");
229 template<
typename TOp,
typename T,
typename T_Scope>
230 struct Atomic::Op<TOp, internal::CudaHipAtomic, T, T_Scope>
233 internal::CudaHipAtomic
const ctx,
234 [[maybe_unused]] T*
const addr,
235 [[maybe_unused]] T
const& value) -> T
238 sizeof(T) == 4u ||
sizeof(T) == 8u,
239 "atomicOp<TOp, internal::CudaHipAtomic, T>(atomic, addr, value) is not supported! Only 64 and "
240 "32bit atomics are supported.");
242 if constexpr(::AlpakaBuiltInAtomic<TOp, T, T_Scope>::value)
243 return ::AlpakaBuiltInAtomic<TOp, T, T_Scope>::atomic(addr, value);
245 else if constexpr(std::is_same_v<unsigned long int, T>)
247 if constexpr(
sizeof(T) == 4u && ::AlpakaBuiltInAtomic<TOp, unsigned int, T_Scope>::value)
248 return ::AlpakaBuiltInAtomic<TOp, unsigned int, T_Scope>::atomic(
249 reinterpret_cast<unsigned int*
>(addr),
250 static_cast<unsigned int>(value));
252 sizeof(T) == 8u && ::AlpakaBuiltInAtomic<TOp, unsigned long long int, T_Scope>::value)
254 return ::AlpakaBuiltInAtomic<TOp, unsigned long long int, T_Scope>::atomic(
255 reinterpret_cast<unsigned long long int*
>(addr),
256 static_cast<unsigned long long int>(value));
260 return detail::EmulateAtomic<TOp, internal::CudaHipAtomic, T, T_Scope>::atomic(ctx, addr, value);
264 template<
typename T,
typename T_Scope>
265 struct Atomic::Op<
alpaka::operation::Cas, internal::CudaHipAtomic, T, T_Scope>
268 [[maybe_unused]] internal::CudaHipAtomic
const ctx,
269 [[maybe_unused]] T*
const addr,
270 [[maybe_unused]] T
const& compare,
271 [[maybe_unused]] T
const& value) -> T
274 sizeof(T) == 4u ||
sizeof(T) == 8u,
275 "atomicOp<alpaka::operation::Cas, internal::CudaHipAtomic, T>(atomic, addr, compare, value) is not "
276 "supported! Only 64 and "
277 "32bit atomics are supported.");
279 if constexpr(::AlpakaBuiltInAtomic<alpaka::operation::Cas, T, T_Scope>::value)
280 return ::AlpakaBuiltInAtomic<alpaka::operation::Cas, T, T_Scope>::atomic(addr, compare, value);
282 else if constexpr(std::is_same_v<unsigned long int, T>)
285 sizeof(T) == 4u && ::AlpakaBuiltInAtomic<alpaka::operation::Cas, unsigned int, T_Scope>::value)
286 return ::AlpakaBuiltInAtomic<alpaka::operation::Cas, unsigned int, T_Scope>::atomic(
287 reinterpret_cast<unsigned int*
>(addr),
288 static_cast<unsigned int>(compare),
289 static_cast<unsigned int>(value));
292 && ::AlpakaBuiltInAtomic<alpaka::operation::Cas, unsigned long long int, T_Scope>::value)
294 return ::AlpakaBuiltInAtomic<alpaka::operation::Cas, unsigned long long int, T_Scope>::atomic(
295 reinterpret_cast<unsigned long long int*
>(addr),
296 static_cast<unsigned long long int>(compare),
297 static_cast<unsigned long long int>(value));
301 return detail::EmulateAtomic<alpaka::operation::Cas, internal::CudaHipAtomic, T, T_Scope>::atomic(
constexpr auto atomicOp(auto const &acc, T *const addr, T const &value, T_Scope const scope=T_Scope()) -> T
Executes the given operation atomically.
Implements a atomic operation.