19namespace alpaka::onAcc::internalCompute
24 template<concepts::MemoryOrder TMemOrder>
25 [[maybe_unused]]
static constexpr __device__
void cuda_ptx_fence_system([[maybe_unused]] TMemOrder
const)
27# if ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(9, 0, 0)
29 if constexpr(std::is_same_v<TMemOrder, order::Relaxed>)
32 else if constexpr(std::is_same_v<TMemOrder, order::Acquire>)
34 asm volatile(
"fence.acquire.sys;" ::);
36 else if constexpr(std::is_same_v<TMemOrder, order::Release>)
38 asm volatile(
"fence.release.sys;" ::);
40 else if constexpr(std::is_same_v<TMemOrder, order::AcqRel>)
42 asm volatile(
"fence.acq_rel.sys;" ::);
46 asm volatile(
"fence.sc.sys;" ::);
48# elif ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(7, 0, 0)
50 if constexpr(std::is_same_v<TMemOrder, order::Relaxed>)
53 else if constexpr(std::is_same_v<TMemOrder, order::Acquire>)
55 asm volatile(
"fence.acq_rel.sys;" ::);
57 else if constexpr(std::is_same_v<TMemOrder, order::Release>)
59 asm volatile(
"fence.acq_rel.sys;" ::);
61 else if constexpr(std::is_same_v<TMemOrder, order::AcqRel>)
63 asm volatile(
"fence.acq_rel.sys;" ::);
68 asm volatile(
"fence.sc.sys;" ::);
73 template<concepts::MemoryOrder TMemOrder>
74 [[maybe_unused]]
static constexpr __device__
void cuda_ptx_fence_device([[maybe_unused]] TMemOrder
const)
76# if ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(9, 0, 0)
78 if constexpr(std::is_same_v<TMemOrder, order::Relaxed>)
81 else if constexpr(std::is_same_v<TMemOrder, order::Acquire>)
83 asm volatile(
"fence.acquire.gpu;" ::);
85 else if constexpr(std::is_same_v<TMemOrder, order::Release>)
87 asm volatile(
"fence.release.gpu;" ::);
89 else if constexpr(std::is_same_v<TMemOrder, order::AcqRel>)
91 asm volatile(
"fence.acq_rel.gpu;" ::);
95 asm volatile(
"fence.sc.gpu;" ::);
97# elif ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(7, 0, 0)
99 if constexpr(std::is_same_v<TMemOrder, order::Relaxed>)
102 else if constexpr(std::is_same_v<TMemOrder, order::Acquire>)
104 asm volatile(
"fence.acq_rel.gpu;" ::);
106 else if constexpr(std::is_same_v<TMemOrder, order::Release>)
108 asm volatile(
"fence.acq_rel.gpu;" ::);
110 else if constexpr(std::is_same_v<TMemOrder, order::AcqRel>)
112 asm volatile(
"fence.acq_rel.gpu;" ::);
117 asm volatile(
"fence.sc.gpu;" ::);
122 template<concepts::MemoryOrder TMemOrder>
123 [[maybe_unused]]
static constexpr __device__
void cuda_ptx_fence_block([[maybe_unused]] TMemOrder
const)
125# if ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(9, 0, 0)
127 if constexpr(std::is_same_v<TMemOrder, order::Relaxed>)
130 else if constexpr(std::is_same_v<TMemOrder, order::Acquire>)
132 asm volatile(
"fence.acquire.cta;" ::);
134 else if constexpr(std::is_same_v<TMemOrder, order::Release>)
136 asm volatile(
"fence.release.cta;" ::);
138 else if constexpr(std::is_same_v<TMemOrder, order::AcqRel>)
140 asm volatile(
"fence.acq_rel.cta;" ::);
144 asm volatile(
"fence.sc.cta;" ::);
146# elif ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(7, 0, 0)
148 if constexpr(std::is_same_v<TMemOrder, order::Relaxed>)
151 else if constexpr(std::is_same_v<TMemOrder, order::Acquire>)
153 asm volatile(
"fence.acq_rel.cta;" ::);
155 else if constexpr(std::is_same_v<TMemOrder, order::Release>)
157 asm volatile(
"fence.acq_rel.cta;" ::);
159 else if constexpr(std::is_same_v<TMemOrder, order::AcqRel>)
161 asm volatile(
"fence.acq_rel.cta;" ::);
165 asm volatile(
"fence.sc.cta;" ::);
170 template<concepts::MemoryOrder TMemOrder>
171 [[maybe_unused]]
static constexpr __device__
void cuda_mem_fence_block([[maybe_unused]] TMemOrder
const order)
173 if constexpr(std::is_same_v<TMemOrder, order::Relaxed>)
177# ifdef ALPAKA_CUDA_ATOMIC
178 ::cuda::atomic_thread_fence(MemOrderCuda::get(order), ::cuda::thread_scope_block);
181# if ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(7, 0, 0)
182 cuda_ptx_fence_block(order);
184 __threadfence_block();
190 template<concepts::MemoryOrder TMemOrder>
191 [[maybe_unused]]
static constexpr __device__
void cuda_mem_fence_device([[maybe_unused]] TMemOrder
const order)
193 if constexpr(std::is_same_v<TMemOrder, order::Relaxed>)
197# ifdef ALPAKA_CUDA_ATOMIC
198 ::cuda::atomic_thread_fence(MemOrderCuda::get(order), ::cuda::thread_scope_device);
201# if ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(7, 0, 0)
202 cuda_ptx_fence_device(order);
210 template<concepts::MemoryOrder TMemOrder>
211 [[maybe_unused]]
static constexpr __device__
void cuda_mem_fence_system([[maybe_unused]] TMemOrder
const order)
213 if constexpr(std::is_same_v<TMemOrder, order::Relaxed>)
217# ifdef ALPAKA_CUDA_ATOMIC
218 ::cuda::atomic_thread_fence(MemOrderCuda::get(order), ::cuda::thread_scope_system);
221# if ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(7, 0, 0)
222 cuda_ptx_fence_system(order);
224 __threadfence_system();
236 template<
typename T_Api,
typename T_Scope, concepts::MemoryOrder T_Order>
237 requires std::same_as<T_Api, api::Cuda>
238 struct MemoryFence::Op<T_Api, T_Scope, T_Order>
240 ALPAKA_FN_ACC constexpr void operator()(onAcc::concepts::Acc
auto const&, T_Scope
const, T_Order
const order)
245 if constexpr(std::is_same_v<T_Scope, scope::Block>)
247 detail::cuda_mem_fence_block(order);
249 else if constexpr(std::is_same_v<T_Scope, scope::Device>)
251 detail::cuda_mem_fence_device(order);
253 else if constexpr(std::is_same_v<T_Scope, scope::System>)
255 detail::cuda_mem_fence_system(order);
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_F...