alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
globalMem.hpp
Go to the documentation of this file.
1/* Copyright 2025 René Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7#include "alpaka/api/api.hpp"
9#include "alpaka/core/PP.hpp"
11#include "alpaka/mem/MdSpan.hpp"
13
14/** @file global device memory implementation for all APIs
15 *
16 * We need many precompiler macros to handle the device global feature.
17 * The reason is that we would like to have the possibility to create a variable where the same name can be used on all
18 * devices. Each device will have it's own instance of memory and via a global instance GlobalDeviceMemoryWrapper we
19 * redirect queries to the corresponding instance of memory based on the alpaka API.
20 *
21 * OneAPI Sycl is the only API which does not allow querying the device pointer of a global variable from the host.
22 * That's why we have special implementations of onHost::memcpy() which using the device global object directly.
23 * That's also the reason why we can not use the global memory for onHost::fill() or onHost::memset().
24 */
26{
27 struct MemcpyDeviceGlobal;
28} // namespace alpaka::onHost::internal
29
30/** Create a device global variable for the API host */
31#define ALPAKA_DEVICE_GLOBAL_DATA_HOST(attributes, dataType, name, ...) \
32 namespace alpaka_onHost \
33 { \
34 [[maybe_unused]] attributes alpaka::onAcc::internal::GlobalDeviceMemoryDataWrapper< \
35 ALPAKA_PP_REMOVE_BRACKETS(dataType)> name __VA_OPT__({__VA_ARGS__}); \
36 }
37
38/** Create a forward declaration of a device global variable for the API host */
39#define ALPAKA_DEVICE_GLOBAL_DATA_HOST_EXTERN(attributes, dataType, name) \
40 namespace alpaka_onHost \
41 { \
42 extern attributes alpaka::onAcc::internal::GlobalDeviceMemoryDataWrapper<ALPAKA_PP_REMOVE_BRACKETS(dataType)> \
43 name; \
44 }
45
46#if ALPAKA_LANG_CUDA || ALPAKA_LANG_HIP
47/** Create a device global variable for the API cuda/hip */
48# define ALPAKA_DEVICE_GLOBAL_DATA_CUDA_HIP(attributes, dataType, name, ...) \
49 namespace alpaka_onAccCudaHip \
50 { \
51 __device__ attributes \
52 alpaka::onAcc::internal::GlobalDeviceMemoryDataWrapper<ALPAKA_PP_REMOVE_BRACKETS(dataType)> \
53 name __VA_OPT__({__VA_ARGS__}); \
54 }
55
56/** Access operator for usage in AlpakaGlobalStorage for API cuda/hip */
57# define ALPAKA_DEVICE_GLOBAL_GET_CUDA_HIP(attributes, dataType, name, ...) \
58 template<typename T_Api> \
59 requires(std::is_same_v<alpaka::api::Cuda, T_Api> || std::is_same_v<alpaka::api::Hip, T_Api>) \
60 constexpr auto& get(T_Api) const \
61 { \
62 return alpaka_onAccCudaHip::name.value; \
63 } \
64 template<typename T_Api> \
65 requires(std::is_same_v<alpaka::api::Cuda, T_Api> || std::is_same_v<alpaka::api::Hip, T_Api>) \
66 constexpr auto& getHandle(T_Api) const \
67 { \
68 return alpaka_onAccCudaHip::name.value; \
69 }
70
71#else
72# define ALPAKA_DEVICE_GLOBAL_DATA_CUDA_HIP(attributes, dataType, name, ...)
73# define ALPAKA_DEVICE_GLOBAL_GET_CUDA_HIP(attributes, dataType, name, ...)
74#endif
75
76#if ALPAKA_LANG_CUDA || ALPAKA_LANG_HIP
77/* Define the device external symbol only if relocatable device code is enabled. nvcc is changing the keyword 'extern'
78 * to static in case rdc is disabled which results into redefinition compile errors. To make HIP and CUDA behave equal
79 * we do not expose the symbal for HIP as well in case rdc is disabled.
80 */
81# if defined(__CUDACC_RDC__) || defined(__CLANG_RDC__)
82# define ALPAKA_DEVICE_GLOBAL_DATA_CUDA_HIP_EXTERN(attributes, dataType, name) \
83 namespace alpaka_onAccCudaHip \
84 { \
85 extern __device__ attributes \
86 alpaka::onAcc::internal::GlobalDeviceMemoryDataWrapper<ALPAKA_PP_REMOVE_BRACKETS(dataType)> \
87 name; \
88 }
89# else
90/** Create a forward declaration of a device global variable for the API cuda/hip */
91# define ALPAKA_DEVICE_GLOBAL_DATA_CUDA_HIP_EXTERN(attributes, dataType, name)
92# endif
93#else
94# define ALPAKA_DEVICE_GLOBAL_DATA_CUDA_HIP_EXTERN(attributes, dataType, name)
95#endif
96
97#if ALPAKA_LANG_ONEAPI
98/** Create a device global variable for the API oneApi */
99# define ALPAKA_DEVICE_GLOBAL_DATA_ONEAPI(attributes, dataType, name, ...) \
100 namespace alpaka_onAccOneAPI \
101 { \
102 [[maybe_unused]] attributes sycl::ext::oneapi::experimental::device_global< \
103 alpaka::onAcc::internal::GlobalDeviceMemoryDataWrapper<ALPAKA_PP_REMOVE_BRACKETS(dataType)>> name \
104 __VA_OPT__( \
105 {alpaka::onAcc::internal::GlobalDeviceMemoryDataWrapper<ALPAKA_PP_REMOVE_BRACKETS(dataType)>{ \
106 __VA_ARGS__}}); \
107 }
108
109/** Create a forward declaration of a device global variable for the API oneApi */
110# define ALPAKA_DEVICE_GLOBAL_DATA_ONEAPI_EXTERN(attributes, dataType, name) \
111 namespace alpaka_onAccOneAPI \
112 { \
113 extern attributes sycl::ext::oneapi::experimental::device_global< \
114 alpaka::onAcc::internal::GlobalDeviceMemoryDataWrapper<ALPAKA_PP_REMOVE_BRACKETS(dataType)>> \
115 name; \
116 }
117
118/** Access operator for usage in AlpakaGlobalStorage for API oneApi */
119# define ALPAKA_DEVICE_GLOBAL_GET_ONEAPI(attributes, dataType, name, ...) \
120 template<typename T_Api> \
121 requires(std::is_same_v<alpaka::api::OneApi, T_Api>) \
122 constexpr auto& get(T_Api) const \
123 { \
124 return alpaka_onAccOneAPI::name.get().value; \
125 } \
126 template<typename T_Api> \
127 requires(std::is_same_v<alpaka::api::OneApi, T_Api>) \
128 constexpr auto& getHandle(T_Api) const \
129 { \
130 return alpaka_onAccOneAPI::name; \
131 }
132#else
133# define ALPAKA_DEVICE_GLOBAL_DATA_ONEAPI(attributes, dataType, name, ...)
134# define ALPAKA_DEVICE_GLOBAL_DATA_ONEAPI_EXTERN(attributes, dataType, name)
135# define ALPAKA_DEVICE_GLOBAL_GET_ONEAPI(attributes, dataType, name, ...)
136#endif
137
139{
140 /** Helper class to wrap device global memory data.
141 *
142 * The reason why this wrapper is required is that SYCL oneAPI is using a special type which does not support C
143 * array initialization. All arguments passed to the wrapper constructor are forwarded to the data member.
144 */
145 template<typename T>
147 {
148 constexpr GlobalDeviceMemoryDataWrapper(auto const&... args) : value{ALPAKA_FORWARD(args)...}
149 {
150 }
151
153
154 T* data()
155 {
156 return &value;
157 }
158
159 T const* data() const
160 {
161 return &value;
162 }
163 };
164
165 /** Specialization of GlobalDeviceMemoryDataWrapper for C static arrays.
166 *
167 * This specialization is required because C static arrays cannot have a constructor.
168 * Therefore, the data member is initialized directly.
169 */
170 template<alpaka::concepts::CStaticArray T>
172 {
174 using value_type = std::remove_all_extents_t<T>;
175
177 {
178 return reinterpret_cast<value_type*>(&value);
179 }
180
181 value_type const* data() const
182 {
183 return reinterpret_cast<value_type const*>(&value);
184 }
185 };
186
187 /** Helper class to provide access to device global memory variables */
188 template<typename T_Storage, typename T_Type>
189 struct GlobalDeviceMemoryWrapper : private T_Storage
190 {
191 private:
193
194 /** Get the handle to call native API specific memcopy for global device memory operation
195 *
196 * @attention This method is for internal usage only.
197 *
198 * @return type depends on the native API e.g Cuda, OneApi, ...
199 */
200 template<alpaka::concepts::Api T_Api>
201 constexpr decltype(auto) getHandle(T_Api api) const
202 {
203 return T_Storage::getHandle(api);
204 }
205
206 public:
207 using type = T_Type;
208
209 constexpr decltype(auto) get() const
210 {
211 return T_Storage::get(thisApi());
212 }
213
214 constexpr decltype(auto) get() const requires(std::is_array_v<type>)
215 {
216 // a static array of type C also uses size_t to define its length
217 return alpaka::MdSpanArray<type, size_t>{T_Storage::get(thisApi())};
218 }
219
220 constexpr operator type&()
221 {
222 return T_Storage::get(thisApi());
223 }
224
225 constexpr operator type const&() const
226 {
227 return T_Storage::get(thisApi());
228 }
229 };
230} // namespace alpaka::onAcc::internal
#define ALPAKA_FORWARD(instance)
Perfectly forward an instance as argument.
Definition common.hpp:147
constexpr auto thisApi()
provides the API used during the execution of the current code path
Definition api.hpp:26
constexpr GlobalDeviceMemoryDataWrapper(auto const &... args)
Helper class to provide access to device global memory variables.
constexpr decltype(auto) getHandle(T_Api api) const
Get the handle to call native API specific memcopy for global device memory operation.