alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
ApiHipRt.hpp
Go to the documentation of this file.
1/* Copyright 2022 Andrea Bocci
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
8
9#if ALPAKA_LANG_HIP
10
11# include <hip/hip_runtime_api.h>
12# include <hip/hip_version.h>
13
14namespace alpaka
15{
16 struct ApiHipRt
17 {
18 // Names
19 static constexpr char name[] = "Hip";
20 static constexpr auto version = ALPAKA_VERSION_NUMBER(HIP_VERSION_MAJOR, HIP_VERSION_MINOR, 0);
21
22 // Types
23 using DeviceAttr_t = ::hipDeviceAttribute_t;
24 using PointerAttr_t = ::hipPointerAttribute_t;
25 using Memory_t = ::hipMemoryType;
26 using DeviceProp_t = ::hipDeviceProp_t;
27 using Error_t = ::hipError_t;
28 using Event_t = ::hipEvent_t;
29 using Extent_t = ::hipExtent;
30 using Flag_t = unsigned int;
31 using FuncAttributes_t = ::hipFuncAttributes;
32 using HostFn_t = void (*)(void* data); // same as hipHostFn_t
33 using Limit_t = ::hipLimit_t;
34 using Memcpy3DParms_t = ::hipMemcpy3DParms;
35 using MemcpyKind_t = ::hipMemcpyKind;
36 using PitchedPtr_t = ::hipPitchedPtr;
37 using Pos_t = ::hipPos;
38 using Stream_t = ::hipStream_t;
39
40 // Constants
41 static constexpr Error_t success = ::hipSuccess;
42 static constexpr Error_t errorNotReady = ::hipErrorNotReady;
43 static constexpr Error_t errorHostMemoryAlreadyRegistered = ::hipErrorHostMemoryAlreadyRegistered;
44 static constexpr Error_t errorHostMemoryNotRegistered = ::hipErrorHostMemoryNotRegistered;
45 static constexpr Error_t errorUnsupportedLimit = ::hipErrorUnsupportedLimit;
46 static constexpr Error_t errorUnknown = ::hipErrorUnknown;
47
48 static constexpr Flag_t eventDefault = hipEventDefault;
49 static constexpr Flag_t eventBlockingSync = hipEventBlockingSync;
50 static constexpr Flag_t eventDisableTiming = hipEventDisableTiming;
51 static constexpr Flag_t eventInterprocess = hipEventInterprocess;
52
53 static constexpr Flag_t hostMallocDefault = hipHostMallocDefault;
54 static constexpr Flag_t hostMallocMapped = hipHostMallocMapped;
55 static constexpr Flag_t hostMallocPortable = hipHostMallocPortable;
56 static constexpr Flag_t hostMallocWriteCombined = hipHostMallocWriteCombined;
57 static constexpr Flag_t hostMallocCoherent = hipHostMallocCoherent;
58 static constexpr Flag_t hostMallocNonCoherent = hipHostMallocNonCoherent;
59
60 static constexpr Flag_t hostRegisterDefault = hipHostRegisterDefault;
61 static constexpr Flag_t hostRegisterPortable = hipHostRegisterPortable;
62 static constexpr Flag_t hostRegisterMapped = hipHostRegisterMapped;
63 static constexpr Flag_t hostRegisterIoMemory = hipHostRegisterIoMemory;
64
65 static constexpr MemcpyKind_t memcpyDefault = ::hipMemcpyDefault;
66 static constexpr MemcpyKind_t memcpyDeviceToDevice = ::hipMemcpyDeviceToDevice;
67 static constexpr MemcpyKind_t memcpyDeviceToHost = ::hipMemcpyDeviceToHost;
68 static constexpr MemcpyKind_t memcpyHostToDevice = ::hipMemcpyHostToDevice;
69 static constexpr MemcpyKind_t memcpyHostToHost = ::hipMemcpyHostToHost;
70
71 static constexpr Flag_t streamDefault = hipStreamDefault;
72 static constexpr Flag_t streamNonBlocking = hipStreamNonBlocking;
73
74 static constexpr DeviceAttr_t deviceAttributeMaxBlockDimX = ::hipDeviceAttributeMaxBlockDimX;
75 static constexpr DeviceAttr_t deviceAttributeMaxBlockDimY = ::hipDeviceAttributeMaxBlockDimY;
76 static constexpr DeviceAttr_t deviceAttributeMaxBlockDimZ = ::hipDeviceAttributeMaxBlockDimZ;
77 static constexpr DeviceAttr_t deviceAttributeMaxGridDimX = ::hipDeviceAttributeMaxGridDimX;
78 static constexpr DeviceAttr_t deviceAttributeMaxGridDimY = ::hipDeviceAttributeMaxGridDimY;
79 static constexpr DeviceAttr_t deviceAttributeMaxGridDimZ = ::hipDeviceAttributeMaxGridDimZ;
80 static constexpr DeviceAttr_t deviceAttributeMaxSharedMemoryPerBlock
81 = ::hipDeviceAttributeMaxSharedMemoryPerBlock;
82 static constexpr DeviceAttr_t deviceAttributeMaxThreadsPerBlock = ::hipDeviceAttributeMaxThreadsPerBlock;
83 static constexpr DeviceAttr_t deviceAttributeMultiprocessorCount = ::hipDeviceAttributeMultiprocessorCount;
84 static constexpr DeviceAttr_t deviceAttributeWarpSize = ::hipDeviceAttributeWarpSize;
85
86 static constexpr Memory_t memoryTypeUnregistered = ::hipMemoryTypeUnregistered;
87 static constexpr Memory_t memoryTypeHost = ::hipMemoryTypeHost;
88 static constexpr Memory_t memoryTypeDevice = ::hipMemoryTypeDevice;
89 static constexpr Memory_t memoryTypeManaged = ::hipMemoryTypeManaged;
90
91# if HIP_VERSION >= 40'500'000
92 static constexpr Limit_t limitPrintfFifoSize = ::hipLimitPrintfFifoSize;
93# else
94 static constexpr Limit_t limitPrintfFifoSize
95 = static_cast<Limit_t>(0x01); // Implemented only in ROCm 4.5.0 and later.
96# endif
97 static constexpr Limit_t limitMallocHeapSize = ::hipLimitMallocHeapSize;
98
99 // Host function helper
100 // Encapsulates the different function signatures used by hipStreamAddCallback and hipLaunchHostFn, and the
101 // different calling conventions used by CUDA (__stdcall on Win32) and HIP (standard).
102 struct HostFnAdaptor
103 {
104 HostFn_t func_;
105 void* data_;
106
107 static void hostFunction(void* data)
108 {
109 auto ptr = reinterpret_cast<HostFnAdaptor*>(data);
110 ptr->func_(ptr->data_);
111 delete ptr;
112 }
113
114 static void streamCallback(Stream_t, Error_t, void* data)
115 {
116 auto ptr = reinterpret_cast<HostFnAdaptor*>(data);
117 ptr->func_(ptr->data_);
118 delete ptr;
119 }
120 };
121
122 // Runtime API
123 static inline Error_t deviceGetAttribute(int* value, DeviceAttr_t attr, int device)
124 {
125 return ::hipDeviceGetAttribute(value, attr, device);
126 }
127
128 static inline Error_t pointerGetAttributes(PointerAttr_t* attr, void const* ptr)
129 {
130 return ::hipPointerGetAttributes(attr, ptr);
131 }
132
133 static inline Error_t deviceGetLimit(size_t* pValue, Limit_t limit)
134 {
135# if HIP_VERSION < 40'500'000
136 if(limit == limitPrintfFifoSize)
137 {
138 // Implemented only in ROCm 4.5.0 and later.
139 return errorUnsupportedLimit;
140 }
141# endif
142 return ::hipDeviceGetLimit(pValue, limit);
143 }
144
145 static inline Error_t deviceReset()
146 {
147 return ::hipDeviceReset();
148 }
149
150 static inline Error_t deviceSetLimit(Limit_t /* limit */, size_t /* value */)
151 {
152 // Not implemented.
153 return errorUnsupportedLimit;
154 }
155
156 static inline Error_t deviceSynchronize()
157 {
158 return ::hipDeviceSynchronize();
159 }
160
161 static inline Error_t eventCreate(Event_t* event)
162 {
163 return ::hipEventCreate(event);
164 }
165
166 static inline Error_t eventCreateWithFlags(Event_t* event, Flag_t flags)
167 {
168 return ::hipEventCreateWithFlags(event, flags);
169 }
170
171 static inline Error_t eventDestroy(Event_t event)
172 {
173 return ::hipEventDestroy(event);
174 }
175
176 static inline Error_t eventQuery(Event_t event)
177 {
178 return ::hipEventQuery(event);
179 }
180
181 static inline Error_t eventRecord(Event_t event, Stream_t stream)
182 {
183 return ::hipEventRecord(event, stream);
184 }
185
186 static inline Error_t eventSynchronize(Event_t event)
187 {
188 return ::hipEventSynchronize(event);
189 }
190
191 static inline Error_t free(void* devPtr)
192 {
193 return ::hipFree(devPtr);
194 }
195
196 static inline Error_t freeAsync(void* devPtr, Stream_t stream)
197 {
198 // hipFreeAsync fails on a null pointer deallocation
199 if(devPtr)
200 {
201 return ::hipFreeAsync(devPtr, stream);
202 }
203 else
204 {
205 return ::hipSuccess;
206 }
207 }
208
209 static inline Error_t funcGetAttributes(FuncAttributes_t* attr, void const* func)
210 {
211 return ::hipFuncGetAttributes(attr, func);
212 }
213
214 template<typename T>
215 static inline Error_t funcGetAttributes(FuncAttributes_t* attr, T* func)
216 {
217 // # if ALPAKA_COMP_GNUC
218 // # pragma GCC diagnostic push
219 // # pragma GCC diagnostic ignored "-Wconditionally-supported"
220 // # endif
221 return ::hipFuncGetAttributes(attr, reinterpret_cast<void const*>(func));
222 // # if ALPAKA_COMP_GNUC
223 // # pragma GCC diagnostic pop
224 // # endif
225 }
226
227 static inline Error_t getDeviceCount(int* count)
228 {
229 return ::hipGetDeviceCount(count);
230 }
231
232 static inline Error_t getDeviceProperties(DeviceProp_t* prop, int device)
233 {
234 return ::hipGetDeviceProperties(prop, device);
235 }
236
237 static inline char const* getErrorName(Error_t error)
238 {
239 return ::hipGetErrorName(error);
240 }
241
242 static inline char const* getErrorString(Error_t error)
243 {
244 return ::hipGetErrorString(error);
245 }
246
247 static inline Error_t getLastError()
248 {
249 return ::hipGetLastError();
250 }
251
252 static inline Error_t getSymbolAddress(void** devPtr, void const* symbol)
253 {
254 return ::hipGetSymbolAddress(devPtr, symbol);
255 }
256
257 template<class T>
258 static inline Error_t getSymbolAddress(void** devPtr, T const& symbol)
259 {
260 return ::hipGetSymbolAddress(devPtr, symbol);
261 }
262
263 static inline Error_t hostGetDevicePointer(void** pDevice, void* pHost, Flag_t flags)
264 {
265 return ::hipHostGetDevicePointer(pDevice, pHost, flags);
266 }
267
268 static inline Error_t hostFree(void* ptr)
269 {
270 return ::hipHostFree(ptr);
271 }
272
273 static inline Error_t hostMalloc(void** ptr, size_t size, Flag_t flags)
274 {
275 return ::hipHostMalloc(ptr, size, flags);
276 }
277
278 static inline Error_t hostRegister(void* ptr, size_t size, Flag_t flags)
279 {
280 return ::hipHostRegister(ptr, size, flags);
281 }
282
283 static inline Error_t hostUnregister(void* ptr)
284 {
285 return ::hipHostUnregister(ptr);
286 }
287
288 static inline Error_t launchHostFunc(Stream_t stream, HostFn_t fn, void* userData)
289 {
290 // hipLaunchHostFunc is implemented only in ROCm 5.4.0 and later.
291# if HIP_VERSION >= 50'400'000
292 // Wrap the host function using the proper calling convention.
293 return ::hipLaunchHostFunc(stream, HostFnAdaptor::hostFunction, new HostFnAdaptor{fn, userData});
294# else
295 // Emulate hipLaunchHostFunc using hipStreamAddCallback with a callback adaptor.
296 return ::hipStreamAddCallback(stream, HostFnAdaptor::streamCallback, new HostFnAdaptor{fn, userData}, 0);
297# endif
298 }
299
300 static inline Error_t malloc(void** devPtr, size_t size)
301 {
302 return ::hipMalloc(devPtr, size);
303 }
304
305 static inline Error_t mallocManaged(void** devPtr, size_t size)
306 {
307 return ::hipMallocManaged(devPtr, size);
308 }
309
310 static inline Error_t malloc3D(PitchedPtr_t* pitchedDevPtr, Extent_t extent)
311 {
312 return ::hipMalloc3D(pitchedDevPtr, extent);
313 }
314
315 static inline Error_t mallocAsync(
316 [[maybe_unused]] void** devPtr,
317 [[maybe_unused]] size_t size,
318 [[maybe_unused]] Stream_t stream)
319 {
320 // stream-ordered memory operations are fully implemented only in ROCm 5.3.0 and later.
321# if HIP_VERSION >= 50'600'000
322 return ::hipMallocAsync(devPtr, size, stream);
323# elif HIP_VERSION >= 50'300'000
324 // before ROCm 5.6.0, hipMallocAsync fails for an allocation of 0 bytes
325 if(size > 0)
326 {
327 return ::hipMallocAsync(devPtr, size, stream);
328 }
329 else
330 {
331 // make sure the pointer can safely be passed to hipFreeAsync
332 *devPtr = nullptr;
333 return ::hipSuccess;
334 }
335# else
336 // Not implemented.
337 return errorUnknown;
338# endif
339 }
340
341 static inline Error_t mallocPitch(void** devPtr, size_t* pitch, size_t width, size_t height)
342 {
343 return ::hipMallocPitch(devPtr, pitch, width, height);
344 }
345
346 static inline Error_t memGetInfo(size_t* free, size_t* total)
347 {
348 return ::hipMemGetInfo(free, total);
349 }
350
351 static inline Error_t memcpy(void* dst, void const* src, size_t count, MemcpyKind_t kind)
352 {
353 return ::hipMemcpy(dst, src, count, kind);
354 }
355
356 static inline Error_t memcpy2DAsync(
357 void* dst,
358 size_t dpitch,
359 void const* src,
360 size_t spitch,
361 size_t width,
362 size_t height,
363 MemcpyKind_t kind,
364 Stream_t stream)
365 {
366 return ::hipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream);
367 }
368
369 static inline Error_t memcpy3DAsync(Memcpy3DParms_t const* p, Stream_t stream)
370 {
371 return ::hipMemcpy3DAsync(p, stream);
372 }
373
374 static inline Error_t memcpyAsync(void* dst, void const* src, size_t count, MemcpyKind_t kind, Stream_t stream)
375 {
376 return ::hipMemcpyAsync(dst, src, count, kind, stream);
377 }
378
379 static inline Error_t memset2DAsync(
380 void* devPtr,
381 size_t pitch,
382 int value,
383 size_t width,
384 size_t height,
385 Stream_t stream)
386 {
387 return ::hipMemset2DAsync(devPtr, pitch, value, width, height, stream);
388 }
389
390 static inline Error_t memset3DAsync(PitchedPtr_t pitchedDevPtr, int value, Extent_t extent, Stream_t stream)
391 {
392 return ::hipMemset3DAsync(pitchedDevPtr, value, extent, stream);
393 }
394
395 static inline Error_t memsetAsync(void* devPtr, int value, size_t count, Stream_t stream)
396 {
397 return ::hipMemsetAsync(devPtr, value, count, stream);
398 }
399
400 static inline Error_t setDevice(int device)
401 {
402 return ::hipSetDevice(device);
403 }
404
405 static inline Error_t streamCreate(Stream_t* pStream)
406 {
407 return ::hipStreamCreate(pStream);
408 }
409
410 static inline Error_t streamCreateWithFlags(Stream_t* pStream, Flag_t flags)
411 {
412 return ::hipStreamCreateWithFlags(pStream, flags);
413 }
414
415 static inline Error_t streamDestroy(Stream_t stream)
416 {
417 return ::hipStreamDestroy(stream);
418 }
419
420 static inline Error_t streamQuery(Stream_t stream)
421 {
422 return ::hipStreamQuery(stream);
423 }
424
425 static inline Error_t streamSynchronize(Stream_t stream)
426 {
427 return ::hipStreamSynchronize(stream);
428 }
429
430 static inline Error_t streamWaitEvent(Stream_t stream, Event_t event, Flag_t flags)
431 {
432 return ::hipStreamWaitEvent(stream, event, flags);
433 }
434
435 static inline PitchedPtr_t makePitchedPtr(void* d, size_t p, size_t xsz, size_t ysz)
436 {
437 return ::make_hipPitchedPtr(d, p, xsz, ysz);
438 }
439
440 static inline Pos_t makePos(size_t x, size_t y, size_t z)
441 {
442 return ::make_hipPos(x, y, z);
443 }
444
445 static inline Extent_t makeExtent(size_t w, size_t h, size_t d)
446 {
447 return ::make_hipExtent(w, h, d);
448 }
449 };
450
451} // namespace alpaka
452
453#endif
#define ALPAKA_VERSION_NUMBER(major, minor, patch)
Definition PP.hpp:29
DeviceProperties getDeviceProperties(auto const &platform, uint32_t idx)
decltype(auto) data(auto &&any)
pointer to data of an object
void memcpy(Queue< T_Device, T_QueueKind > const &queue, auto &&dest, auto const &source)
copy data byte wise from one to another container
Definition Queue.hpp:237
main alpaka namespace.
Definition alpaka.hpp:76