alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
computeApi.hpp
Go to the documentation of this file.
1/* Copyright 2025 Andrea Bocci, René Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
9
10#include <type_traits>
11
12#if ALPAKA_LANG_HIP
13
14namespace alpaka::onAcc::unifiedCudaHip::internal
15{
16 template<>
17 struct WarpSize::Get<alpaka::deviceKind::AmdGpu>
18 {
19 constexpr auto operator()() const
20 {
21# if defined(__HIP_DEVICE_COMPILE__)
22 // HIP/ROCm may have a wavefront of 32 or 64 depending on the target device
23# if defined(__GFX9__)
24 // GCN 5.0 and CDNA GPUs have a wavefront size of 64
25 return std::integral_constant<uint32_t, 64u>{};
26# elif defined(__GFX10__) or defined(__GFX11__) or defined(__GFX12__)
27 // RDNA GPUs have a wavefront size of 32
28 return std::integral_constant<uint32_t, 32u>{};
29# else
30 // Unknown AMD GPU architecture
31# ifdef ALPAKA_DEFAULT_HIP_WAVEFRONT_SIZE
32 return std::integral_constant<uint32_t, ALPAKA_DEFAULT_HIP_WAVEFRONT_SIZE>{};
33# else
34# error The current AMD GPU architucture is not supported by this version of alpaka. You can define a default wavefront size setting the preprocessor macro ALPAKA_DEFAULT_HIP_WAVEFRONT_SIZE
35 // return 32 instead of zero to avoid errors due to possible devision by zero, the code will throw at this
36 // point anyway therefore we can return what we want
37 return std::integral_constant<uint32_t, 32u>{};
38# endif
39# endif
40# else
41 // return one to avoid division by zero warnings when the host path is parsed.
42 return std::integral_constant<uint32_t, 1u>{};
43# endif
44 }
45 };
46} // namespace alpaka::onAcc::unifiedCudaHip::internal
47
48#endif
constexpr auto alpaka
Definition fn.hpp:66