alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
intrinsic.hpp
Go to the documentation of this file.
1/* Copyright 2025 Luca Venerando Greco, René Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
13
14#include <bit>
15
16#if (ALPAKA_LANG_CUDA || ALPAKA_LANG_HIP)
18{
19 template<typename T_Arg>
20 struct Popcount::Op<alpaka::internal::CudaHipIntrinsic, T_Arg>
21 {
22 inline __device__ auto operator()(alpaka::internal::CudaHipIntrinsic const, T_Arg const& val) const
23 {
24 if constexpr(sizeof(T_Arg) == 4u)
25 {
26 return __popc(std::bit_cast<unsigned int>(val));
27 }
28 else if constexpr(sizeof(T_Arg) == 8u)
29 {
30 return __popcll(std::bit_cast<unsigned long long>(val));
31 }
32 else
33 static_assert(!sizeof(T_Arg), "Unsupported data type, sizeof() must be 4 or 8");
34
35 ALPAKA_UNREACHABLE(int{});
36 }
37 };
38
39 template<typename T_Arg>
40 struct Ffs::Op<alpaka::internal::CudaHipIntrinsic, T_Arg>
41 {
42 inline __device__ auto operator()(alpaka::internal::CudaHipIntrinsic const, T_Arg const& val) const
43 {
44 if constexpr(sizeof(T_Arg) == 4u)
45 {
46 return __ffs(std::bit_cast<int>(val));
47 }
48 else if constexpr(sizeof(T_Arg) == 8u)
49 {
50 return __ffsll(std::bit_cast<long long int>(val));
51 }
52 else
53 static_assert(!sizeof(T_Arg), "Unsupported data type, sizeof() must be 4 or 8");
54
55 ALPAKA_UNREACHABLE(int{});
56 }
57 };
58
59 template<typename T_Arg>
60 struct Clz::Op<alpaka::internal::CudaHipIntrinsic, T_Arg>
61 {
62 inline __device__ auto operator()(alpaka::internal::CudaHipIntrinsic const, T_Arg const& val) const
63 {
64 if constexpr(sizeof(T_Arg) == 4u)
65 {
66 return __clz(std::bit_cast<int>(val));
67 }
68 else if constexpr(sizeof(T_Arg) == 8u)
69 {
70 return __clzll(std::bit_cast<long long int>(val));
71 }
72 else
73 static_assert(!sizeof(T_Arg), "Unsupported data type, sizeof() must be 4 or 8");
74
75 ALPAKA_UNREACHABLE(int{});
76 }
77 };
78} // namespace alpaka::internal::intrinsic
79#endif
#define ALPAKA_UNREACHABLE(...)
Before CUDA 11.5 nvcc is unable to correctly identify return statements in 'if constexpr' branches....
constexpr auto alpaka
Definition fn.hpp:66
int32_t operator()(T_IntrinsicImpl const, T_Arg const &val) const
int32_t operator()(T_IntrinsicImpl const, T_Arg const &val) const
int32_t operator()(T_IntrinsicImpl const, T_Arg const &val) const