alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
intrinsic.hpp
Go to the documentation of this file.
1/* Copyright 2025 Luca Venerando Greco, René Widera, Jan Stephan
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
11
12#if ALPAKA_LANG_SYCL
13
14# include <sycl/sycl.hpp>
15
16namespace alpaka::internal::intrinsic
17{
18 template<typename T_Arg>
19 struct Popcount::Op<alpaka::internal::SyclIntrinsic, T_Arg>
20 {
21 constexpr auto operator()(alpaka::internal::SyclIntrinsic const, T_Arg const& val) const
22 {
23 if constexpr(sizeof(T_Arg) == 4u)
24 {
25 return sycl::popcount(std::bit_cast<unsigned int>(val));
26 }
27 else if constexpr(sizeof(T_Arg) == 8u)
28 {
29 return sycl::popcount(std::bit_cast<unsigned long long>(val));
30 }
31 else
32 static_assert(!sizeof(T_Arg), "Unsupported data type, sizeof() must be 4 or 8");
33
34 ALPAKA_UNREACHABLE(int{});
35 }
36 };
37
38 template<typename T_Arg>
39 struct Ffs::Op<alpaka::internal::SyclIntrinsic, T_Arg>
40 {
41 constexpr auto operator()(alpaka::internal::SyclIntrinsic const, T_Arg const& val) const
42 {
43 // There is no FFS operation in SYCL but we can emulate it using popcount.
44 if constexpr(sizeof(T_Arg) == 4u)
45 {
46 auto value = std::bit_cast<unsigned int>(val);
47 return (value == 0u) ? 0 : sycl::popcount(value ^ ~(-value));
48 }
49 else if constexpr(sizeof(T_Arg) == 8u)
50 {
51 auto value = std::bit_cast<unsigned long long>(val);
52 return (value == 0u) ? 0 : sycl::popcount(value ^ ~(-value));
53 }
54 else
55 static_assert(!sizeof(T_Arg), "Unsupported data type, sizeof() must be 4 or 8");
56
57 ALPAKA_UNREACHABLE(int{});
58 }
59 };
60
61 template<typename T_Arg>
62 struct Clz::Op<alpaka::internal::SyclIntrinsic, T_Arg>
63 {
64 constexpr auto operator()(alpaka::internal::SyclIntrinsic const, T_Arg const& val) const
65 {
66 if constexpr(sizeof(T_Arg) == 4u)
67 {
68 auto value = std::bit_cast<unsigned int>(val);
69 return sycl::clz(value);
70 }
71 else if constexpr(sizeof(T_Arg) == 8u)
72 {
73 auto value = std::bit_cast<unsigned long long>(val);
74 return sycl::clz(value);
75 }
76 else
77 static_assert(!sizeof(T_Arg), "Unsupported data type, sizeof() must be 4 or 8");
78
79 ALPAKA_UNREACHABLE(int{});
80 }
81 };
82} // namespace alpaka::internal::intrinsic
83
84#endif
#define ALPAKA_UNREACHABLE(...)
Before CUDA 11.5 nvcc is unable to correctly identify return statements in 'if constexpr' branches....
constexpr auto alpaka
Definition fn.hpp:66