alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
warp.hpp
Go to the documentation of this file.
1/* Copyright 2025 Mehmet Yusufoglu, René Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
11
12#include <cstdint>
13#include <type_traits>
14
15#if ALPAKA_LANG_HIP
17{
18 template<alpaka::onAcc::concepts::Acc T_Acc>
19 struct Activemask::Op<T_Acc, api::Hip>
20 {
21 constexpr __device__ auto operator()(T_Acc const&, api::Hip) const
22 {
23 return __ballot(1u);
24 }
25 };
26
27 template<alpaka::onAcc::concepts::Acc T_Acc>
28 struct GetLaneIdx::Op<T_Acc, api::Hip>
29 {
30 constexpr __device__ auto operator()(T_Acc const&, api::Hip) const
31 {
32 // for the host side deduction path, result is wrong but this is fine
33 return __lane_id();
34 }
35 };
36
37 template<alpaka::onAcc::concepts::Acc T_Acc>
38 struct GetWarpIdx::Op<T_Acc, api::Hip>
39 {
40 constexpr __device__ uint32_t operator()(T_Acc const& acc, api::Hip) const
41 {
42 constexpr uint32_t warpExtent = onAcc::warp::internal::getSize<ALPAKA_TYPEOF(acc)>();
43 alpaka::concepts::Vector auto blockThreadCount
44 = acc.getExtentsOf(onAcc::origin::block, onAcc::unit::threads);
45 alpaka::concepts::Vector auto threadIdxInBlock
46 = acc.getIdxWithin(alpaka::onAcc::origin::block, alpaka::onAcc::unit::threads);
47 return linearize(blockThreadCount, threadIdxInBlock) / warpExtent;
48 }
49 };
50
51 template<alpaka::onAcc::concepts::Acc T_Acc>
52 struct All::Op<T_Acc, api::Hip>
53 {
54 constexpr __device__ bool operator()(T_Acc const&, api::Hip, int32_t predicate) const
55 {
56 return __all(static_cast<int>(predicate)) != 0u;
57 }
58 };
59
60 template<alpaka::onAcc::concepts::Acc T_Acc>
61 struct Any::Op<T_Acc, api::Hip>
62 {
63 constexpr __device__ bool operator()(T_Acc const&, api::Hip, int32_t predicate) const
64 {
65 return __any(static_cast<int>(predicate)) != 0;
66 }
67 };
68
69 template<alpaka::onAcc::concepts::Acc T_Acc>
70 struct Ballot::Op<T_Acc, api::Hip>
71 {
72 constexpr __device__ auto operator()(T_Acc const&, api::Hip, int32_t predicate) const
73 {
74 return __ballot(static_cast<int>(predicate));
75 }
76 };
77
78 template<alpaka::onAcc::concepts::Acc T_Acc, typename T>
79 struct Shfl::Op<T_Acc, api::Hip, T>
80 {
81 constexpr __device__ T
82 operator()(T_Acc const&, api::Hip, T const& value, uint32_t srcLane, uint32_t width) const
83 {
84 return __shfl(value, static_cast<int>(srcLane), static_cast<int>(width));
85 }
86 };
87
88 template<alpaka::onAcc::concepts::Acc T_Acc, typename T>
89 struct ShflDown::Op<T_Acc, api::Hip, T>
90 {
91 constexpr __device__ T operator()(T_Acc const&, api::Hip, T const& value, uint32_t delta, uint32_t width) const
92 {
93 return __shfl_down(value, static_cast<int>(delta), static_cast<int>(width));
94 }
95 };
96
97 template<alpaka::onAcc::concepts::Acc T_Acc, typename T>
98 struct ShflUp::Op<T_Acc, api::Hip, T>
99 {
100 constexpr __device__ T operator()(T_Acc const&, api::Hip, T const& value, uint32_t delta, uint32_t width) const
101 {
102 return __shfl_up(value, static_cast<int>(delta), static_cast<int>(width));
103 }
104 };
105
106 template<alpaka::onAcc::concepts::Acc T_Acc, typename T>
107 struct ShflXor::Op<T_Acc, api::Hip, T>
108 {
109 constexpr __device__ T
110 operator()(T_Acc const&, api::Hip, T const& value, uint32_t laneMask, uint32_t width) const
111 {
112 return __shfl_xor(value, static_cast<int>(laneMask), static_cast<int>(width));
113 }
114 };
115} // namespace alpaka::onAcc::warp::internal
116#endif
constexpr Api api
Definition tag.hpp:24
constexpr uint32_t getSize()
Definition warp.hpp:27
constexpr T_IntegralType linearize(Vec< T_IntegralType, T_dim - 1u, T_Storage > const &dim, Vec< T_IntegralType, T_dim, T_OtherStorage > const &idx)
Give the linear index of an N-dimensional index within an N-dimensional index space.
Definition Vec.hpp:832
constexpr auto operator()(T_Acc const &, T_Api) const
Definition warp.hpp:38
constexpr bool operator()(T_Acc const &, T_Api, int32_t predicate) const
Definition warp.hpp:93
constexpr bool operator()(T_Acc const &, T_Api, int32_t predicate) const
Definition warp.hpp:107
constexpr auto operator()(T_Acc const &, T_Api, int32_t predicate) const
Definition warp.hpp:121
constexpr auto operator()(T_Acc const &, T_Api) const
Definition warp.hpp:51
constexpr auto operator()(T_Acc const &, T_Api) const
Definition warp.hpp:72
constexpr T operator()(T_Acc const &, T_Api, T const &value, uint32_t delta, uint32_t width) const
Definition warp.hpp:149
constexpr T operator()(T_Acc const &, T_Api, T const &value, uint32_t delta, uint32_t width) const
Definition warp.hpp:163
constexpr T operator()(T_Acc const &, T_Api, T const &value, uint32_t laneMask, uint32_t width) const
Definition warp.hpp:177
constexpr T operator()(T_Acc const &, T_Api, T const &value, uint32_t srcLane, uint32_t width) const
Definition warp.hpp:135