16# include <sycl/sycl.hpp>
18namespace alpaka::onAcc::warp::internal
21 template<alpaka::onAcc::concepts::Acc T_Acc>
22 struct Activemask::Op<T_Acc,
api::OneApi>
24 auto operator()(T_Acc
const&, api::OneApi)
const
26 sycl::sub_group sg = sycl::ext::oneapi::this_work_item::get_sub_group();
31 static auto getMask(
auto const subGroup)
33 auto sgMask = sycl::ext::oneapi::group_ballot(subGroup,
true);
35 constexpr auto const warpSize = T_Acc::getWarpSize();
36 using ReturnType = std::conditional_t<warpSize <= 32, uint32_t, uint64_t>;
38 sgMask.extract_bits(mask, 0u);
43 template<alpaka::onAcc::concepts::Acc T_Acc>
44 struct GetLaneIdx::Op<T_Acc,
api::OneApi>
46 constexpr auto operator()(T_Acc
const&, api::OneApi)
const
48 sycl::sub_group sg = sycl::ext::oneapi::this_work_item::get_sub_group();
50 return sg.get_local_id()[0];
54 template<alpaka::onAcc::concepts::Acc T_Acc>
55 struct GetWarpIdx::Op<T_Acc,
api::OneApi>
57 constexpr auto operator()(T_Acc
const&, api::OneApi)
const
59 sycl::sub_group sg = sycl::ext::oneapi::this_work_item::get_sub_group();
61 return sg.get_group_linear_id();
65 template<alpaka::onAcc::concepts::Acc T_Acc>
66 struct All::Op<T_Acc,
api::OneApi>
68 bool operator()(T_Acc
const& acc, api::OneApi, int32_t predicate)
const
77 sycl::sub_group sg = sycl::ext::oneapi::this_work_item::get_sub_group();
78 auto activeMask = Activemask::Op<T_Acc, api::OneApi>::getMask(sg);
79 auto sgMask = sycl::ext::oneapi::group_ballot(sg, predicate != 0);
81 constexpr auto const warpSize = T_Acc::getWarpSize();
82 using ReturnType = std::conditional_t<warpSize <= 32, uint32_t, uint64_t>;
83 ReturnType predicateMask;
84 sgMask.extract_bits(predicateMask, 0u);
85 return activeMask & predicateMask == activeMask;
89 sycl::sub_group sg = sycl::ext::oneapi::this_work_item::get_sub_group();
90 return sycl::all_of_group(sg, predicate != 0);
95 template<alpaka::onAcc::concepts::Acc T_Acc>
96 struct Any::Op<T_Acc,
api::OneApi>
98 bool operator()(T_Acc
const& acc, api::OneApi, int32_t predicate)
const
107 sycl::sub_group sg = sycl::ext::oneapi::this_work_item::get_sub_group();
108 auto activeMask = Activemask::Op<T_Acc, api::OneApi>::getMask(sg);
109 auto sgMask = sycl::ext::oneapi::group_ballot(sg, predicate != 0);
111 constexpr auto const warpSize = T_Acc::getWarpSize();
112 using ReturnType = std::conditional_t<warpSize <= 32, uint32_t, uint64_t>;
113 ReturnType predicateMask;
114 sgMask.extract_bits(predicateMask, 0u);
115 return activeMask & predicateMask;
119 sycl::sub_group sg = sycl::ext::oneapi::this_work_item::get_sub_group();
120 return sycl::any_of_group(sg, predicate != 0);
125 template<alpaka::onAcc::concepts::Acc T_Acc>
126 struct Ballot::Op<T_Acc,
api::OneApi>
128 auto operator()(T_Acc
const&, api::OneApi, int32_t predicate)
const
130 sycl::sub_group sg = sycl::ext::oneapi::this_work_item::get_sub_group();
131 auto sgMask = sycl::ext::oneapi::group_ballot(sg, predicate != 0);
133 constexpr auto const warpSize = T_Acc::getWarpSize();
134 using ReturnType = std::conditional_t<warpSize <= 32, uint32_t, uint64_t>;
136 sgMask.extract_bits(mask, 0u);
141 template<alpaka::onAcc::concepts::Acc T_Acc,
typename T>
142 struct Shfl::Op<T_Acc,
api::OneApi, T>
144 constexpr T operator()(T_Acc
const&, api::OneApi, T
const& value, uint32_t srcLane, uint32_t width)
const
146 sycl::sub_group sg = sycl::ext::oneapi::this_work_item::get_sub_group();
147 uint32_t laneIdxInWarp = sg.get_local_id()[0];
148 uint32_t partitionOffset = (laneIdxInWarp / width) * width;
149 uint32_t srcInPartitionLaneIdx = partitionOffset + (srcLane % width);
151 return sycl::select_from_group(sg, value, srcInPartitionLaneIdx);
155 template<alpaka::onAcc::concepts::Acc T_Acc,
typename T>
156 struct ShflDown::Op<T_Acc,
api::OneApi, T>
158 constexpr T operator()(T_Acc
const&, api::OneApi, T
const& value, uint32_t delta, uint32_t width)
const
160 sycl::sub_group sg = sycl::ext::oneapi::this_work_item::get_sub_group();
162 uint32_t laneIdxInWarp = sg.get_local_id()[0];
163 uint32_t groupEndIdx = (laneIdxInWarp / width + 1) * width;
165 T result = sycl::shift_group_left(sg, value, delta);
166 if(laneIdxInWarp + delta >= groupEndIdx)
172 template<alpaka::onAcc::concepts::Acc T_Acc,
typename T>
173 struct ShflUp::Op<T_Acc,
api::OneApi, T>
175 constexpr T operator()(T_Acc
const&, api::OneApi, T
const& value, uint32_t delta, uint32_t width)
const
177 sycl::sub_group sg = sycl::ext::oneapi::this_work_item::get_sub_group();
179 uint32_t laneIdxInWarp = sg.get_local_id()[0];
180 uint32_t groupStartIdx = (laneIdxInWarp / width) * width;
182 T result = sycl::shift_group_right(sg, value, delta);
183 if(laneIdxInWarp - groupStartIdx < delta)
189 template<alpaka::onAcc::concepts::Acc T_Acc,
typename T>
190 struct ShflXor::Op<T_Acc,
api::OneApi, T>
192 constexpr T operator()(T_Acc
const&, api::OneApi, T
const& value, uint32_t laneMask, uint32_t width)
const
194 sycl::sub_group sg = sycl::ext::oneapi::this_work_item::get_sub_group();
195 uint32_t laneIdxInWarp = sg.get_local_id()[0];
196 uint32_t groupStartIdx = (laneIdxInWarp / width) * width;
197 uint32_t
const relativeIdx = laneIdxInWarp - groupStartIdx;
198 uint32_t
const sourceLane = (relativeIdx % width) ^ laneMask;
199 return sycl::select_from_group(sg, value, sourceLane < width ? groupStartIdx + sourceLane : laneIdxInWarp);
#define ALPAKA_TYPEOF(...)
Get the type of instance.
constexpr WarpSize warpSize
constexpr DeviceKind deviceKind