// CUDA overload: returns whatever the __activemask() intrinsic reports for the
// calling thread's warp. Both parameters are unnamed and are not read.
// NOTE(review): the enclosing struct and the braces are not visible in this
// listing; only the signature and the return statement are shown.
18 template<alpaka::onAcc::concepts::Acc T_Acc>
21 constexpr __device__
auto operator()(T_Acc
const&, api::Cuda)
const
23 return __activemask();
// CUDA overload that copies the PTX special register %laneid into lIdx using
// inline assembly. Both parameters are unnamed and are not read.
// NOTE(review): two asm statements follow - a `volatile` one spelling the
// register with a single '%', and a plain one using the escaped form '%%'.
// They look like alternative compiler-specific branches whose preprocessor
// guards are missing from this listing - confirm against the full file.
// The declaration of lIdx and the return statement are not visible here.
27 template<alpaka::onAcc::concepts::Acc T_Acc>
30 constexpr __device__
auto operator()(T_Acc
const&, api::Cuda)
const
// Variant 1: volatile asm, register written as %laneid.
34 asm volatile(
"mov.u32 %0, %laneid;" :
"=r"(lIdx));
// Variant 2: non-volatile asm, register written as %%laneid.
36 asm(
"mov.u32 %0, %%laneid;" :
"=r"(lIdx));
// CUDA overload returning a uint32_t computed from the calling thread's
// position inside its block: the thread index within the block is linearized
// against the block's thread extents and the result is divided by warpExtent.
// warpExtent and linearize are defined outside the visible part of this listing.
// Parameters:
//   acc - accelerator object queried for the block extents and thread index.
// NOTE(review): presumably this is the index of the thread's warp within the
// block - confirm against the enclosing struct, which is not shown here.
42 template<alpaka::onAcc::concepts::Acc T_Acc>
45 constexpr __device__ uint32_t
operator()(T_Acc
const& acc, api::Cuda)
const
// Number of threads per block, as a vector.
// NOTE(review): this call spells the namespace as onAcc:: while the next call
// uses alpaka::onAcc:: - harmless if both resolve identically, but inconsistent.
48 alpaka::concepts::Vector
auto blockThreadCount
49 = acc.getExtentsOf(onAcc::origin::block, onAcc::unit::threads);
// Index of the calling thread relative to its block, as a vector.
50 alpaka::concepts::Vector
auto threadIdxInBlock
51 = acc.getIdxWithin(alpaka::onAcc::origin::block, alpaka::onAcc::unit::threads);
// Flatten the thread index, then scale down by the warp size.
52 return linearize(blockThreadCount, threadIdxInBlock) / warpExtent;
// CUDA overload of a warp-wide "all" vote: forwards predicate to __all_sync
// and returns true when the intrinsic's result is non-zero.
// Parameters:
//   predicate - value voted on; cast to int before being passed on.
// NOTE(review): the participant mask is taken from __activemask(), i.e. the
// lanes that happen to be converged at this call. Under independent thread
// scheduling that set can differ from the one the caller intends - confirm
// this is the desired contract.
56 template<alpaka::onAcc::concepts::Acc T_Acc>
59 constexpr __device__
bool operator()(T_Acc
const&, api::Cuda, int32_t predicate)
const
61 return __all_sync(__activemask(),
static_cast<int>(predicate)) != 0;
// CUDA overload of a warp-wide "any" vote: forwards predicate to __any_sync
// and returns true when the intrinsic's result is non-zero.
// Parameters:
//   predicate - value voted on; cast to int before being passed on.
// NOTE(review): the participant mask is taken from __activemask(), i.e. the
// lanes that happen to be converged at this call. Under independent thread
// scheduling that set can differ from the one the caller intends - confirm
// this is the desired contract.
65 template<alpaka::onAcc::concepts::Acc T_Acc>
68 constexpr __device__
bool operator()(T_Acc
const&, api::Cuda, int32_t predicate)
const
70 return __any_sync(__activemask(),
static_cast<int>(predicate)) != 0;
// CUDA overload of a warp ballot: forwards predicate to __ballot_sync and
// returns the intrinsic's result unchanged (return type is deduced).
// Parameters:
//   predicate - value voted on; cast to int before being passed on.
// NOTE(review): the participant mask is taken from __activemask(), i.e. the
// lanes that happen to be converged at this call. Under independent thread
// scheduling that set can differ from the one the caller intends - confirm
// this is the desired contract.
74 template<alpaka::onAcc::concepts::Acc T_Acc>
77 constexpr __device__
auto operator()(T_Acc
const&, api::Cuda, int32_t predicate)
const
79 return __ballot_sync(__activemask(),
static_cast<int>(predicate));
// CUDA overload of an indexed warp shuffle: forwards to __shfl_sync and
// returns its result as T.
// Parameters:
//   value   - the calling lane's contribution to the exchange.
//   srcLane - lane to read from; cast from uint32_t to int for the intrinsic.
//   width   - shuffle segment width; cast from uint32_t to int for the intrinsic.
// NOTE(review): the mask comes from __activemask() (lanes converged at this
// call). T must be a type __shfl_sync accepts - no constraint on T is visible
// in this listing, confirm against the full file.
83 template<alpaka::onAcc::concepts::Acc T_Acc,
typename T>
86 constexpr __device__ T
87 operator()(T_Acc
const&, api::Cuda, T
const& value, uint32_t srcLane, uint32_t width)
const
89 return __shfl_sync(__activemask(), value,
static_cast<int>(srcLane),
static_cast<int>(width));
// CUDA overload of a downward warp shuffle: forwards to __shfl_down_sync and
// returns its result as T.
// Parameters:
//   value - the calling lane's contribution to the exchange.
//   delta - lane offset passed to the intrinsic; cast from uint32_t to int.
//   width - shuffle segment width; cast from uint32_t to int.
// NOTE(review): the mask comes from __activemask() (lanes converged at this
// call). T must be a type __shfl_down_sync accepts - no constraint on T is
// visible in this listing, confirm against the full file.
93 template<alpaka::onAcc::concepts::Acc T_Acc,
typename T>
96 constexpr __device__ T
97 operator()(T_Acc
const&, api::Cuda, T
const& value, uint32_t delta, uint32_t width)
const
99 return __shfl_down_sync(__activemask(), value,
static_cast<int>(delta),
static_cast<int>(width));
// CUDA overload of an upward warp shuffle: forwards to __shfl_up_sync and
// returns its result as T.
// Parameters:
//   value - the calling lane's contribution to the exchange.
//   delta - lane offset passed to the intrinsic; cast from uint32_t to int.
//   width - shuffle segment width; cast from uint32_t to int.
// NOTE(review): the mask comes from __activemask() (lanes converged at this
// call). T must be a type __shfl_up_sync accepts - no constraint on T is
// visible in this listing, confirm against the full file.
103 template<alpaka::onAcc::concepts::Acc T_Acc,
typename T>
106 constexpr __device__ T
107 operator()(T_Acc
const&, api::Cuda, T
const& value, uint32_t delta, uint32_t width)
const
109 return __shfl_up_sync(__activemask(), value,
static_cast<int>(delta),
static_cast<int>(width));
// CUDA overload of an XOR (butterfly) warp shuffle: forwards to
// __shfl_xor_sync and returns its result as T.
// Parameters:
//   value    - the calling lane's contribution to the exchange.
//   laneMask - XOR mask passed to the intrinsic; cast from uint32_t to int.
//   width    - shuffle segment width; cast from uint32_t to int.
// NOTE(review): the participant mask comes from __activemask() (lanes
// converged at this call). T must be a type __shfl_xor_sync accepts - no
// constraint on T is visible in this listing, confirm against the full file.
113 template<alpaka::onAcc::concepts::Acc T_Acc,
typename T>
116 constexpr __device__ T
117 operator()(T_Acc
const&, api::Cuda, T
const& value, uint32_t laneMask, uint32_t width)
const
119 return __shfl_xor_sync(__activemask(), value,
static_cast<int>(laneMask),
static_cast<int>(width));
constexpr uint32_t getSize()
constexpr T_IntegralType linearize(Vec< T_IntegralType, T_dim - 1u, T_Storage > const &dim, Vec< T_IntegralType, T_dim, T_OtherStorage > const &idx)
Give the linear index of an N-dimensional index within an N-dimensional index space.
constexpr auto operator()(T_Acc const &, T_Api) const
constexpr bool operator()(T_Acc const &, T_Api, int32_t predicate) const
constexpr bool operator()(T_Acc const &, T_Api, int32_t predicate) const
constexpr auto operator()(T_Acc const &, T_Api, int32_t predicate) const
constexpr auto operator()(T_Acc const &, T_Api) const
constexpr auto operator()(T_Acc const &, T_Api) const
constexpr T operator()(T_Acc const &, T_Api, T const &value, uint32_t delta, uint32_t width) const
constexpr T operator()(T_Acc const &, T_Api, T const &value, uint32_t delta, uint32_t width) const
constexpr T operator()(T_Acc const &, T_Api, T const &value, uint32_t laneMask, uint32_t width) const
constexpr T operator()(T_Acc const &, T_Api, T const &value, uint32_t srcLane, uint32_t width) const