// Holds the api::Cuda specializations of the warp-level operation functors
// (Activemask, GetLaneIdx, GetWarpIdx, All, Any, Ballot, Shfl, ShflDown, ShflUp, ShflXor).
16namespace alpaka::onAcc::warp::internal
/** Specialization of the Activemask warp operation for api::Cuda.
 *
 * The accelerator and API arguments are only used for dispatch; the result
 * comes straight from the CUDA intrinsic `__activemask()`.
 */
template<alpaka::onAcc::concepts::Acc T_Acc>
struct Activemask::Op<T_Acc, api::Cuda>
{
    /** @return the mask reported by `__activemask()` for the calling thread */
    constexpr __device__ auto operator()(T_Acc const&, api::Cuda) const
    {
        auto const activeLanes = __activemask();
        return activeLanes;
    }
};
// Specialization of the GetLaneIdx warp operation for api::Cuda.
// Both asm statements below move the PTX special register %laneid into lIdx
// through a 32-bit register output operand ("=r").
// NOTE(review): this listing does not show the declaration of lIdx, the
// preprocessor condition that selects between the two asm forms, or the
// return statement. Presumably lIdx is a local 32-bit unsigned value that is
// returned to the caller; confirm against the complete file.
27 template<alpaka::onAcc::concepts::Acc T_Acc>
28 struct GetLaneIdx::Op<T_Acc,
api::Cuda>
30 constexpr __device__
auto operator()(T_Acc
const&, api::Cuda)
const
// Form 1: `asm volatile`, register spelled with a single '%'.
// NOTE(review): presumably selected for a compiler whose inline-asm parser
// does not need '%' escaped here; TODO confirm which compiler guard applies.
34 asm volatile(
"mov.u32 %0, %laneid;" :
"=r"(lIdx));
// Form 2: plain `asm`, register spelled '%%laneid' (escaped '%').
// NOTE(review): presumably the alternative branch of the same conditional.
36 asm(
"mov.u32 %0, %%laneid;" :
"=r"(lIdx));
/** Specialization of the GetWarpIdx warp operation for api::Cuda.
 *
 * Computes the index of the calling thread's warp inside its block by
 * linearizing the thread index within the block and dividing by the warp size.
 */
template<alpaka::onAcc::concepts::Acc T_Acc>
struct GetWarpIdx::Op<T_Acc, api::Cuda>
{
    /** @param acc accelerator queried for the block extents and the thread index within the block
     *  @return linearized thread index within the block divided by the warp size
     */
    constexpr __device__ uint32_t operator()(T_Acc const& acc, api::Cuda) const
    {
        // Number of lanes per warp for this accelerator type, known at compile time.
        constexpr uint32_t lanesPerWarp = onAcc::warp::internal::getSize<ALPAKA_TYPEOF(acc)>();

        alpaka::concepts::Vector auto threadsPerBlock
            = acc.getExtentsOf(onAcc::origin::block, onAcc::unit::threads);
        alpaka::concepts::Vector auto threadInBlock
            = acc.getIdxWithin(alpaka::onAcc::origin::block, alpaka::onAcc::unit::threads);

        // Integer division maps the linear thread index to its warp's index.
        auto const linearThreadIdx = linearize(threadsPerBlock, threadInBlock);
        return linearThreadIdx / lanesPerWarp;
    }
};
/** Specialization of the All warp vote for api::Cuda.
 *
 * Forwards to `__all_sync`, using `__activemask()` as the participation mask.
 * NOTE(review): the mask is whatever `__activemask()` reports at the call
 * site, not a caller-specified set of lanes.
 */
template<alpaka::onAcc::concepts::Acc T_Acc>
struct All::Op<T_Acc, api::Cuda>
{
    /** @param predicate value passed to the vote after conversion to int
     *  @return true if `__all_sync` yields a non-zero result
     */
    constexpr __device__ bool operator()(T_Acc const&, api::Cuda, int32_t predicate) const
    {
        auto const participants = __activemask();
        auto const vote = __all_sync(participants, static_cast<int>(predicate));
        return vote != 0;
    }
};
/** Specialization of the Any warp vote for api::Cuda.
 *
 * Forwards to `__any_sync`, using `__activemask()` as the participation mask.
 * NOTE(review): the mask is whatever `__activemask()` reports at the call
 * site, not a caller-specified set of lanes.
 */
template<alpaka::onAcc::concepts::Acc T_Acc>
struct Any::Op<T_Acc, api::Cuda>
{
    /** @param predicate value passed to the vote after conversion to int
     *  @return true if `__any_sync` yields a non-zero result
     */
    constexpr __device__ bool operator()(T_Acc const&, api::Cuda, int32_t predicate) const
    {
        auto const participants = __activemask();
        auto const vote = __any_sync(participants, static_cast<int>(predicate));
        return vote != 0;
    }
};
/** Specialization of the Ballot warp vote for api::Cuda.
 *
 * Forwards to `__ballot_sync`, using `__activemask()` as the participation mask.
 * NOTE(review): the mask is whatever `__activemask()` reports at the call
 * site, not a caller-specified set of lanes.
 */
template<alpaka::onAcc::concepts::Acc T_Acc>
struct Ballot::Op<T_Acc, api::Cuda>
{
    /** @param predicate value passed to the vote after conversion to int
     *  @return the unmodified result of `__ballot_sync`
     */
    constexpr __device__ auto operator()(T_Acc const&, api::Cuda, int32_t predicate) const
    {
        auto const participants = __activemask();
        return __ballot_sync(participants, static_cast<int>(predicate));
    }
};
/** Specialization of the Shfl warp shuffle for api::Cuda.
 *
 * Forwards to `__shfl_sync`, using `__activemask()` as the participation mask.
 * NOTE(review): the mask is whatever `__activemask()` reports at the call
 * site, not a caller-specified set of lanes.
 */
template<alpaka::onAcc::concepts::Acc T_Acc, typename T>
struct Shfl::Op<T_Acc, api::Cuda, T>
{
    /** @param value   value handed to the shuffle
     *  @param srcLane source lane, converted to int before the call
     *  @param width   shuffle width, converted to int before the call
     *  @return result of `__shfl_sync`, returned as T
     */
    constexpr __device__ T operator()(T_Acc const&, api::Cuda, T const& value, uint32_t srcLane, uint32_t width) const
    {
        auto const participants = __activemask();
        auto const sourceLane = static_cast<int>(srcLane);
        auto const segmentWidth = static_cast<int>(width);
        return __shfl_sync(participants, value, sourceLane, segmentWidth);
    }
};
/** Specialization of the ShflDown warp shuffle for api::Cuda.
 *
 * Forwards to `__shfl_down_sync`, using `__activemask()` as the participation mask.
 * NOTE(review): the mask is whatever `__activemask()` reports at the call
 * site, not a caller-specified set of lanes.
 */
template<alpaka::onAcc::concepts::Acc T_Acc, typename T>
struct ShflDown::Op<T_Acc, api::Cuda, T>
{
    /** @param value value handed to the shuffle
     *  @param delta lane offset, converted to int before the call
     *  @param width shuffle width, converted to int before the call
     *  @return result of `__shfl_down_sync`, returned as T
     */
    constexpr __device__ T operator()(T_Acc const&, api::Cuda, T const& value, uint32_t delta, uint32_t width) const
    {
        auto const participants = __activemask();
        auto const laneOffset = static_cast<int>(delta);
        auto const segmentWidth = static_cast<int>(width);
        return __shfl_down_sync(participants, value, laneOffset, segmentWidth);
    }
};
/** Specialization of the ShflUp warp shuffle for api::Cuda.
 *
 * Forwards to `__shfl_up_sync`, using `__activemask()` as the participation mask.
 * NOTE(review): the mask is whatever `__activemask()` reports at the call
 * site, not a caller-specified set of lanes.
 */
template<alpaka::onAcc::concepts::Acc T_Acc, typename T>
struct ShflUp::Op<T_Acc, api::Cuda, T>
{
    /** @param value value handed to the shuffle
     *  @param delta lane offset, converted to int before the call
     *  @param width shuffle width, converted to int before the call
     *  @return result of `__shfl_up_sync`, returned as T
     */
    constexpr __device__ T operator()(T_Acc const&, api::Cuda, T const& value, uint32_t delta, uint32_t width) const
    {
        auto const participants = __activemask();
        auto const laneOffset = static_cast<int>(delta);
        auto const segmentWidth = static_cast<int>(width);
        return __shfl_up_sync(participants, value, laneOffset, segmentWidth);
    }
};
/** Specialization of the ShflXor warp shuffle for api::Cuda.
 *
 * Forwards to `__shfl_xor_sync`, using `__activemask()` as the participation mask.
 * NOTE(review): the mask is whatever `__activemask()` reports at the call
 * site, not a caller-specified set of lanes.
 */
template<alpaka::onAcc::concepts::Acc T_Acc, typename T>
struct ShflXor::Op<T_Acc, api::Cuda, T>
{
    /** @param value    value handed to the shuffle
     *  @param laneMask lane mask argument, converted to int before the call
     *  @param width    shuffle width, converted to int before the call
     *  @return result of `__shfl_xor_sync`, returned as T
     */
    constexpr __device__ T operator()(T_Acc const&, api::Cuda, T const& value, uint32_t laneMask, uint32_t width) const
    {
        auto const participants = __activemask();
        auto const xorMask = static_cast<int>(laneMask);
        auto const segmentWidth = static_cast<int>(width);
        return __shfl_xor_sync(participants, value, xorMask, segmentWidth);
    }
};
constexpr T_IntegralType linearize(Vec< T_IntegralType, T_dim - 1u, T_Storage > const &dim, Vec< T_IntegralType, T_dim, T_OtherStorage > const &idx)
Gives the linear index of an N-dimensional index within an N-dimensional index space.