alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
warp.hpp File Reference
#include "alpaka/Vec.hpp"
#include "alpaka/interface.hpp"
#include "alpaka/onAcc/Acc.hpp"
#include "alpaka/onAcc/internal/warp.hpp"
#include "alpaka/tag.hpp"
#include <cstdint>

Go to the source code of this file.

Namespaces

namespace  alpaka
 Main alpaka namespace.
namespace  alpaka::onAcc
 Functionality that is usable on the accelerator compute device from within a kernel.
namespace  alpaka::onAcc::warp

Functions

template<alpaka::onAcc::concepts::Acc T_Acc>
constexpr auto alpaka::onAcc::warp::activemask (T_Acc const &acc) -> std::conditional_t< T_Acc::getWarpSize()<=32u, uint32_t, uint64_t >
 Return the bit-mask of active lanes for the warp associated with the accelerator.
constexpr bool alpaka::onAcc::warp::all (alpaka::onAcc::concepts::Acc auto const &acc, int32_t predicate)
 Evaluates predicate for all active threads of the warp and returns true if and only if predicate is non-zero for all of them.
constexpr bool alpaka::onAcc::warp::any (alpaka::onAcc::concepts::Acc auto const &acc, int32_t predicate)
 Evaluates predicate for all active threads of the warp and returns true if and only if predicate is non-zero for at least one of them.
template<alpaka::onAcc::concepts::Acc T_Acc>
constexpr auto alpaka::onAcc::warp::ballot (T_Acc const &acc, int32_t predicate) -> std::conditional_t< T_Acc::getWarpSize()<=32u, uint32_t, uint64_t >
 Evaluates predicate for all non-exited threads in a warp and returns a 32- or 64-bit unsigned integer (depending on the accelerator) whose Nth bit is set if and only if predicate evaluates to non-zero for the Nth thread of the warp and the Nth thread is active.
constexpr uint32_t alpaka::onAcc::warp::getLaneIdx (alpaka::onAcc::concepts::Acc auto const &acc)
 Return the lane index of the current thread within its warp.
constexpr uint32_t alpaka::onAcc::warp::getWarpIdx (alpaka::onAcc::concepts::Acc auto const &acc)
 Return the warp index within the block.
template<typename T, alpaka::onAcc::concepts::Acc T_Acc>
constexpr T alpaka::onAcc::warp::shfl (T_Acc const &acc, T const &value, uint32_t srcLane, uint32_t width=getSize< T_Acc >())
 Exchange data between threads within a warp: each thread reads value from the lane selected by srcLane.
template<typename T, alpaka::onAcc::concepts::Acc T_Acc>
constexpr T alpaka::onAcc::warp::shflDown (T_Acc const &acc, T const &value, uint32_t delta, uint32_t width=getSize< T_Acc >())
 Read data from threads with higher lane index within a warp.
template<typename T, alpaka::onAcc::concepts::Acc T_Acc>
constexpr T alpaka::onAcc::warp::shflUp (T_Acc const &acc, T const &value, uint32_t delta, uint32_t width=getSize< T_Acc >())
 Read data from threads with lower lane index within a warp.
template<typename T, alpaka::onAcc::concepts::Acc T_Acc>
constexpr T alpaka::onAcc::warp::shflXor (T_Acc const &acc, T const &value, uint32_t laneMask, uint32_t width=getSize< T_Acc >())
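 Exchange data between threads within a warp: each thread reads value from the lane selected by the bitwise XOR of its own lane index with laneMask.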
template<concepts::Acc T_Acc>
constexpr uint32_t alpaka::onAcc::warp::getSize ()
 Return the warp size.
template<concepts::Acc T_Acc>
constexpr uint32_t alpaka::onAcc::warp::getSize (T_Acc const &)
 Return the warp size.