31 template<
typename T_Platform>
32 struct Device : std::enable_shared_from_this<Device<T_Platform>>
70 std::vector<std::weak_ptr<cpu::Queue<Device>>> tmpQueues;
75 for(
auto& weakQueue : tmpQueues)
77 if(
auto queue = weakQueue.lock())
94 std::vector<std::weak_ptr<cpu::Queue<Device>>>
queues;
95 std::vector<std::weak_ptr<cpu::Event<Device>>>
events;
100 return this->shared_from_this();
103 template<
typename T_Device>
143 "Unsupported queue kind.");
148 auto newQueue = std::make_shared<cpu::Queue<Device>>(
149 std::move(thisHandle),
154 queues.emplace_back(newQueue);
165 auto newEvent = std::make_shared<cpu::Event<Device>>(std::move(thisHandle),
queues.size());
167 events.emplace_back(newEvent);
201 template<
typename T_Platform>
206 template<
typename T_Platform>
212 template<
typename T_Platform>
221 template<
typename T_Type,
typename T_Platform, alpaka::concepts::Vector T_Extents>
235 device.pinPointer(ptr, memSizeInByte);
251 std::stringstream ss;
259 template<
typename T_Type,
typename T_Platform, alpaka::concepts::Vector T_Extents>
269 template<
typename T_Type,
typename T_Platform, alpaka::concepts::Vector T_Extents>
279 template<
typename T_Platform,
typename T_Any>
305 Op<cpu::Device<T_Platform>, FrameSpec<T_NumFrames, T_FrameExtents, exec::CpuSerial>, T_KernelBundle>
314 alpaka::unused(device, kernelBundle);
319 iotaCVec<
typename T_FrameExtents::type, T_FrameExtents::dim()>())::
template fill<1u>();
320 return ThreadSpec{allOne, allOne, frameSpec.getExecutor()};
326 T_KernelBundle
const& kernelBundle)
const
328 alpaka::unused(device, kernelBundle);
332 iotaCVec<
typename T_FrameExtents::type, T_FrameExtents::dim()>())::
template fill<1u>();
345 Op<cpu::Device<T_Platform>, FrameSpec<T_NumFrames, T_FrameExtents, T_Executor>, T_KernelBundle>
354 alpaka::unused(device, kernelBundle);
358 auto numThreadBlocks = frameSpec.getNumFrames();
359 return ThreadSpec{numThreadBlocks, T_FrameExtents::template
fill<1u>(), frameSpec.getExecutor()};
365 T_KernelBundle
const& kernelBundle)
const
367 alpaka::unused(device, kernelBundle);
372 auto const numThreads =
Vec<
typename T_FrameExtents::type, T_FrameExtents::dim()>
::fill(1);
377 template<
typename T_Platform>
382 return device.m_properties;
390 template<
typename T_Platform>
#define ALPAKA_TYPEOF(...)
Get the type of instance.
Concept to check if a type is a CVector.
Concept to check for an executor.
Concept to check if a type is a KernelBundle.
Concept to check if a type is a queue kind.
Concept to check if a type is a vector.
#define ALPAKA_LOG_INFO(logLvl, callable)
Write a meta data message to the output.
#define ALPAKA_LOG_FUNCTION(logLvl)
Log the entry and exit of a scope.
auto emulatedAlignedMemDescription(uint32_t alignmentInByte, T_Extents extents)
Provides a memory description to create multidimensional linewise aligned memory within a one dimensi...
constexpr auto simdOptimizedAlignment(auto api, alpaka::concepts::DeviceKind auto deviceKind)
Calculate the best alignment for SIMD optimized memory allocation.
ALPAKA_FN_INLINE ALPAKA_FN_HOST void alignedFree(size_t alignment, auto ptr)
ALPAKA_FN_INLINE ALPAKA_FN_HOST auto alignedAlloc(size_t alignment, size_t size) -> void *
constexpr bool isSeqExecutor_v
alpaka internal implementations.
constexpr auto getDeviceKind(auto &&any)
constexpr uint32_t allNumaDomains
Constant to select all NUMA domains.
size_t getFreeGlobalMemBytes(uint32_t numaIdx)
Return the number of free bytes in the NUMA domain.
void setThreadAffinity(uint32_t numaIdx)
Set the affinity of the current thread to all cores of the NUMA domain.
void pinPointer(T *const ptr, size_t bytes, uint32_t numaIdx)
Set the NUMA domain for the memory range described by ptr and bytes.
Functionality which is usable on the host CPU controller thread.
void fill(Queue< T_Device, T_QueueKind > const &queue, auto &&dest, T_Value elementValue)
Fill memory element-wise.
std::shared_ptr< T > Handle
auto getFreeGlobalMemBytes() -> std::size_t
constexpr auto nonBlocking
constexpr decltype(auto) getDeviceKind(auto &&any)
Get the device type of an object.
constexpr decltype(auto) getApi(auto &&any)
Get the API an object depends on.
consteval auto iotaCVec()
Create and return a CVector of the given length with values 1, 2, ...
constexpr decltype(auto) get(concepts::SpecializationOf< Dict > auto &t) noexcept
Strongly typed and constexpr representation of a byte-alignment of memory.
Description of a specific device that one can schedule kernels on.
Device/Api-agnostic description of the logical parallelism exposed to a kernel.
static constexpr T_Executor getExecutor() noexcept
constexpr NumFramesVecType const & getNumFrames() const noexcept
Life time managed buffer with contiguous data.
Backend-specific description of the actual block and thread launch shape.
bool operator!=(Device const &other) const
uint32_t getNativeHandle() const noexcept
std::vector< std::weak_ptr< cpu::Event< Device > > > events
void pinPointer(T *const ptr, size_t bytes)
Handle< cpu::Queue< Device > > makeQueue(alpaka::concepts::QueueKind auto kind)
auto getDeviceKind() const
Handle< cpu::Event< Device > > makeEvent()
Handle< T_Platform > m_platform
Device(internal::concepts::PlatformHandle auto platform, uint32_t const idx, uint32_t numaIdx)
Device & operator=(Device &&)=delete
std::vector< std::weak_ptr< cpu::Queue< Device > > > queues
DeviceProperties m_properties
std::shared_ptr< Device > getSharedPtr()
auto getFreeGlobalMemBytes() const
std::string getName() const
void setThreadAffinity() const
Device(Device const &)=delete
Device & operator=(Device const &)=delete
bool operator==(Device const &other) const
auto operator()(cpu::Device< T_Platform > &device, T_Extents const &extents) const
auto operator()(cpu::Device< T_Platform > &device, T_Extents const &extents) const