latest/doxygen/api_2host_2Device_8hpp_source.html

/* Copyright 2024 René Widera, Mehmet Yusufoglu

 * SPDX-License-Identifier: MPL-2.0

 */


#pragma once


#include "alpaka/api/host/Api.hpp"

#include "alpaka/api/host/Event.hpp"

#include "alpaka/api/host/Queue.hpp"

#include "alpaka/api/host/hwloc/utility.hpp"

#include "alpaka/api/host/sysInfo.hpp"

#include "alpaka/api/util.hpp"

#include "alpaka/core/alignedAlloc.hpp"

#include "alpaka/internal/interface.hpp"

#include "alpaka/onHost/Device.hpp"

#include "alpaka/onHost/DeviceProperties.hpp"

#include "alpaka/onHost/Handle.hpp"

#include "alpaka/onHost/mem/SharedBuffer.hpp"

#include "alpaka/onHost/trait.hpp"

#include "alpaka/tag.hpp"

#include "alpaka/utility.hpp"


#include <cstdint>
#include <memory>
#include <mutex>
#include <sstream>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>


namespace alpaka::onHost

{


    namespace cpu

    {

        template<typename T_Platform>


        struct Device : std::enable_shared_from_this<Device<T_Platform>>

        {

        public:


            Device(internal::concepts::PlatformHandle auto platform, uint32_t const idx, uint32_t cpuGroupIdx)

                : m_platform(std::move(platform))

                , m_idx(idx)

                , m_cpuGroupIdx(cpuGroupIdx)

                , m_properties{internal::getDeviceProperties(*m_platform.get(), m_idx)}

            {

                ALPAKA_LOG_FUNCTION(onHost::logger::device);

            }


            ~Device()

            {

                ALPAKA_LOG_FUNCTION(onHost::logger::device);

            }


            Device(Device const&) = delete;

            Device& operator=(Device const&) = delete;


            Device(Device&&) = delete;

            Device& operator=(Device&&) = delete;


            bool operator==(Device const& other) const

            {

                return m_idx == other.m_idx;

            }


            bool operator!=(Device const& other) const

            {

                return m_idx != other.m_idx;

            }


            void wait()

            {

                ALPAKA_LOG_FUNCTION(alpaka::onHost::logger::device);

                // Host device synchronization - wait on all queues associated with this device.

                // IMPORTANT: Do not hold queuesGuard across potentially long waits; copy weak refs first.

                std::vector<std::weak_ptr<cpu::Queue<Device>>> tmpQueues;

                {

                    std::lock_guard<std::mutex> lk{queuesGuard};

                    tmpQueues = queues; // copy weak_ptr list

                }

                for(auto& weakQueue : tmpQueues)

                {

                    if(auto queue = weakQueue.lock())

                    {

                        internal::wait(*queue);

                    }

                }

            }


        private:

            void _()

            {

                static_assert(internal::concepts::Device<Device>);

            }


            Handle<T_Platform> m_platform;

            uint32_t m_idx = 0u;

            uint32_t m_cpuGroupIdx = internal::hwloc::allDomains;

            DeviceProperties m_properties;

            std::vector<std::weak_ptr<cpu::Queue<Device>>> queues;

            std::vector<std::weak_ptr<cpu::Event<Device>>> events;

            std::mutex queuesGuard;


            std::shared_ptr<Device> getSharedPtr()

            {

                return this->shared_from_this();

            }


            template<typename T_Device>

            friend struct Queue;


            void setThreadAffinity() const

            {

                internal::hwloc::setThreadAffinity(m_cpuGroupIdx);

            }


            template<typename T>

            void pinPointer(T* const ptr, size_t bytes)

            {

                internal::hwloc::pinPointer(ptr, bytes, m_cpuGroupIdx);

            }


            bool isNumaAware() const

            {

                return m_cpuGroupIdx != internal::hwloc::allDomains;

            }


            friend struct alpaka::internal::GetName;


            std::string getName() const

            {

                return m_properties.name;

            }


            friend struct internal::GetNativeHandle;


            [[nodiscard]] uint32_t getNativeHandle() const noexcept

            {

                return m_idx;

            }


            friend struct internal::MakeQueue;


            Handle<cpu::Queue<Device>> makeQueue(alpaka::concepts::QueueKind auto kind)

            {

                ALPAKA_LOG_FUNCTION(onHost::logger::queue);

                static_assert(

                    kind == queueKind::blocking || kind == queueKind::nonBlocking,

                    "Unsupported queue kind.");

                auto thisHandle = this->getSharedPtr();

                std::lock_guard<std::mutex> lk{queuesGuard};


                constexpr bool isBlocking = kind == queueKind::blocking;

                auto newQueue = std::make_shared<cpu::Queue<Device>>(

                    std::move(thisHandle),

                    queues.size(),

                    m_cpuGroupIdx,

                    isBlocking);


                queues.emplace_back(newQueue);

                return newQueue;

            }


            friend struct internal::MakeEvent;


            Handle<cpu::Event<Device>> makeEvent()

            {

                ALPAKA_LOG_FUNCTION(alpaka::onHost::logger::event);

                auto thisHandle = this->getSharedPtr();

                std::lock_guard<std::mutex> lk{queuesGuard};

                auto newEvent = std::make_shared<cpu::Event<Device>>(std::move(thisHandle), queues.size());


                events.emplace_back(newEvent);

                return newEvent;

            }


            friend struct alpaka::internal::GetDeviceType;


            auto getDeviceKind() const

            {

                return alpaka::internal::getDeviceKind(*m_platform.get());

            }


            auto getFreeGlobalMemBytes() const

            {

#if ALPAKA_HAS_HWLOC

                if(isNumaAware())

                    return internal::hwloc::getFreeGlobalMemBytes(m_cpuGroupIdx);

#endif

                return onHost::getFreeGlobalMemBytes();

            }


            friend struct internal::Alloc;

            friend struct alpaka::internal::GetApi;

            friend struct internal::GetDeviceProperties;

            friend struct internal::GetFreeGlobalMemBytes;

            friend struct internal::AdjustThreadSpec;

            friend struct internal::AllocDeferred;

            friend struct internal::AllocUnified;

            friend struct internal::AllocMapped;

        };


    } // namespace cpu


    namespace trait


    {

        /** Marks the executor exec::CpuSerial as supported by every cpu::Device. */
        template<typename T_Platform>
        struct IsExecutorSupportedBy::Op<exec::CpuSerial, cpu::Device<T_Platform>> : std::bool_constant<true>
        {
        };


#if ALPAKA_OMP

        /** Marks the executor exec::CpuOmpBlocks as supported by every cpu::Device. */
        template<typename T_Platform>
        struct IsExecutorSupportedBy::Op<exec::CpuOmpBlocks, cpu::Device<T_Platform>> : std::bool_constant<true>
        {
        };

#endif

#if ALPAKA_TBB

        /** Marks the executor exec::CpuTbbBlocks as supported by every cpu::Device. */
        template<typename T_Platform>
        struct IsExecutorSupportedBy::Op<exec::CpuTbbBlocks, cpu::Device<T_Platform>> : std::bool_constant<true>
        {
        };

#endif

    } // namespace trait


    namespace internal

    {

        /** Allocate aligned host memory for a buffer on a CPU device.
         *
         * The alignment is derived at compile time from the API and the device kind of the device. The memory is
         * handed to the device for pinning and wrapped into a SharedBuffer which releases it with
         * alpaka::core::alignedFree().
         *
         * @tparam T_Type element type of the buffer
         * @tparam T_Platform platform type of the device
         * @tparam T_Extents vector type of the extents
         */
        template<typename T_Type, typename T_Platform, alpaka::concepts::Vector T_Extents>
        struct Alloc::Op<T_Type, cpu::Device<T_Platform>, T_Extents>
        {
            /** @param device device the buffer belongs to
             *  @param extents extents of the buffer
             *  @return SharedBuffer owning the allocated memory
             */
            auto operator()(cpu::Device<T_Platform>& device, T_Extents const& extents) const
            {
                ALPAKA_LOG_FUNCTION(onHost::logger::memory + onHost::logger::device);

                using ApiTag = ALPAKA_TYPEOF(getApi(device));
                using DeviceKindTag = ALPAKA_TYPEOF(getDeviceKind(device));
                constexpr uint32_t memAlignment
                    = api::util::simdOptimizedAlignment<T_Type>(ApiTag{}, DeviceKindTag{});

                auto [numBytes, rowPitches] = api::util::emulatedAlignedMemDescription<T_Type>(memAlignment, extents);

                // owning handle to the device, created before any memory is allocated
                auto owningDevice = onHost::Device{device.getSharedPtr()};

                // NOTE(review): the result of alignedAlloc() is not checked here - confirm how allocation failures
                // are reported by alignedAlloc().
                void* const rawMem = alpaka::core::alignedAlloc(memAlignment, numBytes);
                T_Type* data = static_cast<T_Type*>(rawMem);
                device.pinPointer(data, numBytes);

                // The device handle is captured only to keep the device alive until the memory is released.
                auto releaseMem = [data, owningDevice]() { alpaka::core::alignedFree(memAlignment, data); };

                auto sharedBuffer = onHost::SharedBuffer{
                    owningDevice,
                    data,
                    extents,
                    rowPitches,
                    std::move(releaseMem),
                    Alignment<memAlignment>{}};

                // the description of the buffer is only created if the logger calls the functor
                ALPAKA_LOG_INFO(
                    onHost::logger::memory + onHost::logger::device,
                    [&]()
                    {
                        std::stringstream description;
                        description << sharedBuffer;
                        return description.str();
                    });

                return sharedBuffer;
            }
        };


        /** Unified memory allocation for a CPU device.
         *
         * Forwards to the Alloc specialization for cpu::Device and returns its result.
         */
        template<typename T_Type, typename T_Platform, alpaka::concepts::Vector T_Extents>
        struct AllocUnified::Op<T_Type, cpu::Device<T_Platform>, T_Extents>
        {
            auto operator()(cpu::Device<T_Platform>& device, T_Extents const& extents) const
            {
                ALPAKA_LOG_FUNCTION(onHost::logger::memory + onHost::logger::device);

                using HostAlloc = Alloc::Op<T_Type, cpu::Device<T_Platform>, T_Extents>;
                return HostAlloc{}(device, extents);
            }
        };


        /** Mapped memory allocation for a CPU device.
         *
         * Forwards to the Alloc specialization for cpu::Device and returns its result.
         */
        template<typename T_Type, typename T_Platform, alpaka::concepts::Vector T_Extents>
        struct AllocMapped::Op<T_Type, cpu::Device<T_Platform>, T_Extents>
        {
            auto operator()(cpu::Device<T_Platform>& device, T_Extents const& extents) const
            {
                ALPAKA_LOG_FUNCTION(onHost::logger::memory + onHost::logger::device);

                using HostAlloc = Alloc::Op<T_Type, cpu::Device<T_Platform>, T_Extents>;
                return HostAlloc{}(device, extents);
            }
        };


        /** Check if the data of a view is accessible from a CPU device.
         *
         * The result only depends on the types of the arguments and is evaluated at compile time.
         */
        template<typename T_Platform, typename T_Any>
        struct IsDataAccessible::FirstPath<cpu::Device<T_Platform>, T_Any>
        {
            /** @return true if the API of the view is api::host and the device kind is deviceKind::cpu or
             *          deviceKind::numaCpu, else false
             */
            bool operator()(cpu::Device<T_Platform>& device, T_Any const& view) const
            {
                ALPAKA_LOG_FUNCTION(onHost::logger::memory + onHost::logger::device);

                using ViewApi = ALPAKA_TYPEOF(getApi(view));
                using DevKind = ALPAKA_TYPEOF(getDeviceKind(device));

                constexpr bool isAccessible
                    = ViewApi{} == api::host && (DevKind{} == deviceKind::cpu || DevKind{} == deviceKind::numaCpu);
                return isAccessible;
            }
        };


        /** Thread specification for the serial CPU executor.
         *
         * The number of thread blocks and the number of threads per block are set to one in each dimension.
         * There is no need to emulate blocks if we have only one thread.
         */
        template<
            typename T_Platform,
            alpaka::concepts::Vector T_NumFrames,
            alpaka::concepts::Vector T_FrameExtents,
            alpaka::concepts::KernelBundle T_KernelBundle>
        struct AdjustThreadSpec::
            Op<cpu::Device<T_Platform>, FrameSpec<T_NumFrames, T_FrameExtents, exec::CpuSerial>, T_KernelBundle>
        {
            using FrameSpecType = FrameSpec<T_NumFrames, T_FrameExtents, exec::CpuSerial>;

            /** Overload for frame extents which fulfill the concept CVector.
             *
             * @return ThreadSpec with one block and one thread per block, using the executor of frameSpec
             */
            auto operator()(
                cpu::Device<T_Platform> const& device,
                FrameSpecType const& frameSpec,
                T_KernelBundle const& kernelBundle) const requires alpaka::concepts::CVector<T_FrameExtents>
            {
                alpaka::unused(device, kernelBundle);
                ALPAKA_LOG_FUNCTION(onHost::logger::kernel);

                /// @todo add shortcut to create a CVec with equal values
                using IotaVec = ALPAKA_TYPEOF(iotaCVec<typename T_FrameExtents::type, T_FrameExtents::dim()>());
                auto const ones = IotaVec::template fill<1u>();
                return ThreadSpec{ones, ones, frameSpec.getExecutor()};
            }

            /** Overload for all other frame extents.
             *
             * @return ThreadSpec with one block and one thread per block, using the executor of frameSpec
             */
            auto operator()(
                cpu::Device<T_Platform> const& device,
                FrameSpecType const& frameSpec,
                T_KernelBundle const& kernelBundle) const
            {
                alpaka::unused(device, kernelBundle);
                ALPAKA_LOG_FUNCTION(onHost::logger::kernel);

                /// @todo add shortcut to create a CVec with equal values
                using IotaVec = ALPAKA_TYPEOF(iotaCVec<typename T_FrameExtents::type, T_FrameExtents::dim()>());
                auto const ones = IotaVec::template fill<1u>();
                return ThreadSpec{ones, ones, frameSpec.getExecutor()};
            }
        };


        /** Thread specification for sequential executors on a CPU device.
         *
         * The number of frames is used as number of thread blocks, each block has one thread per dimension.
         */
        template<
            typename T_Platform,
            alpaka::concepts::Executor T_Executor,
            alpaka::concepts::Vector T_NumFrames,
            alpaka::concepts::Vector T_FrameExtents,
            alpaka::concepts::KernelBundle T_KernelBundle>
        requires exec::isSeqExecutor_v<T_Executor>
        struct AdjustThreadSpec::
            Op<cpu::Device<T_Platform>, FrameSpec<T_NumFrames, T_FrameExtents, T_Executor>, T_KernelBundle>
        {
            using FrameSpecType = FrameSpec<T_NumFrames, T_FrameExtents, T_Executor>;

            /** Overload for frame extents which fulfill the concept CVector.
             *
             * The number of threads is created with the compile time fill of the frame extents type.
             */
            auto operator()(
                cpu::Device<T_Platform> const& device,
                FrameSpecType const& frameSpec,
                T_KernelBundle const& kernelBundle) const requires alpaka::concepts::CVector<T_FrameExtents>
            {
                alpaka::unused(device, kernelBundle);
                ALPAKA_LOG_FUNCTION(onHost::logger::kernel);

                // each frame is processed by one thread block
                auto blocks = frameSpec.getNumFrames();
                auto const oneThreadPerBlock = T_FrameExtents::template fill<1u>();
                return ThreadSpec{blocks, oneThreadPerBlock, frameSpec.getExecutor()};
            }

            /** Overload for all other frame extents.
             *
             * The number of threads is a runtime Vec with the value type and dimension of the frame extents.
             */
            auto operator()(
                cpu::Device<T_Platform> const& device,
                FrameSpecType const& frameSpec,
                T_KernelBundle const& kernelBundle) const
            {
                alpaka::unused(device, kernelBundle);
                ALPAKA_LOG_FUNCTION(alpaka::onHost::logger::kernel);

                using ThreadVec = Vec<typename T_FrameExtents::type, T_FrameExtents::dim()>;

                // each frame is processed by one thread block
                auto blocks = frameSpec.getNumFrames();
                auto const oneThreadPerBlock = ThreadVec::fill(1);
                return ThreadSpec{blocks, oneThreadPerBlock, frameSpec.getExecutor()};
            }
        };


        /** Access to the properties stored in a CPU device. */
        template<typename T_Platform>
        struct GetDeviceProperties::Op<cpu::Device<T_Platform>>
        {
            /** @return copy of the properties the device queried during its construction */
            DeviceProperties operator()(cpu::Device<T_Platform> const& device) const
            {
                DeviceProperties propertiesCopy = device.m_properties;
                return propertiesCopy;
            }
        };

    } // namespace internal

} // namespace alpaka::onHost


namespace alpaka::internal

{

    /** API lookup for a host CPU device.
     *
     * The API of the device is the API of the platform handle stored in the device.
     */
    template<typename T_Platform>
    struct GetApi::Op<onHost::cpu::Device<T_Platform>>
    {
        inline constexpr auto operator()(auto&& device) const
        {
            // bind by reference to keep the constness of the member as seen through device
            auto& platformHandle = device.m_platform;
            return alpaka::getApi(platformHandle);
        }
    };

} // namespace alpaka::internal

DeviceProperties.hpp

Handle.hpp

SharedBuffer.hpp

alignedAlloc.hpp

Event.hpp

Queue.hpp

utility.hpp

util.hpp

ALPAKA_TYPEOF
#define ALPAKA_TYPEOF(...)
Get the type of instance.
Definition common.hpp:154

Api.hpp

interface.hpp

ALPAKA_LOG_INFO
#define ALPAKA_LOG_INFO(logLvl, callable)
Write a meta data message to the output.
Definition logger.hpp:106

ALPAKA_LOG_FUNCTION
#define ALPAKA_LOG_FUNCTION(logLvl)
Log the entry and exit of a scope.
Definition logger.hpp:95

alpaka::api::util::emulatedAlignedMemDescription
auto emulatedAlignedMemDescription(uint32_t alignmentInByte, T_Extents extents)
provides a memory description to create multidimensional linewise aligned memory within a one dimensi...
Definition util.hpp:101

alpaka::api::util::simdOptimizedAlignment
constexpr auto simdOptimizedAlignment(auto api, alpaka::concepts::DeviceKind auto deviceKind)
Calculate the best alignment for SIMD optimized memory allocation.
Definition util.hpp:141

alpaka::api::host
constexpr auto host
Definition Api.hpp:39

alpaka::core::alignedFree
ALPAKA_FN_INLINE ALPAKA_FN_HOST void alignedFree(size_t alignment, auto ptr)
Definition alignedAlloc.hpp:27

alpaka::core::alignedAlloc
ALPAKA_FN_INLINE ALPAKA_FN_HOST auto alignedAlloc(size_t alignment, size_t size) -> void *
Definition alignedAlloc.hpp:15

alpaka::deviceKind::cpu
constexpr auto cpu
Definition tag.hpp:168

alpaka::deviceKind::numaCpu
constexpr auto numaCpu
Definition tag.hpp:178

alpaka::exec
Definition executor.hpp:16

alpaka::exec::isSeqExecutor_v
constexpr bool isSeqExecutor_v
Definition tag.hpp:285

alpaka::onAcc::scope::device
constexpr Device device
Definition scope.hpp:70

alpaka::onHost::cpu
Definition Device.hpp:30

alpaka::onHost::logger::queue
constexpr auto queue
Definition lvl.hpp:127

alpaka::onHost::logger::device
constexpr auto device
Definition lvl.hpp:82

alpaka::onHost::logger::kernel
constexpr auto kernel
Definition lvl.hpp:142

alpaka::onHost::logger::memory
constexpr auto memory
Definition lvl.hpp:112

alpaka::onHost::logger::event
constexpr auto event
Definition lvl.hpp:97

alpaka::onHost::trait
Definition Api.hpp:46

alpaka::onHost
Functionality which is usable on the host CPU controller thread.
Definition api.hpp:40

alpaka::onHost::SharedBuffer
SharedBuffer(T_Any const &, T_Type *, T_UserExtents const &, T_UserPitches const &, std::invocable<> auto, T_MemAlignment const) -> SharedBuffer< ALPAKA_TYPEOF(getApi(std::declval< T_Any >())), T_Type, typename T_UserPitches::UniVec, T_MemAlignment >

alpaka::onHost::fill
void fill(Queue< T_Device, T_QueueKind > const &queue, auto &&dest, T_Value elementValue)
fill memory element wise
Definition Queue.hpp:366

alpaka::onHost::Handle
std::shared_ptr< T > Handle
Definition Handle.hpp:30

alpaka::onHost::getFreeGlobalMemBytes
auto getFreeGlobalMemBytes() -> std::size_t
Definition sysInfo.hpp:210

alpaka::onHost::ThreadSpec
ThreadSpec(T_NumBlocks const &, T_NumThreads const &) -> ThreadSpec< alpaka::trait::getVec_t< T_NumBlocks >, alpaka::trait::getVec_t< T_NumThreads > >

alpaka::onHost::Device
Device(Handle< T_Device > &&) -> Device< ALPAKA_TYPEOF(alpaka::internal::getApi(std::declval< T_Device >())), ALPAKA_TYPEOF(alpaka::internal::getDeviceKind(std::declval< T_Device >()))>

alpaka::queueKind::blocking
constexpr auto blocking
Definition tag.hpp:99

alpaka::queueKind::nonBlocking
constexpr auto nonBlocking
Definition tag.hpp:111

alpaka::Vec
ALPAKA_FN_HOST_ACC Vec(T_1, T_Args...) -> Vec< T_1, uint32_t(sizeof...(T_Args)+1u), ArrayStorage< T_1, uint32_t(sizeof...(T_Args)+1u)> >

alpaka::getDeviceKind
constexpr decltype(auto) getDeviceKind(auto &&any)
Get the device type of an object.
Definition interface.hpp:78

alpaka::getApi
constexpr decltype(auto) getApi(auto &&any)
Get the API an object depends on.
Definition interface.hpp:42

alpaka::iotaCVec
consteval auto iotaCVec()
Create and return a CVector of the given length with values 1, 2, ...
Definition CVec.hpp:135

alpaka::get
constexpr decltype(auto) get(concepts::SpecializationOf< Dict > auto &t) noexcept
Definition Dict.hpp:156

std
STL namespace.

Device.hpp

trait.hpp

alpaka::onHost::DeviceProperties
Properties of a device.
Definition DeviceProperties.hpp:29

alpaka::onHost::DeviceProperties::name
std::string name
The name of the device.
Definition DeviceProperties.hpp:47

alpaka::onHost::cpu::Device
Definition Device.hpp:33

alpaka::onHost::cpu::Device::operator!=
bool operator!=(Device const &other) const
Definition Device.hpp:60

alpaka::onHost::cpu::Device::wait
void wait()
Definition Device.hpp:65

alpaka::onHost::cpu::Device::~Device
~Device()
Definition Device.hpp:44

alpaka::onHost::cpu::Device::operator=
Device & operator=(Device &&)=delete

alpaka::onHost::cpu::Device::Device
Device(Device &&)=delete

alpaka::onHost::cpu::Device::Queue
friend struct Queue
Definition Device.hpp:104

alpaka::onHost::cpu::Device::Device
Device(Device const &)=delete

alpaka::onHost::cpu::Device::operator=
Device & operator=(Device const &)=delete

alpaka::onHost::cpu::Device::operator==
bool operator==(Device const &other) const
Definition Device.hpp:55

alpaka::onHost::cpu::Device::Device
Device(internal::concepts::PlatformHandle auto platform, uint32_t const idx, uint32_t cpuGroupIdx)
Definition Device.hpp:35

alpaka::onHost::trait::IsExecutorSupportedBy::Op
Definition trait.hpp:33

sysInfo.hpp

tag.hpp

utility.hpp