latest/doxygen_dev/api_2syclGeneric_2Device_8hpp_source.html

/* Copyright 2025 Simeon Ehrig, René Widera

 * SPDX-License-Identifier: MPL-2.0

 */


#pragma once


#include "Queue.hpp"
#include "alpaka/Vec.hpp"
#include "alpaka/api/syclGeneric/Event.hpp"
#include "alpaka/api/syclGeneric/Queue.hpp"
#include "alpaka/api/util.hpp"
#include "alpaka/core/config.hpp"
#include "alpaka/onHost/mem/SharedBuffer.hpp"

#include <stdexcept>


#if ALPAKA_LANG_SYCL


#    include <sycl/sycl.hpp>


namespace alpaka::onHost

{

    namespace syclGeneric

    {

        /** Device implementation shared by the SYCL based backends.
         *
         * Bundles a native `sycl::device` with a handle to the platform it was created from and keeps a copy of
         * the device properties that were queried during construction. The device additionally tracks every
         * queue and event it created through weak references.
         *
         * The type is neither copyable nor movable. getSharedPtr() relies on `shared_from_this()`, therefore an
         * instance has to be owned by a `std::shared_ptr`.
         *
         * @tparam T_Platform type of the platform this device belongs to
         */
        template<typename T_Platform>
        struct Device : std::enable_shared_from_this<Device<T_Platform>>
        {
        public:
            /** Create a device.
             *
             * @param platform handle of the platform the device belongs to, stored inside the device
             * @param dev native SYCL device which is wrapped
             * @param idx index of the device, forwarded together with the platform to the property query
             */
            Device(internal::concepts::PlatformHandle auto platform, auto const& dev, uint32_t const idx)
                : m_platform(std::move(platform))
                , m_idx(idx)
                , m_sycl_dev(dev)
                , m_properties{internal::getDeviceProperties(*m_platform.get(), m_idx)}
            {
                ALPAKA_LOG_FUNCTION(onHost::logger::device);
            }

            ~Device()
            {
                ALPAKA_LOG_FUNCTION(onHost::logger::device);
            }

            // A device is a unique object: copy and move are both disabled.
            Device(Device const&) = delete;
            Device(Device&&) = delete;
            Device& operator=(Device const&) = delete;
            Device& operator=(Device&&) = delete;

            /** @return the name reported by the native SYCL device */
            auto getName() const
            {
                return m_sycl_dev.get_info<sycl::info::device::name>();
            }

            /** @return a shared pointer owning this device */
            std::shared_ptr<Device<T_Platform>> getSharedPtr()
            {
                return this->shared_from_this();
            }

            /** Create a new queue on this device.
             *
             * The queue is registered in the list of queues known to the device, the current size of that list
             * is handed to the queue constructor.
             *
             * @param kind queue kind, only `queueKind::blocking` and `queueKind::nonBlocking` are accepted
             * @return handle to the newly created queue
             */
            [[nodiscard]] Handle<syclGeneric::Queue<Device>> makeQueue(alpaka::concepts::QueueKind auto kind)
            {
                ALPAKA_LOG_FUNCTION(onHost::logger::queue + onHost::logger::device);
                constexpr bool isBlocking = kind == queueKind::blocking;
                static_assert(isBlocking || kind == queueKind::nonBlocking, "Unsupported queue kind.");

                // the owning pointer is acquired before the registry lock is taken
                auto self = this->getSharedPtr();
                std::lock_guard<std::mutex> registryLock{m_writeGuard};

                auto createdQueue
                    = std::make_shared<syclGeneric::Queue<Device>>(std::move(self), queues.size(), isBlocking);
                queues.emplace_back(createdQueue);
                return createdQueue;
            }

            /** @return copies of the native SYCL device and of the context provided by the platform */
            [[nodiscard]] std::pair<sycl::device, sycl::context> getNativeHandle() const noexcept
            {
                return std::pair<sycl::device, sycl::context>{m_sycl_dev, m_platform->getContext()};
            }

            /** Wait on every queue created by this device which is still alive. */
            void wait()
            {
                ALPAKA_LOG_FUNCTION(onHost::logger::device);
                // Snapshot the weak references while holding the lock; the lock is released before waiting so
                // that other operations on the device are not blocked in the meantime.
                auto const knownQueues = [this]
                {
                    std::lock_guard<std::mutex> registryLock{m_writeGuard};
                    return queues;
                }();

                for(auto const& weakQueue : knownQueues)
                {
                    auto alive = weakQueue.lock();
                    if(alive)
                    {
                        alive->wait();
                    }
                }
            }

        private:
            // Traits that need access to the private interface of the device.
            friend struct internal::MakeEvent;
            friend struct alpaka::internal::GetDeviceType;
            friend struct alpaka::internal::GetApi;
            friend struct internal::GetDeviceProperties;
            friend struct internal::GetFreeGlobalMemBytes;
            friend struct internal::AdjustThreadSpec;
            friend struct onHost::internal::AllocDeferred;
            friend struct onHost::internal::AllocUnified;
            friend struct onHost::internal::AllocMapped;
            friend struct onHost::internal::IsDataAccessible;

            /** Create a new event on this device.
             *
             * The event is registered in the list of events known to the device, the current size of that list
             * is handed to the event constructor.
             *
             * @return handle to the newly created event
             */
            Handle<syclGeneric::Event<Device>> makeEvent()
            {
                ALPAKA_LOG_FUNCTION(onHost::logger::event + onHost::logger::device);
                // the owning pointer is acquired before the registry lock is taken
                auto self = this->getSharedPtr();
                std::lock_guard<std::mutex> registryLock{m_writeGuard};

                auto createdEvent = std::make_shared<syclGeneric::Event<Device>>(std::move(self), events.size());
                events.emplace_back(createdEvent);
                return createdEvent;
            }

            /** Never called; only hosts the compile time check that this type models the device concept. */
            void _()
            {
                static_assert(internal::concepts::Device<Device>);
            }

            /** @return the device kind reported for the platform of this device */
            auto getDeviceKind() const
            {
                return alpaka::internal::getDeviceKind(*m_platform.get());
            }

            // handle to the platform the device was created from
            Handle<T_Platform> m_platform;
            // index passed at construction
            uint32_t m_idx = 0u;
            // wrapped native device
            sycl::device m_sycl_dev;

            // weak references to all queues/events created by this device, guarded by m_writeGuard
            std::vector<std::weak_ptr<syclGeneric::Queue<Device>>> queues;
            std::vector<std::weak_ptr<syclGeneric::Event<Device>>> events;
            std::mutex m_writeGuard;

            // properties queried once during construction
            DeviceProperties m_properties;
        };

    } // namespace syclGeneric


    namespace internal

    {


        /** Allocate device USM memory on a SYCL device.
         *
         * The memory is requested with `sycl::aligned_alloc_device` for the native device and context of the
         * given device and is released with `sycl::free` by the deleter handed to the returned buffer.
         *
         * @tparam T_Type element type of the buffer
         * @tparam T_Platform platform type of the device
         * @tparam T_Extents vector type describing the extents
         */
        template<typename T_Type, typename T_Platform, alpaka::concepts::Vector T_Extents>
        struct Alloc::Op<T_Type, syclGeneric::Device<T_Platform>, T_Extents>
        {
            /** @param device device the memory is allocated on
             *  @param extents extents of the buffer in elements
             *  @return shared buffer describing the allocated memory
             *  @throws std::runtime_error if SYCL fails to provide memory for a non-empty request
             */
            auto operator()(syclGeneric::Device<T_Platform>& device, T_Extents const& extents) const
            {
                ALPAKA_LOG_FUNCTION(onHost::logger::memory + onHost::logger::device);
                constexpr uint32_t alignment = api::util::simdOptimizedAlignment<T_Type>(
                    ALPAKA_TYPEOF(getApi(device)){},
                    ALPAKA_TYPEOF(getDeviceKind(device)){});
                auto [memSizeInByte, pitches] = api::util::emulatedAlignedMemDescription<T_Type>(alignment, extents);

                // the buffer keeps the device alive for as long as the memory exists
                auto deviceDependency = onHost::Device{device.getSharedPtr()};
                auto [sycl_device, sycl_context] = device.getNativeHandle();

                // The allocation function returns void*, static_cast is sufficient for the conversion.
                T_Type* ptr = static_cast<T_Type*>(
                    sycl::aligned_alloc_device(alignment, memSizeInByte, sycl_device, sycl_context));
                // SYCL signals a failed allocation with a null pointer instead of an exception. An empty
                // request is not treated as an error because a null result can be legitimate there.
                if(ptr == nullptr && memSizeInByte != 0)
                {
                    throw std::runtime_error("Sycl device memory allocation failed.");
                }
                auto deleter = [ctx = sycl_context, ptr]() { sycl::free(toVoidPtr(ptr), ctx); };

                auto sharedBuffer = onHost::SharedBuffer{
                    deviceDependency,
                    ptr,
                    extents,
                    pitches,
                    std::move(deleter),
                    Alignment<alignment>{}};
                return sharedBuffer;
            }
        };


        /** Allocate shared (unified) USM memory on a SYCL device.
         *
         * The memory is requested with `sycl::aligned_alloc_shared` for the native device and context of the
         * given device and is released with `sycl::free` by the deleter handed to the returned buffer.
         *
         * @tparam T_Type element type of the buffer
         * @tparam T_Platform platform type of the device
         * @tparam T_Extents vector type describing the extents
         */
        template<typename T_Type, typename T_Platform, alpaka::concepts::Vector T_Extents>
        struct AllocUnified::Op<T_Type, syclGeneric::Device<T_Platform>, T_Extents>
        {
            /** @param device device the memory is allocated for
             *  @param extents extents of the buffer in elements
             *  @return shared buffer describing the allocated memory
             *  @throws std::runtime_error if the device lacks the `usm_shared_allocations` aspect or if SYCL
             *          fails to provide memory for a non-empty request
             */
            auto operator()(syclGeneric::Device<T_Platform>& device, T_Extents const& extents) const
            {
                ALPAKA_LOG_FUNCTION(onHost::logger::memory + onHost::logger::device);
                constexpr uint32_t alignment = api::util::simdOptimizedAlignment<T_Type>(
                    ALPAKA_TYPEOF(getApi(device)){},
                    ALPAKA_TYPEOF(getDeviceKind(device)){});
                auto [memSizeInByte, pitches] = api::util::emulatedAlignedMemDescription<T_Type>(alignment, extents);

                // the buffer keeps the device alive for as long as the memory exists
                auto deviceDependency = onHost::Device{device.getSharedPtr()};
                auto [sycl_device, sycl_context] = device.getNativeHandle();

                bool isManagedMemorySupported = sycl_device.has(sycl::aspect::usm_shared_allocations);
                if(!isManagedMemorySupported)
                {
                    throw std::runtime_error("Sycl device does not support unified memory allocations.");
                }

                // The allocation function returns void*, static_cast is sufficient for the conversion.
                T_Type* ptr = static_cast<T_Type*>(
                    sycl::aligned_alloc_shared(alignment, memSizeInByte, sycl_device, sycl_context));
                // SYCL signals a failed allocation with a null pointer instead of an exception. An empty
                // request is not treated as an error because a null result can be legitimate there.
                if(ptr == nullptr && memSizeInByte != 0)
                {
                    throw std::runtime_error("Sycl unified memory allocation failed.");
                }
                auto deleter = [ctx = sycl_context, ptr]() { sycl::free(toVoidPtr(ptr), ctx); };

                auto sharedBuffer = onHost::SharedBuffer{
                    deviceDependency,
                    ptr,
                    extents,
                    pitches,
                    std::move(deleter),
                    Alignment<alignment>{}};
                return sharedBuffer;
            }
        };


        /** Allocate host USM memory within the context of a SYCL device.
         *
         * The memory is requested with `sycl::aligned_alloc_host` for the native context of the given device
         * and is released with `sycl::free` by the deleter handed to the returned buffer.
         *
         * @tparam T_Type element type of the buffer
         * @tparam T_Platform platform type of the device
         * @tparam T_Extents vector type describing the extents
         */
        template<typename T_Type, typename T_Platform, alpaka::concepts::Vector T_Extents>
        struct AllocMapped::Op<T_Type, syclGeneric::Device<T_Platform>, T_Extents>
        {
            /** @param device device whose context is used for the allocation
             *  @param extents extents of the buffer in elements
             *  @return shared buffer describing the allocated memory
             *  @throws std::runtime_error if SYCL fails to provide memory for a non-empty request
             */
            auto operator()(syclGeneric::Device<T_Platform>& device, T_Extents const& extents) const
            {
                ALPAKA_LOG_FUNCTION(onHost::logger::memory + onHost::logger::device);
                constexpr uint32_t alignment = api::util::simdOptimizedAlignment<T_Type>(
                    ALPAKA_TYPEOF(getApi(device)){},
                    ALPAKA_TYPEOF(getDeviceKind(device)){});
                auto [memSizeInByte, pitches] = api::util::emulatedAlignedMemDescription<T_Type>(alignment, extents);

                // the buffer keeps the device alive for as long as the memory exists
                auto deviceDependency = onHost::Device{device.getSharedPtr()};
                // only the context is required for a host allocation
                auto [_, sycl_context] = device.getNativeHandle();

                // The allocation function returns void*, static_cast is sufficient for the conversion.
                T_Type* ptr
                    = static_cast<T_Type*>(sycl::aligned_alloc_host(alignment, memSizeInByte, sycl_context));
                // SYCL signals a failed allocation with a null pointer instead of an exception. An empty
                // request is not treated as an error because a null result can be legitimate there.
                if(ptr == nullptr && memSizeInByte != 0)
                {
                    throw std::runtime_error("Sycl mapped host memory allocation failed.");
                }
                auto deleter = [ctx = sycl_context, ptr]() { sycl::free(toVoidPtr(ptr), ctx); };

                auto sharedBuffer = onHost::SharedBuffer{
                    deviceDependency,
                    ptr,
                    extents,
                    pitches,
                    std::move(deleter),
                    Alignment<alignment>{}};
                return sharedBuffer;
            }
        };


        /** Check whether the data of a view can be accessed from a SYCL device.
         *
         * The decision is based on the USM pointer queries of SYCL evaluated for the context of the device.
         *
         * @tparam T_Platform platform type of the device
         * @tparam T_Any type of the object providing the data pointer
         */
        template<typename T_Platform, typename T_Any>
        struct IsDataAccessible::FirstPath<syclGeneric::Device<T_Platform>, T_Any>
        {
            /** @param device device which should access the data
             *  @param view object whose data pointer is inspected
             *  @return true if
             *          - the pointer is a USM allocation associated with exactly this device, or
             *          - the pointer is a shared USM allocation, or
             *          - the pointer is unknown to the context, the view belongs to the host API and the device
             *            is of kind cpu or numaCpu;
             *          false otherwise
             */
            bool operator()(syclGeneric::Device<T_Platform>& device, T_Any const& view) const
            {
                ALPAKA_LOG_FUNCTION(onHost::logger::memory + onHost::logger::device);
                auto [nativeDevice, nativeContext] = device.getNativeHandle();
                auto const allocKind = sycl::get_pointer_type(data(view), nativeContext);
                bool const isUsmAllocation = allocKind != sycl::usm::alloc::unknown;

                if(isUsmAllocation)
                {
                    try
                    {
                        // memory allocated by this very sycl device is always accessible
                        if(sycl::get_pointer_device(data(view), nativeContext) == nativeDevice)
                        {
                            return true;
                        }
                    }
                    catch(...)
                    {
                        // Intentionally ignored: a failing device query is not an error here, the remaining
                        // checks decide about the accessibility.
                    }
                }

                // shared allocations are visible within the device context
                if(allocKind == sycl::usm::alloc::shared)
                {
                    return true;
                }

                if(!isUsmAllocation)
                {
                    // assume that a sycl cpu device can always access host memory
                    if constexpr(
                        ALPAKA_TYPEOF(getApi(view)){} == api::host
                        && (ALPAKA_TYPEOF(getDeviceKind(device)){} == deviceKind::cpu
                            || ALPAKA_TYPEOF(getDeviceKind(device)){} == deviceKind::numaCpu))
                    {
                        return true;
                    }
                }

                return false;
            }
        };


        /** Provide the properties of a SYCL device.
         *
         * @tparam T_Platform platform type of the device
         */
        template<typename T_Platform>
        struct GetDeviceProperties::Op<syclGeneric::Device<T_Platform>>
        {
            /** @param device device to query
             *  @return copy of the properties stored inside the device
             */
            DeviceProperties operator()(syclGeneric::Device<T_Platform> const& device) const
            {
                auto const& storedProperties = device.m_properties;
                return storedProperties;
            }
        };


        /** Derive a thread specification from a frame specification for a SYCL device.
         *
         * The frame extents are used as the starting point for the number of threads and are adjusted with
         * `api::util::adjustToLimit`; the number of frames is passed on unchanged.
         *
         * @tparam T_Platform platform type of the device
         * @tparam T_Executor executor of the frame specification
         * @tparam T_NumFrames vector type of the number of frames
         * @tparam T_FrameExtents vector type of the frame extents
         * @tparam T_KernelBundle type of the kernel bundle
         */
        template<
            typename T_Platform,
            alpaka::concepts::Executor T_Executor,
            alpaka::concepts::Vector T_NumFrames,
            alpaka::concepts::Vector T_FrameExtents,
            alpaka::concepts::KernelBundle T_KernelBundle>
        struct AdjustThreadSpec::
            Op<syclGeneric::Device<T_Platform>, FrameSpec<T_NumFrames, T_FrameExtents, T_Executor>, T_KernelBundle>
        {
            using FrameSpecType = FrameSpec<T_NumFrames, T_FrameExtents, T_Executor>;

            /** Variant for frame extents which are known at compile time.
             *
             * The limit is taken from `getMaxThreadsPerBlock` for the API, device kind and executor, hence the
             * adjustment is evaluated at compile time and the device instance itself is not inspected.
             *
             * @param device device the kernel will run on, only its type is used
             * @param frameSpec frame specification to convert
             * @param kernelBundle unused
             * @return thread specification with the number of frames and the adjusted thread extents
             */
            auto operator()(
                syclGeneric::Device<T_Platform> const& device,
                FrameSpecType const& frameSpec,
                T_KernelBundle const& kernelBundle) const requires alpaka::concepts::CVector<T_FrameExtents>
            {
                alpaka::unused(device, kernelBundle);
                ALPAKA_LOG_FUNCTION(onHost::logger::kernel + onHost::logger::device);
                auto requestedThreads = frameSpec.getFrameExtents();

                using DeviceKindType = ALPAKA_TYPEOF(getDeviceKind(device));
                using ApiType = ALPAKA_TYPEOF(getApi(device));
                constexpr auto adjustedThreads = api::util::adjustToLimit<
                    alpaka::onHost::getMaxThreadsPerBlock(ApiType{}, DeviceKindType{}, T_Executor{}),
                    0u,
                    1u>(requestedThreads);
                return ThreadSpec{frameSpec.getNumFrames(), adjustedThreads};
            }

            /** Variant for frame extents which are only known at runtime.
             *
             * The limit is the `maxThreadsPerBlock` value of the properties stored inside the device.
             *
             * @param device device the kernel will run on
             * @param frameSpec frame specification to convert
             * @param kernelBundle unused
             * @return thread specification with the number of frames and the adjusted thread extents
             */
            auto operator()(
                syclGeneric::Device<T_Platform> const& device,
                FrameSpecType const& frameSpec,
                T_KernelBundle const& kernelBundle) const
            {
                alpaka::unused(kernelBundle);
                ALPAKA_LOG_FUNCTION(onHost::logger::kernel + onHost::logger::device);
                auto requestedThreads = frameSpec.getFrameExtents();
                auto const threadLimit = device.m_properties.maxThreadsPerBlock;

                auto adjustedThreads = api::util::adjustToLimit(requestedThreads, threadLimit);
                return ThreadSpec{frameSpec.getNumFrames(), adjustedThreads};
            }
        };


    } // namespace internal

} // namespace alpaka::onHost


namespace alpaka::internal

{

    /** Provide the API of a SYCL device by forwarding the query to the platform of the device.
     *
     * @tparam T_Platform platform type of the device
     */
    template<typename T_Platform>
    struct GetApi::Op<onHost::syclGeneric::Device<T_Platform>>
    {
        /** @param device device to query
         *  @return result of `internal::getApi` for the platform stored inside the device
         */
        decltype(auto) operator()(auto&& device) const
        {
            auto& owningPlatform = *device.m_platform.get();
            return internal::getApi(owningPlatform);
        }
    };

} // namespace alpaka::internal


#endif

SharedBuffer.hpp

Vec.hpp

Event.hpp

Queue.hpp

util.hpp

ALPAKA_TYPEOF
#define ALPAKA_TYPEOF(...)
Get the type of instance.
Definition common.hpp:154

config.hpp

ALPAKA_LOG_FUNCTION
#define ALPAKA_LOG_FUNCTION(logLvl)
Log the entry and exit of a scope.
Definition logger.hpp:95

alpaka::api::util::adjustToLimit
consteval auto adjustToLimit(concepts::CVector auto const input)
adjust the input vector to a given limit by halving all components until the product of these is belo...
Definition util.hpp:64

alpaka::api::util::emulatedAlignedMemDescription
auto emulatedAlignedMemDescription(uint32_t alignmentInByte, T_Extents extents)
provides a memory description to create multidimensional linewise aligned memory within a one dimensi...
Definition util.hpp:101

alpaka::api::util::simdOptimizedAlignment
constexpr auto simdOptimizedAlignment(auto api, alpaka::concepts::DeviceKind auto deviceKind)
Calculate the best alignment for SIMD optimized memory allocation.
Definition util.hpp:141

alpaka::api::host
constexpr auto host
Definition Api.hpp:39

alpaka::deviceKind::cpu
constexpr auto cpu
Definition tag.hpp:168

alpaka::deviceKind::numaCpu
constexpr auto numaCpu
Definition tag.hpp:178

alpaka::internal
alpaka internal implementations.
Definition generic.hpp:19

alpaka::internal::getDeviceKind
constexpr auto getDeviceKind(auto &&any) -> decltype(GetDeviceType::Op< ALPAKA_TYPEOF(any)>{}(any))
Definition interface.hpp:109

alpaka::internal::getApi
constexpr auto getApi(auto &&any) -> decltype(GetApi::Op< ALPAKA_TYPEOF(any)>{}(any))
Definition interface.hpp:62

alpaka::onAcc::scope::device
constexpr Device device
Definition scope.hpp:70

alpaka::onHost::internal
Definition cpuArchSize.hpp:13

alpaka::onHost::internal::getDeviceProperties
DeviceProperties getDeviceProperties(auto const &platform, uint32_t idx)
Definition interface.hpp:447

alpaka::onHost::logger::queue
constexpr auto queue
Definition lvl.hpp:127

alpaka::onHost::logger::device
constexpr auto device
Definition lvl.hpp:82

alpaka::onHost::logger::kernel
constexpr auto kernel
Definition lvl.hpp:142

alpaka::onHost::logger::memory
constexpr auto memory
Definition lvl.hpp:112

alpaka::onHost
Functionality which is usable on the host CPU controller thread.
Definition api.hpp:40

alpaka::onHost::getNativeHandle
auto getNativeHandle(auto const &handle)
Get the native handle of a handle.
Definition interface.hpp:131

alpaka::onHost::getMaxThreadsPerBlock
consteval uint32_t getMaxThreadsPerBlock(T_Api api, T_DeviceKind deviceKind, T_Exec exec)
A safe(ish) compile time lower bound on max threads per block for a given combination of API,...
Definition trait.hpp:210

alpaka::onHost::SharedBuffer
SharedBuffer(T_Any const &, T_Type *, T_UserExtents const &, T_UserPitches const &, std::invocable<> auto, T_MemAlignment const) -> SharedBuffer< ALPAKA_TYPEOF(getApi(std::declval< T_Any >())), T_Type, typename T_UserPitches::UniVec, T_MemAlignment >

alpaka::onHost::data
decltype(auto) data(auto &&any)
pointer to data of an object
Definition interface.hpp:157

alpaka::onHost::getName
std::convertible_to< std::string > auto getName(auto &&any)
Runtime name for a given object.
Definition interface.hpp:106

alpaka::onHost::ThreadSpec
ThreadSpec(T_NumBlocks const &, T_NumThreads const &) -> ThreadSpec< alpaka::trait::getVec_t< T_NumBlocks >, alpaka::trait::getVec_t< T_NumThreads > >

alpaka::onHost::Device
Device(Handle< T_Device > &&) -> Device< ALPAKA_TYPEOF(alpaka::internal::getApi(std::declval< T_Device >())), ALPAKA_TYPEOF(alpaka::internal::getDeviceKind(std::declval< T_Device >()))>

alpaka::onHost::wait
void wait(alpaka::concepts::HasGet auto &handle)
wait for all work to be finished
Definition interface.hpp:142

alpaka::toVoidPtr
auto * toVoidPtr(T inPtr)
Cast a pointer that may or may not point to volatile memory to a (void*) or (void const*).
Definition util.hpp:34

alpaka::getDeviceKind
constexpr decltype(auto) getDeviceKind(auto &&any)
Get the device type of an object.
Definition interface.hpp:78

alpaka::getApi
constexpr decltype(auto) getApi(auto &&any)
Get the API an object depends on.
Definition interface.hpp:42

alpaka::get
constexpr decltype(auto) get(concepts::SpecializationOf< Dict > auto &t) noexcept
Definition Dict.hpp:156

alpaka::internal::GetApi::Op
Definition interface.hpp:54

alpaka::onHost::internal::AdjustThreadSpec
Definition interface.hpp:266

alpaka::onHost::internal::AllocMapped::Op
Definition interface.hpp:351

alpaka::onHost::internal::AllocMapped::Op::operator()
void operator()(T_Any &any, T_Extents const &) const

alpaka::onHost::internal::AllocUnified::Op
Definition interface.hpp:342

alpaka::onHost::internal::AllocUnified::Op::operator()
void operator()(T_Any &any, T_Extents const &) const

alpaka::onHost::internal::Alloc::Op
Definition interface.hpp:324

alpaka::onHost::internal::Alloc::Op::operator()
void operator()(T_Any &any, T_Extents const &) const

alpaka::onHost::internal::GetDeviceProperties::Op
Definition interface.hpp:428

alpaka::onHost::internal::GetDeviceProperties::Op::operator()
DeviceProperties operator()(auto const &platform, uint32_t idx) const

alpaka::onHost::internal::IsDataAccessible::FirstPath
Definition interface.hpp:368

alpaka::onHost::internal::IsDataAccessible::FirstPath::operator()
bool operator()(T_Device &device, T_Any const &any) const