17# include <sycl/sycl.hpp>
23 template<
typename T_Platform>
24 struct Device : std::enable_shared_from_this<Device<T_Platform>>
27 Device(internal::concepts::PlatformHandle
auto platform,
auto const& dev, uint32_t
const idx)
28 : m_platform(std::move(platform))
31 , m_properties{internal::getDeviceProperties(*m_platform.
get(), m_idx)}
41 Device(Device
const&) =
delete;
42 Device& operator=(Device
const&) =
delete;
45 Device& operator=(Device&&) =
delete;
49 return m_sycl_dev.get_info<sycl::info::device::name>();
52 std::shared_ptr<Device<T_Platform>> getSharedPtr()
54 return this->shared_from_this();
57 [[nodiscard]] Handle<syclGeneric::Queue<Device>> makeQueue(alpaka::concepts::QueueKind
auto kind)
61 kind == queueKind::blocking || kind == queueKind::nonBlocking,
62 "Unsupported queue kind.");
63 auto thisHandle = this->getSharedPtr();
64 std::lock_guard<std::mutex> lk{m_writeGuard};
66 constexpr bool isBlocking = kind == queueKind::blocking;
68 = std::make_shared<syclGeneric::Queue<Device>>(std::move(thisHandle), queues.size(), isBlocking);
70 queues.emplace_back(newQueue);
74 [[nodiscard]] std::pair<sycl::device, sycl::context>
getNativeHandle() const noexcept
76 return {m_sycl_dev, m_platform->getContext()};
83 std::vector<std::weak_ptr<syclGeneric::Queue<Device>>> tmpQueues;
85 std::lock_guard<std::mutex> lk{m_writeGuard};
88 for(
auto& weakQueue : tmpQueues)
90 if(
auto queue = weakQueue.lock())
98 friend struct internal::MakeEvent;
100 Handle<syclGeneric::Event<Device>> makeEvent()
103 auto thisHandle = this->getSharedPtr();
104 std::lock_guard<std::mutex> lk{m_writeGuard};
105 auto newEvent = std::make_shared<syclGeneric::Event<Device>>(std::move(thisHandle), events.size());
107 events.emplace_back(newEvent);
113 static_assert(internal::concepts::Device<Device>);
116 friend struct alpaka::internal::GetDeviceType;
120 return alpaka::internal::getDeviceKind(*m_platform.get());
123 Handle<T_Platform> m_platform;
125 sycl::device m_sycl_dev;
127 std::vector<std::weak_ptr<syclGeneric::Queue<Device>>> queues;
128 std::vector<std::weak_ptr<syclGeneric::Event<Device>>> events;
129 std::mutex m_writeGuard;
131 DeviceProperties m_properties;
133 friend struct alpaka::internal::GetApi;
134 friend struct internal::GetDeviceProperties;
135 friend struct internal::GetFreeGlobalMemBytes;
136 friend struct internal::AdjustThreadSpec;
137 friend struct onHost::internal::AllocDeferred;
138 friend struct onHost::internal::AllocUnified;
139 friend struct onHost::internal::AllocMapped;
140 friend struct onHost::internal::IsDataAccessible;
147 template<
typename T_Type,
typename T_Platform, alpaka::concepts::Vector T_Extents>
148 struct Alloc::Op<T_Type, syclGeneric::
Device<T_Platform>, T_Extents>
150 auto operator()(syclGeneric::Device<T_Platform>& device, T_Extents
const& extents)
const
159 auto [sycl_device, sycl_context] =
device.getNativeHandle();
161 T_Type* ptr =
reinterpret_cast<T_Type*
>(
162 sycl::aligned_alloc_device(alignment, memSizeInByte, sycl_device, sycl_context));
163 auto deleter = [ctx = sycl_context, ptr]() { sycl::free(
toVoidPtr(ptr), ctx); };
171 Alignment<alignment>{}};
176 template<
typename T_Type,
typename T_Platform, alpaka::concepts::Vector T_Extents>
177 struct AllocUnified::Op<T_Type, syclGeneric::
Device<T_Platform>, T_Extents>
179 auto operator()(syclGeneric::Device<T_Platform>& device, T_Extents
const& extents)
const
188 auto [sycl_device, sycl_context] =
device.getNativeHandle();
190 bool isManagedMemorySupported = sycl_device.has(sycl::aspect::usm_shared_allocations);
191 if(!isManagedMemorySupported)
193 throw std::runtime_error(
"Sycl device does not support unified memory allocations.");
196 T_Type* ptr =
reinterpret_cast<T_Type*
>(
197 sycl::aligned_alloc_shared(alignment, memSizeInByte, sycl_device, sycl_context));
198 auto deleter = [ctx = sycl_context, ptr]() { sycl::free(
toVoidPtr(ptr), ctx); };
206 Alignment<alignment>{}};
211 template<
typename T_Type,
typename T_Platform, alpaka::concepts::Vector T_Extents>
212 struct AllocMapped::Op<T_Type, syclGeneric::
Device<T_Platform>, T_Extents>
214 auto operator()(syclGeneric::Device<T_Platform>& device, T_Extents
const& extents)
const
223 auto [_, sycl_context] =
device.getNativeHandle();
226 =
reinterpret_cast<T_Type*
>(sycl::aligned_alloc_host(alignment, memSizeInByte, sycl_context));
227 auto deleter = [ctx = sycl_context, ptr]() { sycl::free(
toVoidPtr(ptr), ctx); };
235 Alignment<alignment>{}};
240 template<
typename T_Platform,
typename T_Any>
241 struct IsDataAccessible::FirstPath<syclGeneric::
Device<T_Platform>, T_Any>
243 bool operator()(syclGeneric::Device<T_Platform>& device, T_Any
const& view)
const
246 auto [sycl_device, sycl_context] =
device.getNativeHandle();
247 auto sycl_alloc_type = sycl::get_pointer_type(
data(view), sycl_context);
249 if(sycl_alloc_type != sycl::usm::alloc::unknown)
253 sycl::device deviceAssociatedWithData = sycl::get_pointer_device(
data(view), sycl_context);
254 if(deviceAssociatedWithData == sycl_device)
265 if(sycl_alloc_type == sycl::usm::alloc::shared)
270 else if(sycl_alloc_type == sycl::usm::alloc::unknown)
284 template<
typename T_Platform>
285 struct GetDeviceProperties::Op<syclGeneric::
Device<T_Platform>>
287 DeviceProperties operator()(syclGeneric::Device<T_Platform>
const& device)
const
289 return device.m_properties;
295 alpaka::concepts::Executor T_Executor,
296 alpaka::concepts::Vector T_NumFrames,
297 alpaka::concepts::Vector T_FrameExtents,
298 alpaka::concepts::KernelBundle T_KernelBundle>
299 struct AdjustThreadSpec::
300 Op<syclGeneric::Device<T_Platform>, FrameSpec<T_NumFrames, T_FrameExtents, T_Executor>, T_KernelBundle>
302 using FrameSpecType = FrameSpec<T_NumFrames, T_FrameExtents, T_Executor>;
305 syclGeneric::Device<T_Platform>
const& device,
306 FrameSpecType
const& frameSpec,
307 T_KernelBundle
const& kernelBundle)
const requires alpaka::concepts::CVector<T_FrameExtents>
309 alpaka::unused(device, kernelBundle);
311 auto numThreads = frameSpec.getFrameExtents();
318 constexpr typename ALPAKA_TYPEOF(numThreads)::type hardwareLimitThreadsPerBlock = 1024u;
321 return ThreadSpec{frameSpec.getNumFrames(), result};
325 syclGeneric::Device<T_Platform>
const& device,
326 FrameSpecType
const& frameSpec,
327 T_KernelBundle
const& kernelBundle)
const
329 alpaka::unused(kernelBundle);
331 auto numThreadsPerBlocks = frameSpec.getFrameExtents();
332 auto const maxThreadsPerBlock =
device.m_properties.maxThreadsPerBlock;
335 return ThreadSpec{frameSpec.getNumFrames(), result};
342namespace alpaka::internal
344 template<
typename T_Platform>
345 struct GetApi::Op<onHost::syclGeneric::Device<T_Platform>>
347 decltype(
auto)
operator()(
auto&&
device)
const
349 return internal::getApi(*
device.m_platform.get());
#define ALPAKA_TYPEOF(...)
Get the type of instance.
#define ALPAKA_LOG_FUNCTION(logLvl)
Log the entry and exit of a scope.
consteval auto adjustToLimit(concepts::CVector auto const input)
adjust the input vector to a given limit by halving all components until the product of these is is b...
auto emulatedAlignedMemDescription(uint32_t alignmentInByte, T_Extents extents)
provides a memory description to create multidimensional linewise aligned memory within a one dimensi...
constexpr auto simdOptimizedAlignment(auto api, alpaka::concepts::DeviceKind auto deviceKind)
Calculate the best alignment for SIMD optimized memory allocation.
Functionality which is usable on the host CPU controller thread.
auto getNativeHandle(auto const &handle)
Get the native handle of an handle.
SharedBuffer(T_Any const &, T_Type *, T_UserExtents const &, T_UserPitches const &, std::invocable<> auto, T_MemAlignment const) -> SharedBuffer< ALPAKA_TYPEOF(getApi(std::declval< T_Any >())), T_Type, typename T_UserPitches::UniVec, T_MemAlignment >
decltype(auto) data(auto &&any)
pointer to data of an object
std::convertible_to< std::string > auto getName(auto &&any)
Runtime name for a given object.
ThreadSpec(T_NumBlocks const &, T_NumThreads const &) -> ThreadSpec< alpaka::trait::getVec_t< T_NumBlocks >, alpaka::trait::getVec_t< T_NumThreads > >
Device(Handle< T_Device > &&) -> Device< ALPAKA_TYPEOF(alpaka::internal::getApi(std::declval< T_Device >())), ALPAKA_TYPEOF(alpaka::internal::getDeviceKind(std::declval< T_Device >()))>
void wait(alpaka::concepts::HasGet auto &handle)
wait for all work to be finished
auto * toVoidPtr(T inPtr)
Cast a pointer that may or may not point to volatile memory to a (void*) or (void const*).
constexpr decltype(auto) getDeviceKind(auto &&any)
Get the device type of an object.
constexpr decltype(auto) getApi(auto &&any)
Get the API an object depends on.
constexpr decltype(auto) get(concepts::SpecializationOf< Dict > auto &t) noexcept