31 template<
typename T_Platform>
32 struct Device : std::enable_shared_from_this<Device<T_Platform>>
35 Device(internal::concepts::PlatformHandle
auto platform, uint32_t
const idx, uint32_t numaIdx)
36 : m_platform(
std::move(platform))
39 , m_properties{internal::getDeviceProperties(*m_platform.
get(), m_idx)}
57 return m_idx == other.m_idx;
62 return m_idx != other.m_idx;
70 std::vector<std::weak_ptr<cpu::Queue<Device>>> tmpQueues;
72 std::lock_guard<std::mutex> lk{queuesGuard};
75 for(
auto& weakQueue : tmpQueues)
77 if(
auto queue = weakQueue.lock())
79 internal::wait(*queue);
87 static_assert(internal::concepts::Device<Device>);
92 uint32_t m_numaIdx = internal::hwloc::allNumaDomains;
94 std::vector<std::weak_ptr<cpu::Queue<Device>>> queues;
95 std::vector<std::weak_ptr<cpu::Event<Device>>> events;
96 std::mutex queuesGuard;
98 std::shared_ptr<Device> getSharedPtr()
100 return this->shared_from_this();
103 template<
typename T_Device>
106 void setThreadAffinity()
const
108 internal::hwloc::setThreadAffinity(m_numaIdx);
112 void pinPointer(T*
const ptr,
size_t bytes)
114 internal::hwloc::pinPointer(ptr, bytes, m_numaIdx);
117 bool isNumaAware()
const
119 return m_numaIdx != internal::hwloc::allNumaDomains;
122 friend struct alpaka::internal::GetName;
124 std::string getName()
const
126 return m_properties.
name;
129 friend struct internal::GetNativeHandle;
131 [[nodiscard]] uint32_t getNativeHandle() const noexcept
136 friend struct internal::MakeQueue;
143 "Unsupported queue kind.");
144 auto thisHandle = this->getSharedPtr();
145 std::lock_guard<std::mutex> lk{queuesGuard};
148 auto newQueue = std::make_shared<cpu::Queue<Device>>(
149 std::move(thisHandle),
154 queues.emplace_back(newQueue);
158 friend struct internal::MakeEvent;
163 auto thisHandle = this->getSharedPtr();
164 std::lock_guard<std::mutex> lk{queuesGuard};
165 auto newEvent = std::make_shared<cpu::Event<Device>>(std::move(thisHandle), queues.size());
167 events.emplace_back(newEvent);
171 friend struct alpaka::internal::GetDeviceType;
173 auto getDeviceKind()
const
175 return alpaka::internal::getDeviceKind(*m_platform.get());
178 auto getFreeGlobalMemBytes()
const
182 return internal::hwloc::getFreeGlobalMemBytes(m_numaIdx);
187 friend struct internal::Alloc;
188 friend struct alpaka::internal::GetApi;
189 friend struct internal::GetDeviceProperties;
190 friend struct internal::GetFreeGlobalMemBytes;
191 friend struct internal::AdjustThreadSpec;
192 friend struct internal::AllocDeferred;
193 friend struct internal::AllocUnified;
194 friend struct internal::AllocMapped;
201 template<
typename T_Platform>
206 template<
typename T_Platform>
212 template<
typename T_Platform>
221 template<
typename T_Type,
typename T_Platform, alpaka::concepts::Vector T_Extents>
222 struct Alloc::Op<T_Type, cpu::
Device<T_Platform>, T_Extents>
224 auto operator()(cpu::Device<T_Platform>& device, T_Extents
const& extents)
const
235 device.pinPointer(ptr, memSizeInByte);
245 Alignment<alignment>{}};
251 std::stringstream ss;
259 template<
typename T_Type,
typename T_Platform, alpaka::concepts::Vector T_Extents>
260 struct AllocUnified::Op<T_Type,
cpu::
Device<T_Platform>, T_Extents>
262 auto operator()(cpu::Device<T_Platform>& device, T_Extents
const& extents)
const
265 return Alloc::Op<T_Type, cpu::Device<T_Platform>, T_Extents>{}(
device, extents);
269 template<
typename T_Type,
typename T_Platform, alpaka::concepts::Vector T_Extents>
270 struct AllocMapped::Op<T_Type,
cpu::
Device<T_Platform>, T_Extents>
272 auto operator()(cpu::Device<T_Platform>& device, T_Extents
const& extents)
const
275 return Alloc::Op<T_Type, cpu::Device<T_Platform>, T_Extents>{}(
device, extents);
279 template<
typename T_Platform,
typename T_Any>
280 struct IsDataAccessible::FirstPath<
cpu::
Device<T_Platform>, T_Any>
282 bool operator()(cpu::Device<T_Platform>& device, T_Any
const& view)
const
301 alpaka::concepts::Vector T_NumFrames,
302 alpaka::concepts::Vector T_FrameExtents,
303 alpaka::concepts::KernelBundle T_KernelBundle>
304 struct AdjustThreadSpec::
305 Op<cpu::Device<T_Platform>, FrameSpec<T_NumFrames, T_FrameExtents, exec::CpuSerial>, T_KernelBundle>
307 using FrameSpecType = FrameSpec<T_NumFrames, T_FrameExtents, exec::CpuSerial>;
310 cpu::Device<T_Platform>
const& device,
311 FrameSpecType
const& frameSpec,
312 T_KernelBundle
const& kernelBundle)
const requires alpaka::concepts::CVector<T_FrameExtents>
314 alpaka::unused(device, kernelBundle);
319 iotaCVec<
typename T_FrameExtents::type, T_FrameExtents::dim()>())::template
fill<1u>();
320 return ThreadSpec{allOne, allOne, frameSpec.getExecutor()};
324 cpu::Device<T_Platform>
const& device,
325 FrameSpecType
const& frameSpec,
326 T_KernelBundle
const& kernelBundle)
const
328 alpaka::unused(device, kernelBundle);
332 iotaCVec<
typename T_FrameExtents::type, T_FrameExtents::dim()>())::template
fill<1u>();
333 return ThreadSpec{allOne, allOne, frameSpec.getExecutor()};
339 alpaka::concepts::Executor T_Executor,
340 alpaka::concepts::Vector T_NumFrames,
341 alpaka::concepts::Vector T_FrameExtents,
342 alpaka::concepts::KernelBundle T_KernelBundle>
344 struct AdjustThreadSpec::
345 Op<cpu::Device<T_Platform>, FrameSpec<T_NumFrames, T_FrameExtents, T_Executor>, T_KernelBundle>
347 using FrameSpecType = FrameSpec<T_NumFrames, T_FrameExtents, T_Executor>;
350 cpu::Device<T_Platform>
const& device,
351 FrameSpecType
const& frameSpec,
352 T_KernelBundle
const& kernelBundle)
const requires alpaka::concepts::CVector<T_FrameExtents>
354 alpaka::unused(device, kernelBundle);
358 auto numThreadBlocks = frameSpec.getNumFrames();
359 return ThreadSpec{numThreadBlocks, T_FrameExtents::template
fill<1u>(), frameSpec.getExecutor()};
363 cpu::Device<T_Platform>
const& device,
364 FrameSpecType
const& frameSpec,
365 T_KernelBundle
const& kernelBundle)
const
367 alpaka::unused(device, kernelBundle);
371 auto numThreadBlocks = frameSpec.getNumFrames();
372 auto const numThreads =
Vec<
typename T_FrameExtents::type, T_FrameExtents::dim()>
::fill(1);
373 return ThreadSpec{numThreadBlocks, numThreads, frameSpec.getExecutor()};
377 template<
typename T_Platform>
378 struct GetDeviceProperties::Op<
cpu::
Device<T_Platform>>
380 DeviceProperties operator()(cpu::Device<T_Platform>
const& device)
const
382 return device.m_properties;
388namespace alpaka::internal
390 template<
typename T_Platform>
391 struct GetApi::Op<onHost::cpu::Device<T_Platform>>
393 inline constexpr auto operator()(
auto&& device)
const
#define ALPAKA_TYPEOF(...)
Get the type of instance.
#define ALPAKA_LOG_INFO(logLvl, callable)
Write a meta data message to the output.
#define ALPAKA_LOG_FUNCTION(logLvl)
Log the entry and exit of a scope.
auto emulatedAlignedMemDescription(uint32_t alignmentInByte, T_Extents extents)
provides a memory description to create multidimensional linewise aligned memory within a one dimensi...
constexpr auto simdOptimizedAlignment(auto api, alpaka::concepts::DeviceKind auto deviceKind)
Calculate the best alignment for SIMD optimized memory allocation.
ALPAKA_FN_INLINE ALPAKA_FN_HOST void alignedFree(size_t alignment, auto ptr)
ALPAKA_FN_INLINE ALPAKA_FN_HOST auto alignedAlloc(size_t alignment, size_t size) -> void *
constexpr bool isSeqExecutor_v
Functionality which is usable on the host CPU controller thread.
SharedBuffer(T_Any const &, T_Type *, T_UserExtents const &, T_UserPitches const &, std::invocable<> auto, T_MemAlignment const) -> SharedBuffer< ALPAKA_TYPEOF(getApi(std::declval< T_Any >())), T_Type, typename T_UserPitches::UniVec, T_MemAlignment >
void fill(Queue< T_Device, T_QueueKind > const &queue, auto &&dest, T_Value elementValue)
fill memory element wise
std::shared_ptr< T > Handle
auto getFreeGlobalMemBytes() -> std::size_t
ThreadSpec(T_NumBlocks const &, T_NumThreads const &) -> ThreadSpec< alpaka::trait::getVec_t< T_NumBlocks >, alpaka::trait::getVec_t< T_NumThreads > >
Device(Handle< T_Device > &&) -> Device< ALPAKA_TYPEOF(alpaka::internal::getApi(std::declval< T_Device >())), ALPAKA_TYPEOF(alpaka::internal::getDeviceKind(std::declval< T_Device >()))>
constexpr auto nonBlocking
ALPAKA_FN_HOST_ACC Vec(T_1, T_Args...) -> Vec< T_1, uint32_t(sizeof...(T_Args)+1u), ArrayStorage< T_1, uint32_t(sizeof...(T_Args)+1u)> >
constexpr decltype(auto) getDeviceKind(auto &&any)
Get the device type of an object.
constexpr decltype(auto) getApi(auto &&any)
Get the API an object depends on.
consteval auto iotaCVec()
Create and return a CVector of the given length with values 1, 2, ...
constexpr decltype(auto) get(concepts::SpecializationOf< Dict > auto &t) noexcept
std::string name
The name of the device.
bool operator!=(Device const &other) const
Device(internal::concepts::PlatformHandle auto platform, uint32_t const idx, uint32_t numaIdx)
Device & operator=(Device &&)=delete
Device(Device const &)=delete
Device & operator=(Device const &)=delete
bool operator==(Device const &other) const