alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Platform.hpp
Go to the documentation of this file.
1/* Copyright 2024 René Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
16#include "alpaka/tag.hpp"
17
18#include <memory>
19#include <sstream>
20
21namespace alpaka::onHost
22{
23 namespace cpu
24 {
25 template<alpaka::concepts::DeviceKind T_DeviceKind>
26 struct Platform : std::enable_shared_from_this<Platform<T_DeviceKind>>
27 {
28 public:
29 Platform() = default;
30
31 Platform(Platform const&) = delete;
32 Platform& operator=(Platform const&) = delete;
33
34 Platform(Platform&&) = delete;
36
37 private:
38 void _()
39 {
40 static_assert(internal::concepts::Platform<Platform>);
41 }
42
43 std::vector<std::weak_ptr<cpu::Device<Platform>>> devices;
44 std::mutex deviceGuard;
45
46 std::shared_ptr<Platform> getSharedPtr()
47 {
48 return this->shared_from_this();
49 }
50
51 friend struct alpaka::internal::GetName;
52
53 std::string getName() const
54 {
55 return "host::Platform";
56 }
57
58 friend struct internal::GetDeviceCount;
59
60 uint32_t getDeviceCount()
61 {
62 uint32_t devCount = 0u;
63
64 constexpr bool isSupportedDev = trait::IsDeviceSupportedBy::Op<T_DeviceKind, api::Host>::value;
65 if constexpr(isSupportedDev)
66 {
67 if constexpr(T_DeviceKind{} == deviceKind::numaCpu)
68 {
69 devCount = alpaka::onHost::internal::hwloc::getNumNumaDomains();
70 }
71 else
72 devCount = 1;
73
74 if(devices.size() < static_cast<size_t>(devCount))
75 {
76 std::lock_guard<std::mutex> lk{deviceGuard};
77 devices.resize(devCount);
78 }
79 }
80 return devCount;
81 }
82
83 friend struct internal::MakeDevice;
84
85 Handle<cpu::Device<Platform>> makeDevice(uint32_t const& idx)
86 {
88 uint32_t const numDevices = getDeviceCount();
89 if(idx >= numDevices)
90 {
91 std::stringstream ssErr;
92 ssErr << "Unable to return device handle with index " << idx << " because there are only "
93 << numDevices << " devices of type '" << alpaka::onHost::getStaticName(T_DeviceKind{})
94 << "' !";
95 throw std::runtime_error(ssErr.str());
96 }
97 std::lock_guard<std::mutex> lk{deviceGuard};
98
99 if(auto sharedPtr = devices[idx].lock())
100 {
101 return sharedPtr;
102 }
103 auto thisHandle = getSharedPtr();
104 uint32_t numaIdx = internal::hwloc::allNumaDomains;
105 if constexpr(T_DeviceKind{} == deviceKind::numaCpu)
106 {
107 numaIdx = idx;
108 }
109 auto newDevice = std::make_shared<cpu::Device<Platform>>(std::move(thisHandle), idx, numaIdx);
110 devices[idx] = newDevice;
111 return newDevice;
112 }
113
114 friend struct internal::GetDeviceProperties;
115
116 friend struct alpaka::internal::GetDeviceType;
117
118 T_DeviceKind getDeviceKind() const
119 {
120 return T_DeviceKind{};
121 }
122 };
123 } // namespace cpu
124
125 namespace internal
126 {
127 template<alpaka::concepts::DeviceKind T_DeviceKind>
128 struct MakePlatform::Op<api::Host, T_DeviceKind>
129 {
130 auto operator()(api::Host, T_DeviceKind) const
131 {
133 }
134 };
135
136 template<alpaka::concepts::DeviceKind T_DeviceKind>
137 struct GetDeviceProperties::Op<cpu::Platform<T_DeviceKind>>
138 {
139 DeviceProperties operator()(cpu::Platform<T_DeviceKind> const& platform, uint32_t deviceIdx) const
140 {
141 alpaka::unused(platform);
143 auto prop = DeviceProperties{};
144 prop.name = getCpuName();
145 prop.warpSize = 1u;
146 prop.multiProcessorCount = hwloc::getNumCores(hwloc::allNumaDomains);
147 prop.globalMemCapacityBytes = hwloc::getMemCapacityBytes(hwloc::allNumaDomains);
148 prop.sharedMemPerBlockBytes = ALPAKA_BLOCK_SHARED_DYN_MEMBER_ALLOC_KIB * 1024u;
149
150 if constexpr(T_DeviceKind{} == deviceKind::numaCpu)
151 {
152 // the deviceIdx is equal to the numa domain index
153 prop.multiProcessorCount = hwloc::getNumCores(deviceIdx);
154 prop.globalMemCapacityBytes = hwloc::getMemCapacityBytes(deviceIdx);
155 }
156 else
157 alpaka::unused(deviceIdx);
158
159 prop.maxThreadsPerBlock = std::numeric_limits<uint32_t>::max();
160 prop.fnMaxThreadsPerBlock = [](uint32_t* data, uint32_t numDims)
161 {
162 for(uint32_t d = 0u; d < numDims; ++d)
163 data[d] = std::numeric_limits<uint32_t>::max();
164 };
165
166 prop.maxBlocksPerGrid = std::numeric_limits<uint32_t>::max();
167 prop.fnMaxBlocksPerGrid = [](uint32_t* data, uint32_t numDims)
168 {
169 for(uint32_t d = 0u; d < numDims; ++d)
170 data[d] = std::numeric_limits<uint32_t>::max();
171 };
172
173 return prop;
174 }
175 };
176 } // namespace internal
177} // namespace alpaka::onHost
178
179namespace alpaka::internal
180{
181 template<alpaka::concepts::DeviceKind T_DeviceKind>
182 struct GetApi::Op<onHost::cpu::Platform<T_DeviceKind>>
183 {
184 inline constexpr auto operator()(auto&& platform) const
185 {
186 alpaka::unused(platform);
187 return api::Host{};
188 }
189 };
190} // namespace alpaka::internal
#define ALPAKA_BLOCK_SHARED_DYN_MEMBER_ALLOC_KIB
#define ALPAKA_LOG_FUNCTION(logLvl)
Log the entry and exit of a scope.
Definition logger.hpp:95
constexpr auto cpu
Definition tag.hpp:170
constexpr auto numaCpu
Definition tag.hpp:180
constexpr Api api
Definition tag.hpp:24
constexpr auto device
Definition lvl.hpp:82
Functionality which is usable on the host CPU controller thread.
Definition api.hpp:40
std::shared_ptr< T > Handle
Definition Handle.hpp:30
decltype(auto) data(auto &&any)
pointer to data of an object
std::convertible_to< std::string > auto getName(auto &&any)
Runtime name for a given object.
Definition interface.hpp:96
auto make_sharedSingleton(T_Args &&... args)
Definition Handle.hpp:14
auto getCpuName() -> std::string
Definition sysInfo.hpp:78
std::convertible_to< std::string > auto getStaticName(auto const &any)
Compile-time available name for a given object.
Definition interface.hpp:76
Platform(Platform const &)=delete
Platform & operator=(Platform &&)=delete
Platform & operator=(Platform const &)=delete
Platform(Platform &&)=delete