23namespace alpaka::onHost::internal::hwloc
/** Sentinel NUMA index, equal to the largest uint32_t value.
 *
 * The helpers in this file compare their numaIdx argument against this constant and take a
 * separate branch when it matches, instead of looking up a single NUMA node.
 */
31 constexpr uint32_t allNumaDomains = std::numeric_limits<uint32_t>::max();
42 static TopologyCache& instance()
44 static TopologyCache topology;
48 hwloc_topology_t
get() const noexcept
53 hwloc_obj_t getNumaObj(uint32_t numaIdx)
const
55 hwloc_obj_t obj = hwloc_get_obj_by_type(m_topology, HWLOC_OBJ_NUMANODE,
static_cast<unsigned>(numaIdx));
58 throw std::out_of_range(
"NUMA domain index out of range: " + std::to_string(numaIdx));
/** Number of NUMA node objects in the cached topology.
 *
 * Asks hwloc_get_nbobjs_by_type for the number of HWLOC_OBJ_NUMANODE objects and returns that
 * count converted to uint32_t.
 *
 * @throws std::runtime_error when the count is rejected; the exact check applied to the count is
 *         not visible in this excerpt (TODO confirm against the full source)
 */
63 uint32_t getNumNumaDomains()
const
65 int const count = hwloc_get_nbobjs_by_type(m_topology, HWLOC_OBJ_NUMANODE);
68 throw std::runtime_error(
"hwloc_get_nbobjs_by_type(HWLOC_OBJ_NUMANODE) failed");
70 return static_cast<uint32_t
>(count);
// Presumably the constructor body (its declarator is not visible in this excerpt): create the
// hwloc topology handle in m_topology, then load the topology into it. A non-zero result from
// either hwloc call is reported as std::runtime_error.
76 if(hwloc_topology_init(&m_topology) != 0)
78 throw std::runtime_error(
"hwloc_topology_init failed");
80 if(hwloc_topology_load(m_topology) != 0)
// The handle created by hwloc_topology_init is destroyed here, before the exception is thrown.
82 hwloc_topology_destroy(m_topology);
83 throw std::runtime_error(
"hwloc_topology_load failed");
// Presumably the destructor body (its declarator is not visible in this excerpt): destroy the
// topology handle, but only if one is actually held.
89 if(m_topology !=
nullptr)
91 hwloc_topology_destroy(m_topology);
// Copy and move construction/assignment are all deleted: a TopologyCache can be neither copied
// nor moved.
95 TopologyCache(TopologyCache
const&) =
delete;
96 TopologyCache& operator=(TopologyCache
const&) =
delete;
97 TopologyCache(TopologyCache&&) =
delete;
98 TopologyCache& operator=(TopologyCache&&) =
delete;
// hwloc topology handle; value-initialised here, set via hwloc_topology_init and released with
// hwloc_topology_destroy.
101 hwloc_topology_t m_topology{};
/** Throw a std::runtime_error that describes the current value of errno.
 *
 * The exception message has the form "<what>: <strerror(errno)>".
 *
 * @param what description of the operation that failed; must point to a valid C string
 * @throws std::runtime_error always
 */
[[noreturn]] inline void throwErrno(char const* what)
{
    // Snapshot errno before doing anything else: composing the message allocates memory, and
    // library calls are permitted to modify errno even when they succeed.
    int const savedErrno = errno;

    std::string message(what);
    message += ": ";
    message += std::strerror(savedErrno);
    throw std::runtime_error(message);
}
110 inline hwloc_topology_t getTopology()
112 return TopologyCache::instance().get();
116 inline hwloc_obj_t getNumaObj(uint32_t numaIdx)
118 return TopologyCache::instance().getNumaObj(numaIdx);
123 inline uint32_t getNumNumaDomains()
126 return TopologyCache::instance().getNumNumaDomains();
/** Read one value from a NUMA node's meminfo file in sysfs.
 *
 * Scans /sys/devices/system/node/node<osNodeIndex>/meminfo for a line that tokenises as
 * "<word> <number> <key> <value> kB" and returns <value> converted from kB to bytes.
 *
 * @param osNodeIndex node number used to build the sysfs path
 * @param key field name exactly as it appears in the file, e.g. "MemFree:"
 * @return the value in bytes, or std::nullopt if the file cannot be read or it contains no line
 *         whose field equals key with unit "kB"
 */
inline std::optional<size_t> parseNodeMemInfoValueBytes(unsigned osNodeIndex, std::string_view key)
{
    std::ifstream in("/sys/devices/system/node/node" + std::to_string(osNodeIndex) + "/meminfo");
    if(!in)
    {
        return std::nullopt;
    }

    std::string line;
    while(std::getline(in, line))
    {
        // Cheap pre-filter. std::string::find accepts a string_view directly, so no temporary
        // std::string is built for every line.
        if(line.find(key) == std::string::npos)
        {
            continue;
        }

        std::istringstream iss(line);
        std::string nodeWord;
        unsigned nodeNumber = 0;
        std::string field;
        unsigned long long valueKB = 0;
        std::string unit;
        if(iss >> nodeWord >> nodeNumber >> field >> valueKB >> unit)
        {
            // The pre-filter is a substring match, so insist on an exact field name here.
            if(field == key && unit == "kB")
            {
                return static_cast<size_t>(valueKB * 1024ULL);
            }
        }
    }

    return std::nullopt;
}
/** Apply a CPU binding built from one NUMA domain's cpuset, or from the complete cpuset.
 *
 * numaIdx == allNumaDomains selects the topology's complete cpuset; any other value selects the
 * cpuset of the node returned by getNumaObj(numaIdx). The selected set is duplicated, passed to
 * hwloc_set_cpubind with HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT, and freed again.
 *
 * @param numaIdx NUMA domain index, or allNumaDomains
 * @throws std::runtime_error if the selected cpuset is nullptr; throwErrno is also called with
 *         "hwloc_set_cpubind failed" (the check on rc guarding it is not visible in this excerpt)
 * @throws std::bad_alloc if hwloc_bitmap_dup returns nullptr
 *
 * NOTE(review): the excerpt also shows a path that only calls alpaka::unused(numaIdx) --
 * presumably the build without hwloc; the preprocessor guard is not visible here.
 */
180 inline void setThreadAffinity(uint32_t numaIdx)
183 hwloc_cpuset_t cpuset =
nullptr;
185 if(numaIdx == allNumaDomains)
187 hwloc_const_cpuset_t
const fullSet = hwloc_topology_get_complete_cpuset(getTopology());
188 if(fullSet ==
nullptr)
190 throw std::runtime_error(
"Topology has no complete cpuset");
193 cpuset = hwloc_bitmap_dup(fullSet);
197 hwloc_obj_t
const node = getNumaObj(numaIdx);
198 if(node->cpuset ==
nullptr)
200 throw std::runtime_error(
"NUMA node has no cpuset");
203 cpuset = hwloc_bitmap_dup(node->cpuset);
206 if(cpuset ==
nullptr)
208 throw std::bad_alloc();
211 int const rc = hwloc_set_cpubind(getTopology(), cpuset, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT);
// NOTE(review): throwErrno below reads errno only after this free. On C libraries where free()
// may modify errno the reported reason can be wrong; consider saving errno right after
// hwloc_set_cpubind.
213 hwloc_bitmap_free(cpuset);
217 throwErrno(
"hwloc_set_cpubind failed");
220 alpaka::unused(numaIdx);
/** Bind a memory area to the nodeset of one NUMA domain via hwloc_set_area_membind.
 *
 * The function tests numaIdx == allNumaDomains and ptr == nullptr || bytes == 0u up front; what
 * those branches do is not visible in this excerpt (presumably an early return -- TODO confirm).
 * Otherwise the nodeset of getNumaObj(numaIdx) is duplicated, used for the bind call with
 * HWLOC_MEMBIND_BYNODESET | HWLOC_MEMBIND_STRICT among its flags, and freed again.
 *
 * @param ptr start of the memory area (T is a template parameter whose declaration is not
 *            visible in this excerpt)
 * @param bytes size of the memory area in bytes
 * @param numaIdx NUMA domain index, or allNumaDomains
 * @throws std::runtime_error if the node has no nodeset; throwErrno is also called with
 *         "hwloc_set_area_membind failed" (the guarding check is not visible in this excerpt)
 * @throws std::bad_alloc if hwloc_bitmap_dup returns nullptr
 *
 * NOTE(review): the excerpt also shows a path that only calls alpaka::unused(ptr, bytes, numaIdx)
 * -- presumably the build without hwloc; the preprocessor guard is not visible here.
 */
234 inline void pinPointer(T*
const ptr,
size_t bytes, uint32_t numaIdx)
237 if(numaIdx == allNumaDomains)
240 if(ptr ==
nullptr || bytes == 0u)
243 hwloc_obj_t
const node = getNumaObj(numaIdx);
244 if(node->nodeset ==
nullptr)
246 throw std::runtime_error(
"NUMA node has no nodeset");
249 hwloc_nodeset_t nodeset = hwloc_bitmap_dup(node->nodeset);
250 if(nodeset ==
nullptr)
252 throw std::bad_alloc();
255 int const rc = hwloc_set_area_membind(
261 HWLOC_MEMBIND_BYNODESET | HWLOC_MEMBIND_STRICT);
// NOTE(review): errno is inspected (and passed on to throwErrno) only after this free. On C
// libraries where free() may modify errno the checks below can see the wrong value; consider
// saving errno right after hwloc_set_area_membind.
263 hwloc_bitmap_free(nodeset);
// With ALPAKA_HOST_MEM_PINNING_CAN_FAIL defined, EPERM, ENOSYS and EXDEV are singled out; the
// action taken for them is not visible in this excerpt.
267# ifdef ALPAKA_HOST_MEM_PINNING_CAN_FAIL
269 bool const operationNotSupported = errno == EPERM;
271 bool const functionNotImplemented = errno == ENOSYS;
273 bool const operationNotAllowed = errno == EXDEV;
274 if(operationNotSupported || functionNotImplemented || operationNotAllowed)
279 throwErrno(
"hwloc_set_area_membind failed");
282 alpaka::unused(ptr, bytes, numaIdx);
/** Number of processing units for one NUMA domain, or for the whole machine.
 *
 * numaIdx == allNumaDomains returns std::thread::hardware_concurrency(). Otherwise the result is
 * the number of bits set in the cpuset of getNumaObj(numaIdx), as reported by hwloc_bitmap_weight.
 *
 * @param numaIdx NUMA domain index, or allNumaDomains
 * @throws std::runtime_error if the node has no cpuset, or when the weight is rejected (the exact
 *         check on numPUs is not visible in this excerpt)
 *
 * NOTE(review): std::thread::hardware_concurrency() is allowed to return 0 when the value is not
 * computable; callers that use the result as a divisor or thread count may want to handle that.
 * NOTE(review): the trailing alpaka::unused(numaIdx) / hardware_concurrency() pair is presumably
 * the build without hwloc; the preprocessor guard is not visible here.
 */
293 inline uint32_t getNumCores(uint32_t numaIdx)
296 if(numaIdx == allNumaDomains)
297 return std::thread::hardware_concurrency();
299 hwloc_obj_t
const node = getNumaObj(numaIdx);
300 if(node->cpuset ==
nullptr)
302 throw std::runtime_error(
"NUMA node has no cpuset");
305 int const numPUs = hwloc_bitmap_weight(node->cpuset);
308 throw std::runtime_error(
"hwloc_bitmap_weight failed");
311 return static_cast<uint32_t
>(numPUs);
313 alpaka::unused(numaIdx);
314 return std::thread::hardware_concurrency();
/** Memory capacity in bytes of one NUMA domain.
 *
 * For a specific domain the result is the local_memory attribute of the NUMA node returned by
 * getNumaObj(numaIdx), converted to size_t. The handling of numaIdx == allNumaDomains is not
 * visible in this excerpt.
 *
 * @param numaIdx NUMA domain index, or allNumaDomains
 * @throws std::runtime_error if the node object has no attributes
 *
 * NOTE(review): the trailing alpaka::unused(numaIdx) is presumably the build without hwloc; the
 * preprocessor guard and that path's return value are not visible here.
 */
322 inline size_t getMemCapacityBytes(uint32_t numaIdx)
325 if(numaIdx == allNumaDomains)
328 hwloc_obj_t
const node = getNumaObj(numaIdx);
329 if(node->attr ==
nullptr)
331 throw std::runtime_error(
"NUMA node has no attributes");
334 return static_cast<size_t>(node->attr->numanode.local_memory);
337 alpaka::unused(numaIdx);
/** Free memory in bytes of one NUMA domain.
 *
 * For a specific domain the value is read with parseNodeMemInfoValueBytes, using the os_index of
 * the node returned by getNumaObj(numaIdx) and the key "MemFree:". The handling of
 * numaIdx == allNumaDomains and the final return statement are not visible in this excerpt.
 *
 * @param numaIdx NUMA domain index, or allNumaDomains
 * @throws std::runtime_error if no per-node MemFree value could be read
 *
 * NOTE(review): the trailing alpaka::unused(numaIdx) is presumably the build without hwloc; the
 * preprocessor guard and that path's return value are not visible here.
 */
348 inline size_t getFreeGlobalMemBytes(uint32_t numaIdx)
351 if(numaIdx == allNumaDomains)
354 hwloc_obj_t
const node = getNumaObj(numaIdx);
355 auto const freeBytes = parseNodeMemInfoValueBytes(node->os_index,
"MemFree:");
356 if(!freeBytes.has_value())
358 throw std::runtime_error(
359 "Could not read per-node MemFree from /sys/devices/system/node/node" + std::to_string(node->os_index)
364 alpaka::unused(numaIdx);
auto getFreeGlobalMemBytes() -> std::size_t
auto getGlobalMemCapacityBytes() -> std::size_t
auto * toVoidPtr(T inPtr)
Cast a pointer that may or may not point to volatile memory to a (void*) or (void const*).
constexpr decltype(auto) get(concepts::SpecializationOf< Dict > auto &t) noexcept