alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
utility.hpp
Go to the documentation of this file.
1/* Copyright 2026 René Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
#include "alpaka/unused.hpp"

#include <cerrno>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <limits>
#include <new>
#include <optional>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <system_error>
#include <thread>
17
/** Implement functions required to set thread affinity and pin memory.
 *
 * There is always a fallback implementation so that the code can run without hwloc.
 * In this case NUMA selection is not possible and all cores will be taken into account.
 */
23namespace alpaka::onHost::internal::hwloc
24{
/** Constant to select all NUMA domains.
 *
 * Within alpaka we always work with the NUMA domain index.
 * Any code setting properties based on the NUMA domain index should first compare against this value and use all
 * cores if the index is equal to it.
 *
 * Declared `inline` so that every translation unit including this header refers to the same object instead of
 * getting its own internal-linkage copy.
 */
inline constexpr uint32_t allNumaDomains = std::numeric_limits<uint32_t>::max();
32
33#if ALPAKA_HAS_HWLOC
34 /** Helper singleton to cache the hwloc topology.
35 *
36 * Caching is required to reduce the overhead for repeating operations.
37 * Building the topology can be expensive.
38 */
39 class TopologyCache
40 {
41 public:
42 static TopologyCache& instance()
43 {
44 static TopologyCache topology;
45 return topology;
46 }
47
48 hwloc_topology_t get() const noexcept
49 {
50 return m_topology;
51 }
52
53 hwloc_obj_t getNumaObj(uint32_t numaIdx) const
54 {
55 hwloc_obj_t obj = hwloc_get_obj_by_type(m_topology, HWLOC_OBJ_NUMANODE, static_cast<unsigned>(numaIdx));
56 if(obj == nullptr)
57 {
58 throw std::out_of_range("NUMA domain index out of range: " + std::to_string(numaIdx));
59 }
60 return obj;
61 }
62
63 uint32_t getNumNumaDomains() const
64 {
65 int const count = hwloc_get_nbobjs_by_type(m_topology, HWLOC_OBJ_NUMANODE);
66 if(count < 0)
67 {
68 throw std::runtime_error("hwloc_get_nbobjs_by_type(HWLOC_OBJ_NUMANODE) failed");
69 }
70 return static_cast<uint32_t>(count);
71 }
72
73 private:
74 TopologyCache()
75 {
76 if(hwloc_topology_init(&m_topology) != 0)
77 {
78 throw std::runtime_error("hwloc_topology_init failed");
79 }
80 if(hwloc_topology_load(m_topology) != 0)
81 {
82 hwloc_topology_destroy(m_topology);
83 throw std::runtime_error("hwloc_topology_load failed");
84 }
85 }
86
87 ~TopologyCache()
88 {
89 if(m_topology != nullptr)
90 {
91 hwloc_topology_destroy(m_topology);
92 }
93 }
94
95 TopologyCache(TopologyCache const&) = delete;
96 TopologyCache& operator=(TopologyCache const&) = delete;
97 TopologyCache(TopologyCache&&) = delete;
98 TopologyCache& operator=(TopologyCache&&) = delete;
99
100 private:
101 hwloc_topology_t m_topology{};
102 };
103
/** Throw a std::runtime_error that describes the current value of errno.
 *
 * The message has the form "<what>: <description of errno>".
 *
 * @param what short description of the operation that failed
 * @throws std::runtime_error always
 */
[[noreturn]] inline void throwErrno(char const* what)
{
    // Take a snapshot first: assembling the message allocates memory and must not be able to disturb the value.
    int const errorCode = errno;

    // error_category::message() returns an owned std::string. std::strerror is not required to be thread-safe,
    // and the pointer it returns may be invalidated by a later call.
    throw std::runtime_error(std::string(what) + ": " + std::generic_category().message(errorCode));
}
108
109 /** Shorthand to get the cached hwloc topology */
110 inline hwloc_topology_t getTopology()
111 {
112 return TopologyCache::instance().get();
113 }
114
115 /** Get an hwloc NUMA object */
116 inline hwloc_obj_t getNumaObj(uint32_t numaIdx)
117 {
118 return TopologyCache::instance().getNumaObj(numaIdx);
119 }
120#endif
121
/** Number of NUMA domains.
 *
 * With hwloc the count is taken from the cached topology; without hwloc exactly one domain is reported.
 */
inline uint32_t getNumNumaDomains()
{
#if ALPAKA_HAS_HWLOC
    TopologyCache const& cache = TopologyCache::instance();
    return cache.getNumNumaDomains();
#else
    constexpr uint32_t singleDomain = 1;
    return singleDomain;
#endif
}
131
/** Read one value from the per-node memory statistics of the OS.
 *
 * hwloc does not provide the amount of free memory in a NUMA domain, therefore the value is read directly from
 * /sys/devices/system/node/node<osNodeIndex>/meminfo.
 *
 * Only entries that carry the unit "kB" are recognised, because the result is converted to bytes. Entries that are
 * printed without that unit are never matched.
 *
 * @param osNodeIndex The index of the NUMA domain in the OS.
 * @param key The field name including the trailing colon, e.g. 'MemFree:' or 'MemTotal:'.
 * @return The value in bytes, or std::nullopt if the file cannot be opened, no matching entry exists or the value
 *         in bytes does not fit into size_t.
 */
inline std::optional<size_t> parseNodeMemInfoValueBytes(unsigned osNodeIndex, std::string_view key)
{
    std::ifstream in("/sys/devices/system/node/node" + std::to_string(osNodeIndex) + "/meminfo");
    if(!in)
    {
        return std::nullopt;
    }

    constexpr size_t bytesPerKB = 1024u;

    std::string line;
    while(std::getline(in, line))
    {
        // Cheap pre-filter. std::string::find accepts the string_view directly, so no temporary string is built
        // for every line.
        if(line.find(key) == std::string::npos)
        {
            continue;
        }

        // Example line:
        // Node 0 MemFree: 123456 kB
        std::istringstream iss(line);
        std::string nodeWord;
        unsigned nodeNumber = 0;
        std::string field;
        size_t valueKB = 0;
        std::string unit;
        if(!(iss >> nodeWord >> nodeNumber >> field >> valueKB >> unit))
        {
            continue;
        }

        // The pre-filter is a substring search; require the exact field name and the expected unit here.
        if(field != key || unit != "kB")
        {
            continue;
        }

        // Do not report a wrapped-around number if the conversion to bytes would overflow.
        if(valueKB > std::numeric_limits<size_t>::max() / bytesPerKB)
        {
            return std::nullopt;
        }
        return valueKB * bytesPerKB;
    }

    return std::nullopt;
}
175
176 /** Set the affinity of the current thread to all cores of the NUMA domain
177 *
178 * @param numaIdx numa index starting with zero, or allNumaDomains to use all cores
179 */
180 inline void setThreadAffinity(uint32_t numaIdx)
181 {
182#if ALPAKA_HAS_HWLOC
183 hwloc_cpuset_t cpuset = nullptr;
184
185 if(numaIdx == allNumaDomains)
186 {
187 hwloc_const_cpuset_t const fullSet = hwloc_topology_get_complete_cpuset(getTopology());
188 if(fullSet == nullptr)
189 {
190 throw std::runtime_error("Topology has no complete cpuset");
191 }
192
193 cpuset = hwloc_bitmap_dup(fullSet);
194 }
195 else
196 {
197 hwloc_obj_t const node = getNumaObj(numaIdx);
198 if(node->cpuset == nullptr)
199 {
200 throw std::runtime_error("NUMA node has no cpuset");
201 }
202
203 cpuset = hwloc_bitmap_dup(node->cpuset);
204 }
205
206 if(cpuset == nullptr)
207 {
208 throw std::bad_alloc();
209 }
210
211 int const rc = hwloc_set_cpubind(getTopology(), cpuset, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT);
212
213 hwloc_bitmap_free(cpuset);
214
215 if(rc != 0)
216 {
217 throwErrno("hwloc_set_cpubind failed");
218 }
219#else
220 alpaka::unused(numaIdx);
221 return;
222#endif
223 }
224
225 /** Set the NUMA domain for the memory range described by ptr and bytes
226 *
227 * @attention This method should be called before the memory is touched, else it has no effect.
228 *
229 * @param ptr pointer address to pin, nullptr are valid input
230 * @param bytes the number of bytes to pin starting from the ptr address
231 * @param numaIdx numa index starting with zero, or allNumaDomains to not pin anything.
232 */
233 template<typename T>
234 inline void pinPointer(T* const ptr, size_t bytes, uint32_t numaIdx)
235 {
236#if ALPAKA_HAS_HWLOC
237 if(numaIdx == allNumaDomains)
238 return;
239
240 if(ptr == nullptr || bytes == 0u)
241 return;
242
243 hwloc_obj_t const node = getNumaObj(numaIdx);
244 if(node->nodeset == nullptr)
245 {
246 throw std::runtime_error("NUMA node has no nodeset");
247 }
248
249 hwloc_nodeset_t nodeset = hwloc_bitmap_dup(node->nodeset);
250 if(nodeset == nullptr)
251 {
252 throw std::bad_alloc();
253 }
254
255 int const rc = hwloc_set_area_membind(
256 getTopology(),
258 bytes,
259 nodeset,
260 HWLOC_MEMBIND_BIND,
261 HWLOC_MEMBIND_BYNODESET | HWLOC_MEMBIND_STRICT);
262
263 hwloc_bitmap_free(nodeset);
264
265 if(rc != 0)
266 {
267# ifdef ALPAKA_HOST_MEM_PINNING_CAN_FAIL
268 // missing privileges, e.g. within a container
269 bool const operationNotSupported = errno == EPERM;
270 // unsupported platform
271 bool const functionNotImplemented = errno == ENOSYS;
272 // NUMA node is not allowed by cpuset/cgroup
273 bool const operationNotAllowed = errno == EXDEV;
274 if(operationNotSupported || functionNotImplemented || operationNotAllowed)
275 {
276 return;
277 }
278# endif
279 throwErrno("hwloc_set_area_membind failed");
280 }
281#else
282 alpaka::unused(ptr, bytes, numaIdx);
283 return;
284#endif
285 }
286
287 /** Return the number of cores which has direct access to the numa domain
288 *
289 * Here "cores" means logical CPUs / processing units, so SMT siblings are counted too.
290 *
291 * @param numaIdx numa index starting with zero, or allNumaDomains to the C++ hardware concurrency.
292 */
293 inline uint32_t getNumCores(uint32_t numaIdx)
294 {
295#if ALPAKA_HAS_HWLOC
296 if(numaIdx == allNumaDomains)
297 return std::thread::hardware_concurrency();
298
299 hwloc_obj_t const node = getNumaObj(numaIdx);
300 if(node->cpuset == nullptr)
301 {
302 throw std::runtime_error("NUMA node has no cpuset");
303 }
304
305 int const numPUs = hwloc_bitmap_weight(node->cpuset);
306 if(numPUs < 0)
307 {
308 throw std::runtime_error("hwloc_bitmap_weight failed");
309 }
310
311 return static_cast<uint32_t>(numPUs);
312#else
313 alpaka::unused(numaIdx);
314 return std::thread::hardware_concurrency();
315#endif
316 }
317
318 /** Return the number of bytes of the numa domain
319 *
320 * @param numaIdx numa index starting with zero, or allNumaDomains to get total CPU memory capacity.
321 */
322 inline size_t getMemCapacityBytes(uint32_t numaIdx)
323 {
324#if ALPAKA_HAS_HWLOC
325 if(numaIdx == allNumaDomains)
327
328 hwloc_obj_t const node = getNumaObj(numaIdx);
329 if(node->attr == nullptr)
330 {
331 throw std::runtime_error("NUMA node has no attributes");
332 }
333
334 return static_cast<size_t>(node->attr->numanode.local_memory);
335
336#else
337 alpaka::unused(numaIdx);
339#endif
340 }
341
342 /** Return the number of free bytes in the numa domain.
343 *
344 * Linux-only implementation via /sys/devices/system/node/nodeX/meminfo
345 *
346 * @param numaIdx numa index starting with zero, or allNumaDomains to get total free CPU memory.
347 */
348 inline size_t getFreeGlobalMemBytes(uint32_t numaIdx)
349 {
350#if ALPAKA_HAS_HWLOC
351 if(numaIdx == allNumaDomains)
353
354 hwloc_obj_t const node = getNumaObj(numaIdx);
355 auto const freeBytes = parseNodeMemInfoValueBytes(node->os_index, "MemFree:");
356 if(!freeBytes.has_value())
357 {
358 throw std::runtime_error(
359 "Could not read per-node MemFree from /sys/devices/system/node/node" + std::to_string(node->os_index)
360 + "/meminfo");
361 }
362 return *freeBytes;
363#else
364 alpaka::unused(numaIdx);
366#endif
367 }
368} // namespace alpaka::onHost::internal::hwloc
auto getFreeGlobalMemBytes() -> std::size_t
Definition sysInfo.hpp:210
auto getGlobalMemCapacityBytes() -> std::size_t
Definition sysInfo.hpp:147
auto * toVoidPtr(T inPtr)
Cast a pointer that may or may not point to volatile memory to a (void*) or (void const*).
Definition util.hpp:34
constexpr decltype(auto) get(concepts::SpecializationOf< Dict > auto &t) noexcept
Definition Dict.hpp:151