alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Platform.hpp
Go to the documentation of this file.
1/* Copyright 2025 Simeon Ehrig
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
12
13#if ALPAKA_LANG_SYCL
14
15# include <sycl/sycl.hpp>
16
17# include <map>
18# include <memory>
19# include <numeric>
20# include <optional>
21
22namespace alpaka
23{
24 namespace detail
25 {
26 template<typename T_DeviceKind>
27 struct SYCLDeviceSelector;
28
29 struct Context
30 {
31 Context() = default;
32
33 sycl::platform getPlatformByName(std::string const& platformName)
34 {
35 auto platforms = sycl::platform::get_platforms();
36
37 for(auto const& platform : platforms)
38 {
39 if(platform.get_info<sycl::info::platform::name>() == platformName)
40 {
41 return platform;
42 }
43 }
44
45 throw std::runtime_error("Platform not found");
46 }
47
48 auto getContext(sycl::platform platform)
49 {
50 std::string platformName = platform.get_info<sycl::info::platform::name>();
51 if(contextMap.contains(platformName))
52 {
53 return contextMap[platformName];
54 }
55
56 std::vector<sycl::device> devices;
57 try
58 {
59 devices = platform.get_devices();
60 }
61 catch(...)
62 {
63 devices.clear();
64 }
65 if(devices.size())
66 {
67 auto context = sycl::context{
68 platform.get_devices(),
69 [](sycl::exception_list exceptions)
70 {
71 auto ss_err = std::stringstream{};
72 ss_err << "Caught asynchronous SYCL exception(s):\n";
73 for(std::exception_ptr e : exceptions)
74 {
75 try
76 {
77 std::rethrow_exception(e);
78 }
79 catch(sycl::exception const& err)
80 {
81 ss_err << err.what() << " (" << err.code() << ")\n";
82 }
83 }
84 throw std::runtime_error(ss_err.str());
85 }};
86 return contextMap[platformName] = context;
87 }
88 return sycl::context{};
89 }
90
91 std::map<std::string, sycl::context> contextMap;
92 };
93 } // namespace detail
94
95 namespace onHost
96
97 {
98 namespace syclGeneric
99 {
100 template<typename T_ApiInterface, alpaka::concepts::DeviceKind T_DeviceKind>
101 struct Platform : std::enable_shared_from_this<Platform<T_ApiInterface, T_DeviceKind>>
102 {
103 private:
104 /** Checks if kernels can be compiled for the context and given list of devices
105 *
106 * It is possible that we can create a context and a list of devices for an API and device kind.
107 * That does not mean we can compile kernels for the devices.
108 * A reason can be:
109 * - the compile flags are not set to build kernels for the given device kind
110 * - dependencies e.g. CUDA/HIP are not available
111 *
112 * @return true if we can build kernels for the devices, else false.
113 */
114 bool checkIfKernelsCanBeCompiled(std::vector<sycl::device> const& devs, sycl::context ctx)
115 {
116 if(devs.empty())
117 return false;
118 try
119 {
120 auto kernelIds = sycl::get_kernel_ids();
121
122 // No application kernels exist, so there is nothing to validate.
123 if(kernelIds.empty())
124 return true;
125 // Check if we have already pre-compiled binaries/executables for the devices.
126 if(sycl::has_kernel_bundle<sycl::bundle_state::executable>(ctx, devs))
127 {
128 // an executable exists already
129 return true;
130 }
131 // Check if we can compile for the devices.
132 if(sycl::has_kernel_bundle<sycl::bundle_state::input>(ctx, devs))
133 {
134 auto input = sycl::get_kernel_bundle<sycl::bundle_state::input>(ctx, devs);
135
136 auto executable = sycl::build(input, devs);
137
138 // return true if we can build the kernels for the devices
139 return !executable.empty();
140 }
141 return false;
142 }
143 catch(...)
144 {
145 return false;
146 }
147 }
148
149 public:
150 Platform() : contextManager{make_sharedSingleton<detail::Context>()}
151 {
152 try
153 {
154 syclPlatform = sycl::platform{detail::SYCLDeviceSelector<T_DeviceKind>{}};
155 syclDevices = syclPlatform->get_devices();
156 devices.resize(syclDevices.size());
157 syclContext = contextManager->getContext(syclPlatform.value());
158
159 /* If no call before fired an exception we need to check if we can build kernels for the
160 * context and devices. If we are not able to compile kernels for the devices, we throw
161 * to reset the context and device list.
162 */
163 if(!checkIfKernelsCanBeCompiled(syclDevices, *syclContext))
164 {
165 auto msg
166 = (std::string("kernel_bundle_building for ") + T_ApiInterface::getName() + "and "
167 + T_DeviceKind::getName() + " failed");
168 throw std::runtime_error(msg);
169 }
170 }
171 catch(...)
172 {
173 /* Reset all members, to show that the platform does not have a valid context and devices.
174 * If later the number of devices is queried it will return that zero devices are available.
175 */
176 syclContext.reset();
177 syclPlatform.reset();
178 syclDevices.clear();
179 devices.clear();
180 }
181 }
182
183 Platform(Platform const&) = delete;
184 Platform& operator=(Platform const&) = delete;
185
186 Platform(Platform&&) = delete;
187 Platform& operator=(Platform&&) = delete;
188
189 std::shared_ptr<Platform<T_ApiInterface, T_DeviceKind>> getSharedPtr()
190 {
191 return this->shared_from_this();
192 }
193
194 auto getContext() const
195 {
196 if(!syclContext.has_value())
197 throw std::runtime_error("The underlying SYCL context is invalid.");
198 return syclContext.value();
199 }
200
201 uint32_t getDeviceCount() const
202 {
204 constexpr bool isSupportedDev = trait::IsDeviceSupportedBy::
205 Op<T_DeviceKind, ALPAKA_TYPEOF(alpaka::internal::getApi(std::declval<Platform>()))>::value;
206 if constexpr(isSupportedDev)
207 {
208 auto numDevices = devices.size();
209 return static_cast<uint32_t>(numDevices);
210 }
211 return 0u;
212 }
213
214 Handle<syclGeneric::Device<Platform<T_ApiInterface, T_DeviceKind>>> makeDevice(uint32_t const& idx)
215 {
217 uint32_t const numDevices = getDeviceCount();
218 if(idx >= numDevices)
219 {
220 std::stringstream ssErr;
221 ssErr << "Unable to return device handle for SYCL device with index " << idx
222 << " because there are only " << numDevices << " devices!";
223 throw std::runtime_error(ssErr.str());
224 }
225
226 std::lock_guard<std::mutex> lk{deviceGuard};
227
228 if(auto sharedPtr = devices[idx].lock())
229 {
230 return sharedPtr;
231 }
232
233 auto newDevice = std::make_shared<syclGeneric::Device<Platform<T_ApiInterface, T_DeviceKind>>>(
234 std::move(getSharedPtr()),
235 syclDevices[idx],
236 idx);
237 devices[idx] = newDevice;
238 return newDevice;
239 }
240
241 static constexpr auto getName()
242 {
243 return onHost::demangledName<syclGeneric::Platform<T_ApiInterface, T_DeviceKind>>();
244 }
245
246 friend struct internal::GetDeviceProperties::Op<syclGeneric::Platform<T_ApiInterface, T_DeviceKind>>;
247
248 private:
249 friend struct onHost::internal::IsDataAccessible;
250 friend struct GetDeviceProperties;
251
252 // The context manager is required to be able to use the same sycl context for different device types
253 std::shared_ptr<alpaka::detail::Context> contextManager;
254 std::optional<sycl::context> syclContext;
255 // native sycl platform for the corresponding device kind this platform is representing
256 std::optional<sycl::platform> syclPlatform;
257 // native sycl devices for the corresponding device kind this platform is representing
258 std::vector<sycl::device> syclDevices;
259 // alpaka devices for the internal hierarchy
260 std::vector<std::weak_ptr<syclGeneric::Device<Platform<T_ApiInterface, T_DeviceKind>>>> devices;
261
262 std::mutex deviceGuard;
263
264 void _()
265 {
266 static_assert(internal::concepts::Platform<Platform>);
267 }
268 };
269 } // namespace syclGeneric
270
271 namespace internal
272 {
273 template<typename T_ApiInterface, alpaka::concepts::DeviceKind T_DeviceKind>
274 struct GetDeviceProperties::Op<syclGeneric::Platform<T_ApiInterface, T_DeviceKind>>
275 {
276 DeviceProperties operator()(
277 syclGeneric::Platform<T_ApiInterface, T_DeviceKind> const& platform,
278 uint32_t deviceIdx) const
279 {
281 if(deviceIdx >= platform.syclDevices.size())
282 {
283 std::stringstream ssErr;
284 ssErr << "Unable to return device properties for SYCL device with index " << deviceIdx
285 << " because there are only " << platform.getDeviceCount() << " devices!";
286 throw std::runtime_error(ssErr.str());
287 }
288 sycl::device const dev = platform.syclDevices[deviceIdx];
289
290 auto prop = DeviceProperties{};
291 prop.name = dev.get_info<sycl::info::device::name>();
292 std::vector<std::size_t> wrap_sizes = dev.get_info<sycl::info::device::sub_group_sizes>();
293 // @todo do not reduce wrap size to a single value, return all values
294 prop.warpSize = static_cast<uint32_t>(std::reduce(
295 wrap_sizes.begin(),
296 wrap_sizes.end(),
297 std::size_t{0},
298 [](std::size_t a, std::size_t b)
299 {
300 // The CPU runtime supports a sub-group size of 64, but the SYCL implementation
301 // currently does not
302 if constexpr(T_DeviceKind{} == deviceKind::cpu)
303 return std::max(a, b) <= 32 ? std::max(a, b) : 32;
304 else
305 return std::max(a, b);
306 }));
307 prop.multiProcessorCount = dev.get_info<sycl::info::device::max_compute_units>();
308 prop.globalMemCapacityBytes = dev.get_info<sycl::info::device::global_mem_size>();
309 prop.sharedMemPerBlockBytes = dev.get_info<sycl::info::device::local_mem_size>();
310
311 prop.maxThreadsPerBlock = dev.get_info<sycl::info::device::max_work_group_size>();
312 // will be copied into the lampda
313 auto syclMaxThreadsPerBlock = dev.get_info<sycl::info::device::max_work_item_sizes<3>>();
314 // in sycl index order == alpaka index order
315 prop.fnMaxThreadsPerBlock = [maxThreadsPerBlock = prop.maxThreadsPerBlock,
316 syclMaxThreadsPerBlock](uint32_t* data, uint32_t numDims)
317 {
318 if(numDims <= 3u)
319 {
320 for(uint32_t d = 0u; d < numDims; ++d)
321 data[numDims - 1u - d] = syclMaxThreadsPerBlock[3u - 1u - d];
322 }
323 else
324 {
325 /* For more than 3 dimensions alpaka is linearizing to one dimension, therefore we use the
326 * maximum for each dimension. */
327 for(uint32_t d = 0u; d < numDims; ++d)
328 data[d] = maxThreadsPerBlock;
329 }
330 };
331
332 prop.maxBlocksPerGrid = std::numeric_limits<uint32_t>::max();
333 prop.fnMaxBlocksPerGrid = [](uint32_t* data, uint32_t numDims)
334 {
335 for(uint32_t d = 0u; d < numDims; ++d)
336 data[d] = std::numeric_limits<uint32_t>::max();
337 };
338
339
340 return prop;
341 }
342 };
343 } // namespace internal
344
345 } // namespace onHost
346} // namespace alpaka
347#endif
#define ALPAKA_LOG_FUNCTION(logLvl)
Log the entry and exit of a scope.
Definition logger.hpp:95
constexpr auto cpu
Definition tag.hpp:170
constexpr auto device
Definition lvl.hpp:82
Functionality which is usable on the host CPU controller thread.
Definition api.hpp:40
decltype(auto) data(auto &&any)
pointer to data of an object
std::convertible_to< std::string > auto getName(auto &&any)
Runtime name for a given object.
Definition interface.hpp:96
auto make_sharedSingleton(T_Args &&... args)
Definition Handle.hpp:14
main alpaka namespace.
Definition alpaka.hpp:76
DeviceProperties operator()(auto const &platform, uint32_t idx) const