15# include <sycl/sycl.hpp>
26 template<
typename T_DeviceKind>
27 struct SYCLDeviceSelector;
33 sycl::platform getPlatformByName(std::string
const& platformName)
35 auto platforms = sycl::platform::get_platforms();
37 for(
auto const& platform : platforms)
39 if(platform.get_info<sycl::info::platform::name>() == platformName)
45 throw std::runtime_error(
"Platform not found");
48 auto getContext(sycl::platform platform)
50 std::string platformName = platform.get_info<sycl::info::platform::name>();
51 if(contextMap.contains(platformName))
53 return contextMap[platformName];
56 std::vector<sycl::device> devices;
59 devices = platform.get_devices();
67 auto context = sycl::context{
68 platform.get_devices(),
69 [](sycl::exception_list exceptions)
71 auto ss_err = std::stringstream{};
72 ss_err <<
"Caught asynchronous SYCL exception(s):\n";
73 for(std::exception_ptr e : exceptions)
77 std::rethrow_exception(e);
79 catch(sycl::exception
const& err)
81 ss_err << err.what() <<
" (" << err.code() <<
")\n";
84 throw std::runtime_error(ss_err.str());
86 return contextMap[platformName] = context;
88 return sycl::context{};
91 std::map<std::string, sycl::context> contextMap;
100 template<
typename T_ApiInterface, alpaka::concepts::DeviceKind T_DeviceKind>
101 struct Platform : std::enable_shared_from_this<Platform<T_ApiInterface, T_DeviceKind>>
114 bool checkIfKernelsCanBeCompiled(std::vector<sycl::device>
const& devs, sycl::context ctx)
120 auto kernelIds = sycl::get_kernel_ids();
123 if(kernelIds.empty())
126 if(sycl::has_kernel_bundle<sycl::bundle_state::executable>(ctx, devs))
132 if(sycl::has_kernel_bundle<sycl::bundle_state::input>(ctx, devs))
134 auto input = sycl::get_kernel_bundle<sycl::bundle_state::input>(ctx, devs);
136 auto executable = sycl::build(input, devs);
139 return !executable.empty();
154 syclPlatform = sycl::platform{detail::SYCLDeviceSelector<T_DeviceKind>{}};
155 syclDevices = syclPlatform->get_devices();
156 devices.resize(syclDevices.size());
157 syclContext = contextManager->getContext(syclPlatform.value());
163 if(!checkIfKernelsCanBeCompiled(syclDevices, *syclContext))
166 = (std::string(
"kernel_bundle_building for ") + T_ApiInterface::getName() +
"and "
167 + T_DeviceKind::getName() +
" failed");
168 throw std::runtime_error(msg);
177 syclPlatform.reset();
183 Platform(Platform
const&) =
delete;
184 Platform& operator=(Platform
const&) =
delete;
186 Platform(Platform&&) =
delete;
187 Platform& operator=(Platform&&) =
delete;
189 std::shared_ptr<Platform<T_ApiInterface, T_DeviceKind>> getSharedPtr()
191 return this->shared_from_this();
194 auto getContext()
const
196 if(!syclContext.has_value())
197 throw std::runtime_error(
"The underlying SYCL context is invalid.");
198 return syclContext.value();
201 uint32_t getDeviceCount()
const
204 constexpr bool isSupportedDev = trait::IsDeviceSupportedBy::
205 Op<T_DeviceKind, ALPAKA_TYPEOF(alpaka::internal::getApi(std::declval<Platform>()))>::value;
206 if constexpr(isSupportedDev)
208 auto numDevices = devices.size();
209 return static_cast<uint32_t
>(numDevices);
214 Handle<syclGeneric::Device<Platform<T_ApiInterface, T_DeviceKind>>> makeDevice(uint32_t
const& idx)
217 uint32_t
const numDevices = getDeviceCount();
218 if(idx >= numDevices)
220 std::stringstream ssErr;
221 ssErr <<
"Unable to return device handle for SYCL device with index " << idx
222 <<
" because there are only " << numDevices <<
" devices!";
223 throw std::runtime_error(ssErr.str());
226 std::lock_guard<std::mutex> lk{deviceGuard};
228 if(
auto sharedPtr = devices[idx].lock())
233 auto newDevice = std::make_shared<syclGeneric::Device<Platform<T_ApiInterface, T_DeviceKind>>>(
234 std::move(getSharedPtr()),
237 devices[idx] = newDevice;
241 static constexpr auto getName()
243 return onHost::demangledName<syclGeneric::Platform<T_ApiInterface, T_DeviceKind>>();
246 friend struct internal::GetDeviceProperties::Op<syclGeneric::Platform<T_ApiInterface, T_DeviceKind>>;
249 friend struct onHost::internal::IsDataAccessible;
250 friend struct GetDeviceProperties;
253 std::shared_ptr<alpaka::detail::Context> contextManager;
254 std::optional<sycl::context> syclContext;
256 std::optional<sycl::platform> syclPlatform;
258 std::vector<sycl::device> syclDevices;
260 std::vector<std::weak_ptr<syclGeneric::Device<Platform<T_ApiInterface, T_DeviceKind>>>> devices;
262 std::mutex deviceGuard;
266 static_assert(internal::concepts::Platform<Platform>);
273 template<
typename T_ApiInterface, alpaka::concepts::DeviceKind T_DeviceKind>
274 struct GetDeviceProperties::Op<syclGeneric::Platform<T_ApiInterface, T_DeviceKind>>
276 DeviceProperties operator()(
277 syclGeneric::Platform<T_ApiInterface, T_DeviceKind>
const& platform,
278 uint32_t deviceIdx)
const
281 if(deviceIdx >= platform.syclDevices.size())
283 std::stringstream ssErr;
284 ssErr <<
"Unable to return device properties for SYCL device with index " << deviceIdx
285 <<
" because there are only " << platform.getDeviceCount() <<
" devices!";
286 throw std::runtime_error(ssErr.str());
288 sycl::device
const dev = platform.syclDevices[deviceIdx];
290 auto prop = DeviceProperties{};
291 prop.name = dev.get_info<sycl::info::device::name>();
292 std::vector<std::size_t> wrap_sizes = dev.get_info<sycl::info::device::sub_group_sizes>();
294 prop.warpSize =
static_cast<uint32_t
>(std::reduce(
298 [](std::size_t a, std::size_t b)
303 return std::max(a, b) <= 32 ? std::max(a, b) : 32;
305 return std::max(a, b);
307 prop.multiProcessorCount = dev.get_info<sycl::info::device::max_compute_units>();
308 prop.globalMemCapacityBytes = dev.get_info<sycl::info::device::global_mem_size>();
309 prop.sharedMemPerBlockBytes = dev.get_info<sycl::info::device::local_mem_size>();
311 prop.maxThreadsPerBlock = dev.get_info<sycl::info::device::max_work_group_size>();
313 auto syclMaxThreadsPerBlock = dev.get_info<sycl::info::device::max_work_item_sizes<3>>();
315 prop.fnMaxThreadsPerBlock = [maxThreadsPerBlock = prop.maxThreadsPerBlock,
316 syclMaxThreadsPerBlock](uint32_t*
data, uint32_t numDims)
320 for(uint32_t d = 0u; d < numDims; ++d)
321 data[numDims - 1u - d] = syclMaxThreadsPerBlock[3u - 1u - d];
327 for(uint32_t d = 0u; d < numDims; ++d)
328 data[d] = maxThreadsPerBlock;
332 prop.maxBlocksPerGrid = std::numeric_limits<uint32_t>::max();
333 prop.fnMaxBlocksPerGrid = [](uint32_t*
data, uint32_t numDims)
335 for(uint32_t d = 0u; d < numDims; ++d)
336 data[d] = std::numeric_limits<uint32_t>::max();
#define ALPAKA_LOG_FUNCTION(logLvl)
Log the entry and exit of a scope.
Functionality which is usable on the host CPU controller thread.
decltype(auto) data(auto &&any)
pointer to data of an object
std::convertible_to< std::string > auto getName(auto &&any)
Runtime name for a given object.
auto make_sharedSingleton(T_Args &&... args)