15# include <sycl/sycl.hpp>
// Block-level view of the kernel index space, backed by a SYCL nd_item.
// The type keeps const references to the nd_item and to the thread specification,
// so an instance must not outlive either object.
// NOTE(review): the struct/class header is not visible in this listing; the name
// BlockLayer is taken from the constructor below.
23 template<auto T_syclDim,
typename T_OptimizedThreadSpec>
// Component type of the vectors returned by idx() and count().
26 using IdxType =
typename T_OptimizedThreadSpec::NumBlocksVecType::type;
28 sycl::nd_item<T_syclDim>
const& m_item;
29 T_OptimizedThreadSpec
const& m_optimizedThreadSpec;
// Dimensionality reported by the thread specification. It is taken from
// T_OptimizedThreadSpec, not from the separate template parameter T_syclDim.
31 static constexpr uint32_t dim = T_OptimizedThreadSpec::dim();
// Binds the layer to an nd_item and a thread specification.
// NOTE(review): the initializer for m_item is not visible in this listing;
// presumably it is m_item(item) — confirm.
34 BlockLayer(sycl::nd_item<T_syclDim>
const& item, T_OptimizedThreadSpec
const& optimizedThreadSpec)
36 , m_optimizedThreadSpec(optimizedThreadSpec)
// Returns the index of the current block as a dim-component vector.
// For dim 1, 2 and 3, component i is m_item.get_group(i); no component reordering
// is applied here.
40 constexpr auto idx() const -> Vec<IdxType, dim>
42 if constexpr(dim == 1)
44 return Vec<IdxType, 1u>{m_item.get_group(0)};
46 else if constexpr(dim == 2)
48 return Vec<IdxType, 2u>{m_item.get_group(0), m_item.get_group(1)};
50 else if constexpr(dim == 3)
52 return Vec<IdxType, 3u>{m_item.get_group(0), m_item.get_group(1), m_item.get_group(2)};
// Remaining case: get_group(0) is treated as a linear index and mapped to an
// N-dimensional index using the block extents from the thread specification.
// NOTE(review): the branch line enclosing this return is not visible in this
// listing; presumably this is the fallback for dim > 3 — confirm.
56 return mapToND(m_optimizedThreadSpec.getNumBlocks(),
static_cast<IdxType
>(m_item.get_group(0)));
// Returns the number of blocks per dimension as a dim-component vector.
// For dim 1, 2 and 3, component i is m_item.get_group_range(i).
60 constexpr auto count() const -> Vec<IdxType, dim>
62 if constexpr(dim == 1)
64 return Vec<IdxType, 1u>{m_item.get_group_range(0)};
66 else if constexpr(dim == 2)
68 return Vec<IdxType, 2u>{m_item.get_group_range(0), m_item.get_group_range(1)};
70 else if constexpr(dim == 3)
72 return Vec<IdxType, 3u>{
73 m_item.get_group_range(0),
74 m_item.get_group_range(1),
75 m_item.get_group_range(2)};
// Remaining case: the extents come from the thread specification instead of the
// nd_item.
// NOTE(review): presumably the fallback for dim > 3; the enclosing branch line
// is not visible in this listing — confirm.
79 return m_optimizedThreadSpec.getNumBlocks();
// Thread-level view (position inside a block) of the kernel index space, backed
// by a SYCL nd_item.
// The type keeps const references to the nd_item and to the thread specification,
// so an instance must not outlive either object.
// NOTE(review): the struct/class header is not visible in this listing; the name
// ThreadLayer is taken from the constructor below.
84 template<auto T_syclDim,
typename T_OptimizedThreadSpec>
// Component type of the vectors returned by idx() and the unconstrained count().
87 using IdxType =
typename T_OptimizedThreadSpec::NumThreadsVecType::type;
89 sycl::nd_item<T_syclDim>
const& m_item;
90 T_OptimizedThreadSpec
const& m_optimizedThreadSpec;
// Dimensionality reported by the thread specification. It is taken from
// T_OptimizedThreadSpec, not from the separate template parameter T_syclDim.
92 static constexpr uint32_t dim = T_OptimizedThreadSpec::dim();
// Binds the layer to an nd_item and a thread specification.
// NOTE(review): the initializer for m_item is not visible in this listing;
// presumably it is m_item(item) — confirm.
95 ThreadLayer(sycl::nd_item<T_syclDim>
const& item, T_OptimizedThreadSpec
const& optimizedThreadSpec)
97 , m_optimizedThreadSpec(optimizedThreadSpec)
// Returns the index of the current thread within its block as a dim-component
// vector. For dim 1, 2 and 3, component i is m_item.get_local_id(i); no component
// reordering is applied here.
101 constexpr auto idx() const -> Vec<IdxType, dim>
103 if constexpr(dim == 1)
105 return Vec<IdxType, 1u>{m_item.get_local_id(0)};
107 else if constexpr(dim == 2)
109 return Vec<IdxType, 2u>{m_item.get_local_id(0), m_item.get_local_id(1)};
111 else if constexpr(dim == 3)
113 return Vec<IdxType, 3u>{m_item.get_local_id(0), m_item.get_local_id(1), m_item.get_local_id(2)};
// Remaining case: the arguments below are the per-block thread extents and
// get_local_id(0) cast to IdxType.
// NOTE(review): the line that opens this call is not visible in this listing;
// by symmetry with the block-level idx() it is presumably `return mapToND(` — confirm.
118 m_optimizedThreadSpec.getNumThreads(),
119 static_cast<IdxType
>(m_item.get_local_id(0)));
// Returns the number of threads per block and dimension as a dim-component vector.
// For dim 1, 2 and 3, component i is m_item.get_local_range(i).
123 constexpr auto count() const -> Vec<IdxType, dim>
125 if constexpr(dim == 1)
127 return Vec<IdxType, 1u>{m_item.get_local_range(0)};
129 else if constexpr(dim == 2)
131 return Vec<IdxType, 2u>{m_item.get_local_range(0), m_item.get_local_range(1)};
133 else if constexpr(dim == 3)
135 return Vec<IdxType, 3u>{
136 m_item.get_local_range(0),
137 m_item.get_local_range(1),
138 m_item.get_local_range(2)};
// Remaining case: the extents come from the thread specification instead of the
// nd_item.
// NOTE(review): presumably the fallback for dim > 3; the enclosing branch line
// is not visible in this listing — confirm.
142 return m_optimizedThreadSpec.getNumThreads();
// Constrained overload of count(): it participates only when NumThreadsVecType
// satisfies alpaka::concepts::CVector, and returns a default-constructed
// NumThreadsVecType without querying the nd_item.
// NOTE(review): presumably CVector denotes a vector whose values are encoded in
// its type, so the default-constructed object already carries the extents — confirm.
146 constexpr auto count() const
147 requires alpaka::concepts::CVector<typename T_OptimizedThreadSpec::NumThreadsVecType>
149 return typename T_OptimizedThreadSpec::NumThreadsVecType{};
// Callable that wraps a SYCL nd_item.
// It keeps a const reference to the nd_item, so an instance must not outlive it.
// NOTE(review): the struct/class header is not visible in this listing; the name
// Sync is taken from the constructor below.
153 template<auto T_syclDim>
156 sycl::nd_item<T_syclDim>
const& m_item;
// Stores a reference to the given nd_item.
159 Sync(sycl::nd_item<T_syclDim>
const& item) : m_item(item)
// Call operator taking no arguments.
// NOTE(review): the body is not visible in this listing; presumably it issues a
// work-group barrier through m_item — confirm.
163 void operator()()
const
// Wrapper around a byte-typed SYCL local accessor.
// It keeps a const reference to the accessor, so an instance must not outlive it.
169 class DynamicSharedMemory
171 sycl::local_accessor<std::byte>
const& m_accessor;
// Stores a reference to the given local accessor.
174 DynamicSharedMemory(sycl::local_accessor<std::byte>
const& accessor) : m_accessor(accessor)
// Returns the accessor's underlying pointer reinterpreted as T*.
// The unnamed size_t template parameter and the unnamed uint32_t argument are not
// used by the body, so every call yields the same address regardless of them.
// NOTE(review): no alignment or size check against T is performed here; callers
// presumably guarantee both — confirm.
178 template<
typename T,
size_t>
179 T* allocDynamic(uint32_t)
181 return reinterpret_cast<T*
>(m_accessor.get_multi_ptr<sycl::access::decorated::no>().get());
// Returns the size in bytes reported by the wrapped accessor (m_accessor.byte_size()).
184 constexpr size_t byte_size() noexcept
186 return m_accessor.byte_size();
Functionality that is usable on the accelerator compute device from within a kernel.
constexpr Vec< T_IntegralType, T_dim > mapToND(Vec< T_IntegralType, T_dim, T_Storage > const &extents, T_IntegralType linearIdx)
Maps a linear index to an N-dimensional index.