alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
onAcc.hpp
Go to the documentation of this file.
1/* Copyright 2025 Simeon Ehrig
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7#include "alpaka/Vec.hpp"
11#include "alpaka/tag.hpp"
12
13#if ALPAKA_LANG_SYCL
14
15# include <sycl/sycl.hpp>
16
17# include <functional>
18
19namespace alpaka::onAcc
20{
21 namespace syclGeneric
22 {
23 template<auto T_syclDim, typename T_OptimizedThreadSpec>
24 class BlockLayer
25 {
26 using IdxType = typename T_OptimizedThreadSpec::NumBlocksVecType::type;
27
28 sycl::nd_item<T_syclDim> const& m_item;
29 T_OptimizedThreadSpec const& m_optimizedThreadSpec;
30 // dimension of the alpaka objects
31 static constexpr uint32_t dim = T_OptimizedThreadSpec::dim();
32
33 public:
34 BlockLayer(sycl::nd_item<T_syclDim> const& item, T_OptimizedThreadSpec const& optimizedThreadSpec)
35 : m_item(item)
36 , m_optimizedThreadSpec(optimizedThreadSpec)
37 {
38 }
39
40 constexpr auto idx() const -> Vec<IdxType, dim>
41 {
42 if constexpr(dim == 1)
43 {
44 return Vec<IdxType, 1u>{m_item.get_group(0)};
45 }
46 else if constexpr(dim == 2)
47 {
48 return Vec<IdxType, 2u>{m_item.get_group(0), m_item.get_group(1)};
49 }
50 else if constexpr(dim == 3)
51 {
52 return Vec<IdxType, 3u>{m_item.get_group(0), m_item.get_group(1), m_item.get_group(2)};
53 }
54 else
55 {
56 return mapToND(m_optimizedThreadSpec.getNumBlocks(), static_cast<IdxType>(m_item.get_group(0)));
57 }
58 }
59
60 constexpr auto count() const -> Vec<IdxType, dim>
61 {
62 if constexpr(dim == 1)
63 {
64 return Vec<IdxType, 1u>{m_item.get_group_range(0)};
65 }
66 else if constexpr(dim == 2)
67 {
68 return Vec<IdxType, 2u>{m_item.get_group_range(0), m_item.get_group_range(1)};
69 }
70 else if constexpr(dim == 3)
71 {
72 return Vec<IdxType, 3u>{
73 m_item.get_group_range(0),
74 m_item.get_group_range(1),
75 m_item.get_group_range(2)};
76 }
77 else
78 {
79 return m_optimizedThreadSpec.getNumBlocks();
80 }
81 }
82 };
83
84 template<auto T_syclDim, typename T_OptimizedThreadSpec>
85 class ThreadLayer
86 {
87 using IdxType = typename T_OptimizedThreadSpec::NumThreadsVecType::type;
88
89 sycl::nd_item<T_syclDim> const& m_item;
90 T_OptimizedThreadSpec const& m_optimizedThreadSpec;
91 // dimension of the alpaka objects
92 static constexpr uint32_t dim = T_OptimizedThreadSpec::dim();
93
94 public:
95 ThreadLayer(sycl::nd_item<T_syclDim> const& item, T_OptimizedThreadSpec const& optimizedThreadSpec)
96 : m_item(item)
97 , m_optimizedThreadSpec(optimizedThreadSpec)
98 {
99 }
100
101 constexpr auto idx() const -> Vec<IdxType, dim>
102 {
103 if constexpr(dim == 1)
104 {
105 return Vec<IdxType, 1u>{m_item.get_local_id(0)};
106 }
107 else if constexpr(dim == 2)
108 {
109 return Vec<IdxType, 2u>{m_item.get_local_id(0), m_item.get_local_id(1)};
110 }
111 else if constexpr(dim == 3)
112 {
113 return Vec<IdxType, 3u>{m_item.get_local_id(0), m_item.get_local_id(1), m_item.get_local_id(2)};
114 }
115 else
116 {
117 return mapToND(
118 m_optimizedThreadSpec.getNumThreads(),
119 static_cast<IdxType>(m_item.get_local_id(0)));
120 }
121 }
122
123 constexpr auto count() const -> Vec<IdxType, dim>
124 {
125 if constexpr(dim == 1)
126 {
127 return Vec<IdxType, 1u>{m_item.get_local_range(0)};
128 }
129 else if constexpr(dim == 2)
130 {
131 return Vec<IdxType, 2u>{m_item.get_local_range(0), m_item.get_local_range(1)};
132 }
133 else if constexpr(dim == 3)
134 {
135 return Vec<IdxType, 3u>{
136 m_item.get_local_range(0),
137 m_item.get_local_range(1),
138 m_item.get_local_range(2)};
139 }
140 else
141 {
142 return m_optimizedThreadSpec.getNumThreads();
143 }
144 }
145
146 constexpr auto count() const
147 requires alpaka::concepts::CVector<typename T_OptimizedThreadSpec::NumThreadsVecType>
148 {
149 return typename T_OptimizedThreadSpec::NumThreadsVecType{};
150 }
151 };
152
153 template<auto T_syclDim>
154 class Sync
155 {
156 sycl::nd_item<T_syclDim> const& m_item;
157
158 public:
159 Sync(sycl::nd_item<T_syclDim> const& item) : m_item(item)
160 {
161 }
162
163 void operator()() const
164 {
165 m_item.barrier();
166 }
167 };
168
169 class DynamicSharedMemory
170 {
171 sycl::local_accessor<std::byte> const& m_accessor;
172
173 public:
174 DynamicSharedMemory(sycl::local_accessor<std::byte> const& accessor) : m_accessor(accessor)
175 {
176 }
177
178 template<typename T, size_t>
179 T* allocDynamic(uint32_t)
180 {
181 return reinterpret_cast<T*>(m_accessor.get_multi_ptr<sycl::access::decorated::no>().get());
182 }
183
184 constexpr size_t byte_size() noexcept
185 {
186 return m_accessor.byte_size();
187 }
188 };
189 } // namespace syclGeneric
190} // namespace alpaka::onAcc
191
192#endif
functionality which is usable on the accelerator compute device from within a kernel.
Definition executor.hpp:38
constexpr Vec< T_IntegralType, T_dim > mapToND(Vec< T_IntegralType, T_dim, T_Storage > const &extents, T_IntegralType linearIdx)
Maps a linear index to an N-dimensional index.
Definition Vec.hpp:873