alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
TiledIdxContainer.hpp
Go to the documentation of this file.
1/* Copyright 2024 Andrea Bocci, René Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7#include "alpaka/Vec.hpp"
8#include "alpaka/api/api.hpp"
10#include "alpaka/core/PP.hpp"
15#include "alpaka/tag.hpp"
16#include "alpaka/utility.hpp"
17
18#include <cstdint>
19#include <functional>
20#include <memory>
21#include <ranges>
22#include <sstream>
23
24namespace alpaka::onAcc
25{
26 namespace detail
27 {
28 /** Store reduced vector
29 *
30 * The first index can be reduced by on dimension because the slowest dimension must never set to zero after
31 * the initialization.
32 */
33 template<typename T_Type, uint32_t T_dim>
34 struct ReducedVector : private Vec<T_Type, T_dim - 1u>
35 {
36 constexpr ReducedVector(Vec<T_Type, T_dim> const& first)
37 : Vec<T_Type, T_dim - 1u>{first.template rshrink<T_dim - 1u>()}
38 {
39 }
40
41 constexpr decltype(auto) operator[](T_Type idx) const
42 {
44 }
45
46 constexpr decltype(auto) operator[](T_Type idx)
47 {
49 }
50 };
51
52 template<typename T_Type>
53 struct ReducedVector<T_Type, 1u>
54 {
56 {
57 }
58 };
59 } // namespace detail
60
61 template<
63 typename T_ThreadSpace,
64 typename T_IdxMapperFn,
67 {
68 void _()
69 {
70 static_assert(std::ranges::forward_range<TiledIdxContainer>);
71 static_assert(std::ranges::borrowed_range<TiledIdxContainer>);
72 static_assert(std::ranges::range<TiledIdxContainer>);
73 static_assert(std::ranges::input_range<TiledIdxContainer>);
74 }
75
76 public:
77 using IdxType = typename T_IdxRange::IdxType;
78 static constexpr uint32_t dim = T_IdxRange::dim();
80
82 T_IdxRange const& idxRange,
83 T_ThreadSpace const& threadSpace,
84 T_IdxMapperFn idxMapping,
85 T_CSelect const& = T_CSelect{})
86 : m_idxRange(idxRange)
87 , m_threadSpace{threadSpace}
88 {
89 alpaka::unused(idxMapping);
90 }
91
92 constexpr TiledIdxContainer(TiledIdxContainer const&) = default;
93 constexpr TiledIdxContainer(TiledIdxContainer&&) = default;
94
95 class const_iterator;
96
97 /** special implementation to define the end
98 *
99 * Only a scalar value must be stored which reduce the register footprint.
100 * The definition of end is that the index is behind or equal to the extent of the slowest moving dimension.
101 */
103 {
104 friend class TiledIdxContainer;
105
106 void _()
107 {
108 static_assert(std::forward_iterator<const_iterator_end>);
109 }
110
112 : m_extentSlowDim{extent[T_CSelect{}][0]}
113 {
114 }
115
116 constexpr IdxType operator*() const
117 {
118 return m_extentSlowDim;
119 }
120
121 public:
122 constexpr bool operator==(const_iterator_end const& other) const
123 {
124 return (m_extentSlowDim == other.m_extentSlowDim);
125 }
126
127 constexpr bool operator!=(const_iterator_end const& other) const
128 {
129 return !(*this == other);
130 }
131
132 constexpr bool operator==(const_iterator const& other) const
133 {
134 return (m_extentSlowDim <= other.slowCurrent);
135 }
136
137 constexpr bool operator!=(const_iterator const& other) const
138 {
139 return !(*this == other);
140 }
141
142 private:
144 };
145
147 {
148 friend class TiledIdxContainer;
149 friend class const_iterator_end;
150
151 static constexpr uint32_t iterDim = T_CSelect::dim();
153
154 void _()
155 {
156 static_assert(std::forward_iterator<const_iterator>);
157 static_assert(std::input_iterator<const_iterator>);
158 }
159
160 constexpr const_iterator(
161 alpaka::concepts::Vector auto const offset,
162 alpaka::concepts::Vector auto const first,
163 alpaka::concepts::Vector auto const extent,
164 alpaka::concepts::Vector auto const stride)
165 : m_current{first + offset}
166 , m_stride{stride[T_CSelect{}]}
167 , m_extent{(extent + offset)[T_CSelect{}]}
168 , m_first((m_current)[T_CSelect{}])
169 {
170 // range check required for 1 dimensional iterators
171 if constexpr(iterDim > 1u)
172 {
173 // invalidate current if one dimension is out of range.
174 bool isIndexValid = true;
175 for(uint32_t d = 1u; d < iterDim; ++d)
176 isIndexValid = isIndexValid && (m_first[d] < m_extent[d]);
177 if(!isIndexValid)
178 m_current[T_CSelect{}[0]] = m_extent[0];
179 }
180
181 // std::cout << "const iter " << m_current << m_extent << m_stride << std::endl;
182 }
183
185 {
186 return m_current[T_CSelect{}[0]];
187 }
188
189 public:
190 constexpr IdxVecType operator*() const
191 {
192 return m_current;
193 }
194
195 // pre-increment the iterator
197 {
198 for(uint32_t d = 0; d < iterDim; ++d)
199 {
200 uint32_t const idx = iterDim - 1u - d;
201 m_current[T_CSelect{}[idx]] += m_stride[idx];
202 if constexpr(iterDim != 1u)
203 {
204 if(idx >= 1u && m_current[T_CSelect{}[idx]] >= m_extent[idx])
205 {
206 m_current[T_CSelect{}[idx]] = m_first[idx];
207 }
208 else
209 break;
210 }
211 }
212 return *this;
213 }
214
215 // post-increment the iterator
217 {
218 const_iterator old = *this;
219 ++(*this);
220 return old;
221 }
222
223 constexpr bool operator==(const_iterator const& other) const
224 {
225 return (m_current == other.m_current);
226 }
227
228 constexpr bool operator!=(const_iterator const& other) const
229 {
230 return !(*this == other);
231 }
232
233 constexpr bool operator==(const_iterator_end const& other) const
234 {
235 return (slowCurrent() >= *other);
236 }
237
238 constexpr bool operator!=(const_iterator_end const& other) const
239 {
240 return !(*this == other);
241 }
242
243 private:
244 // modified by the pre/post-increment operator
246 // non-const to support iterator copy and assignment
250 };
251
253 {
254 constexpr auto selectedDims = T_CSelect{};
255 auto [threadIdx, numThreads] = m_threadSpace.mapTo(selectedDims);
256
257 if constexpr(std::is_same_v<T_IdxMapperFn, layout::Strided>)
258 {
259 return const_iterator(
260 m_idxRange.m_begin,
261 threadIdx * m_idxRange.m_stride,
262 m_idxRange.distance(),
263 numThreads * m_idxRange.m_stride);
264 }
265 else if constexpr(std::is_same_v<T_IdxMapperFn, layout::Contiguous>)
266 {
267 IdxVecType extent = m_idxRange.distance();
268 IdxVecType logicalExtent = divCeil(extent, m_idxRange.m_stride);
269
270 // elements per slot
271 IdxVecType base = logicalExtent / numThreads;
272 // remainder elements will be given to the slots with id lower than rem
273 IdxVecType rem = logicalExtent % numThreads;
274
275 IdxVecType firstLogical = threadIdx * base + threadIdx.min(rem);
276 IdxVecType first = firstLogical * m_idxRange.m_stride;
277
278 IdxVecType nextThreadIdx = threadIdx + IdxType{1};
279 IdxVecType endLogical = nextThreadIdx * base + nextThreadIdx.min(rem);
280 // crop to the end of the index range
281 IdxVecType end = extent.min(endLogical * m_idxRange.m_stride);
282
283 return const_iterator(m_idxRange.m_begin, first, end, m_idxRange.m_stride);
284 }
285 }
286
288 {
289 constexpr auto selectedDims = T_CSelect{};
290 auto [threadIdx, numThreads] = m_threadSpace.mapTo(selectedDims);
291
292 if constexpr(std::is_same_v<T_IdxMapperFn, layout::Strided>)
293 {
294 return const_iterator_end(m_idxRange.m_begin + m_idxRange.distance());
295 }
296 else if constexpr(std::is_same_v<T_IdxMapperFn, layout::Contiguous>)
297 {
298 IdxVecType extent = m_idxRange.distance();
299 IdxVecType logicalExtent = divCeil(extent, m_idxRange.m_stride);
300
301 // elements per slot
302 IdxVecType base = logicalExtent / numThreads;
303 // remainder elements will be given to the slots with id lower than rem
304 IdxVecType rem = logicalExtent % numThreads;
305
306 IdxVecType nextSlotIdx = threadIdx + IdxType{1};
307 IdxVecType endLogical = nextSlotIdx * base + nextSlotIdx.min(rem);
308 // crop to the end of the index range
309 IdxVecType end = extent.min(endLogical * m_idxRange.m_stride);
310
311 return const_iterator_end(m_idxRange.m_begin + end);
312 }
313 }
314
322
323 private:
324 T_IdxRange m_idxRange;
325 T_ThreadSpace m_threadSpace;
326 };
327} // namespace alpaka::onAcc
constexpr bool operator!=(const_iterator_end const &other) const
constexpr bool operator==(const_iterator const &other) const
constexpr bool operator==(const_iterator_end const &other) const
ALPAKA_FN_ACC const_iterator_end(alpaka::concepts::Vector auto const &extent)
constexpr bool operator!=(const_iterator const &other) const
ALPAKA_FN_ACC constexpr IdxType slowCurrent() const
constexpr bool operator==(const_iterator_end const &other) const
detail::ReducedVector< IdxType, iterDim > m_first
constexpr bool operator!=(const_iterator_end const &other) const
constexpr bool operator!=(const_iterator const &other) const
constexpr bool operator==(const_iterator const &other) const
constexpr const_iterator(alpaka::concepts::Vector auto const offset, alpaka::concepts::Vector auto const first, alpaka::concepts::Vector auto const extent, alpaka::concepts::Vector auto const stride)
ALPAKA_FN_ACC const_iterator begin() const
constexpr TiledIdxContainer(TiledIdxContainer &&)=default
ALPAKA_FN_ACC TiledIdxContainer(T_IdxRange const &idxRange, T_ThreadSpace const &threadSpace, T_IdxMapperFn idxMapping, T_CSelect const &=T_CSelect{})
ALPAKA_FN_ACC const_iterator_end end() const
typename T_IdxRange::IdxType IdxType
ALPAKA_FN_HOST_ACC constexpr auto operator[](alpaka::concepts::CVector auto const iterDir) const
constexpr TiledIdxContainer(TiledIdxContainer const &)=default
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_F...
Definition common.hpp:30
#define ALPAKA_FN_HOST_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_F...
Definition common.hpp:31
Concept to check if a type is a CVector.
Definition Vec.hpp:74
Concept to check if a type is an index range.
Definition IdxRange.hpp:240
Concept to check if a type is a vector.
Definition Vec.hpp:53
functionality which is usable on the accelerator compute device from within a kernel.
Definition executor.hpp:38
ALPAKA_FN_HOST_ACC constexpr auto divCeil(Integral a, Integral b) -> Integral
Returns the ceiling of a / b, as integer.
Definition utility.hpp:34
constexpr auto min(Vec< T_Type, T_dim, T_OtherStorage > const &rhs) const
Definition Vec.hpp:570
constexpr Vec< T_Type, T_numElements > rshrink() const
Definition Vec.hpp:409
constexpr decltype(auto) operator[](std::integral auto const idx)
Definition Vec.hpp:357
constexpr ReducedVector(Vec< T_Type, 1u > const &)
constexpr ReducedVector(Vec< T_Type, T_dim > const &first)