alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
FlatIdxContainer.hpp
Go to the documentation of this file.
1/* Copyright 2024 Andrea Bocci, René Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7#include "alpaka/Vec.hpp"
8#include "alpaka/api/api.hpp"
10#include "alpaka/core/PP.hpp"
14#include "alpaka/tag.hpp"
15#include "alpaka/utility.hpp"
16
17#include <cstdint>
18#include <functional>
19#include <memory>
20#include <ranges>
21#include <sstream>
22
23namespace alpaka::onAcc
24{
25
26 template<typename T_IdxRange, typename T_ThreadSpace, typename T_IdxMapperFn, alpaka::concepts::CVector T_CSelect>
27 class FlatIdxContainer : private T_IdxMapperFn
28 {
// Compile-time statement of the std::ranges concepts this container is expected to model.
// The function does nothing at runtime and is not called anywhere in the visible code.
// NOTE(review): as a member function of a class template, this body is presumably only
// instantiated when it is used, so these assertions may never actually be evaluated - confirm.
29 void _()
30 {
31 static_assert(std::ranges::forward_range<FlatIdxContainer>);
32 static_assert(std::ranges::borrowed_range<FlatIdxContainer>);
33 static_assert(std::ranges::range<FlatIdxContainer>);
34 static_assert(std::ranges::input_range<FlatIdxContainer>);
35 }
36
37 public:
38 using IdxType = typename T_IdxRange::IdxType;
39 static constexpr uint32_t dim = T_IdxRange::dim();
41
43 T_IdxRange const& idxRange,
44 T_ThreadSpace const& threadSpace,
45 T_IdxMapperFn idxMapping,
46 T_CSelect const& = T_CSelect{})
47 : T_IdxMapperFn{std::move(idxMapping)}
48 , m_idxRange(idxRange)
49 , m_threadSpace{threadSpace}
50 {
51 // std::cout << "iter:" << m_idxRange.toString() << " " << m_threadSpace.toString() << std::endl;
52 }
53
// Copy and move construction use the compiler-generated, member-wise implementations.
54 constexpr FlatIdxContainer(FlatIdxContainer const&) = default;
55 constexpr FlatIdxContainer(FlatIdxContainer&&) = default;
56
57 class const_iterator;
58
59 /** special implementation to define the end
60 *
61 * Only a scalar value needs to be stored, which reduces the register footprint.
62 * The end is reached once the index is equal to or beyond the extent of the slowest-moving dimension.
63 */
65 {
66 friend class FlatIdxContainer;
67
// Compile-time statement of the iterator concepts the end sentinel is expected to model.
// Has no runtime effect and is not called in the visible code.
// NOTE(review): inside a class template this body is presumably only instantiated when used,
// so the assertions may never be evaluated - confirm.
68 void _()
69 {
70 static_assert(std::forward_iterator<const_iterator_end>);
71 static_assert(std::input_iterator<const_iterator_end>);
72 }
73
77
// Returns the stored scalar end value (m_extentSlowDim).
// const_iterator reads this value via `*other` when it compares itself against the end sentinel.
78 constexpr IdxType operator*() const
79 {
80 return m_extentSlowDim;
81 }
82
83 public:
// Two end sentinels are equal when they store the same end value.
84 constexpr bool operator==(const_iterator_end const& other) const
85 {
86 return (m_extentSlowDim == other.m_extentSlowDim);
87 }
88
// Inequality is the negation of the sentinel/sentinel equality above.
89 constexpr bool operator!=(const_iterator_end const& other) const
90 {
91 return !(*this == other);
92 }
93
// The sentinel compares equal to an iterator as soon as the iterator's position
// (slowCurrent()) is equal to or larger than the stored end value. Because `<=` is used
// instead of `==`, an iterator positioned beyond the end value also counts as "at end".
94 constexpr bool operator==(const_iterator const& other) const
95 {
96 return (m_extentSlowDim <= other.slowCurrent());
97 }
98
// Inequality is the negation of the sentinel/iterator equality above.
99 constexpr bool operator!=(const_iterator const& other) const
100 {
101 return !(*this == other);
102 }
103
104 private:
106 };
107
109 {
110 friend class FlatIdxContainer;
111 friend class const_iterator_end;
112
113 static constexpr uint32_t iterDim = T_CSelect::dim();
115
// Compile-time statement of the iterator concepts const_iterator is expected to model.
// Has no runtime effect and is not called in the visible code.
// NOTE(review): inside a class template this body is presumably only instantiated when used,
// so the assertions may never be evaluated - confirm.
116 void _()
117 {
118 static_assert(std::forward_iterator<const_iterator>);
119 static_assert(std::input_iterator<const_iterator>);
120 }
121
// Stores the iteration state; every argument is copied into the member of the same name.
// Meaning of the arguments as they are passed by FlatIdxContainer::begin():
//   offsetMD - multi-dimensional start index (range begin plus the thread's group offset)
//   current  - linear start position of the iterator
//   stride   - linear step between two positions
//   end      - linear end position
//   extentMD - number of steps per selected dimension, used to unflatten `current`
//   strideMD - index-range stride in the selected dimensions
// Note: the initializer list names m_end before m_stride although the parameters are ordered
// (current, stride, end); each member is still initialized from the parameter with its name.
122 constexpr const_iterator(
123 alpaka::concepts::Vector auto offsetMD,
124 IdxType const current,
125 IdxType const stride,
126 IdxType const end,
127 alpaka::concepts::Vector auto const extentMD,
128 alpaka::concepts::Vector auto const strideMD)
129 : m_offsetMD{offsetMD}
130 , m_current{current}
131 , m_end{end}
132 , m_stride{stride}
133 , m_extentMD{extentMD}
134 , m_strideMD{strideMD}
135 {
136 }
137
139 {
140 return m_current;
141 }
142
143 public:
// Returns the multi-dimensional index for the current linear position.
// Starts from the stored offset and, in the dimensions selected by T_CSelect, adds the
// linear position mapped to an N-dimensional index (mapToND over m_extentMD) multiplied
// by the per-dimension stride m_strideMD.
144 constexpr IdxVecType operator*() const
145 {
146 auto result = m_offsetMD;
147 result.ref(T_CSelect{}) += mapToND(m_extentMD, m_current) * m_strideMD;
148 return result;
149 }
150
151 // pre-increment the iterator
153 {
155 return *this;
156 }
157
158 // post-increment the iterator
160 {
161 const_iterator old = *this;
162 ++(*this);
163 return old;
164 }
165
// Two iterators are compared through their dereferenced index vectors, not through
// their internal linear positions.
// NOTE(review): the result depends on how IdxVecType implements operator==, which is not
// visible here - confirm it yields a single bool.
166 constexpr bool operator==(const_iterator const& other) const
167 {
168 return ((**this) == *other);
169 }
170
// Inequality is the negation of the iterator/iterator equality above.
171 constexpr bool operator!=(const_iterator const& other) const
172 {
173 return !(*this == other);
174 }
175
// The iterator compares equal to the end sentinel once its position (slowCurrent()) is
// equal to or larger than the end value stored in the sentinel (`*other`).
176 constexpr bool operator==(const_iterator_end const& other) const
177 {
178 return (slowCurrent() >= *other);
179 }
180
// Inequality is the negation of the iterator/sentinel equality above; this is the
// comparison a loop of the form `it != end` evaluates.
181 constexpr bool operator!=(const_iterator_end const& other) const
182 {
183 return !(*this == other);
184 }
185
186 private:
188 // modified by the pre/post-increment operator
190 // non-const to support iterator copy and assignment
195 };
196
198 {
// Body of begin() (its signature line is missing from this listing).
// Builds the start iterator from the index range and the thread space, with a different
// work distribution depending on the layout type T_IdxMapperFn.
// NOTE(review): if T_IdxMapperFn is neither layout::Strided nor layout::Contiguous, no
// branch is taken and the function has no return statement - confirm that cannot happen.
199 constexpr auto selectedDims = T_CSelect{};
// Thread index and thread count as mapped by the thread space for the selected dimensions.
200 auto [threadIdx, numThreads] = m_threadSpace.mapTo(selectedDims);
201
202 if constexpr(std::is_same_v<T_IdxMapperFn, layout::Strided>)
203 {
// Offset contributed by the thread index. The selected components are subtracted from
// themselves, so effectively only the non-selected dimensions keep an offset.
204 auto groupOffset = threadIdx * m_idxRange.m_stride;
205 groupOffset.ref(selectedDims) -= groupOffset[selectedDims];
206
207 auto begin = m_idxRange.m_begin + groupOffset;
208
// Linear start position: the linearized thread index within the selected dimensions.
// Linear step: the total number of threads in the selected dimensions.
209 auto linearCurrent = linearize(numThreads[selectedDims], threadIdx[selectedDims]);
210 auto linearStride = numThreads[selectedDims].product();
211 auto strideMD = m_idxRange.m_stride[selectedDims];
// Number of steps per selected dimension: range distance divided by stride, rounded up.
212 auto extentMD = divCeil(m_idxRange.distance()[selectedDims], strideMD);
213
// Arguments: offset, current, stride, end (total number of steps), extentMD, strideMD.
214 return const_iterator(begin, linearCurrent, linearStride, extentMD.product(), extentMD, strideMD);
215 }
216 else if constexpr(std::is_same_v<T_IdxMapperFn, layout::Contiguous>)
217 {
// Same offset computation as in the Strided branch.
218 auto groupOffset = threadIdx * m_idxRange.m_stride;
219 groupOffset.ref(selectedDims) -= groupOffset[selectedDims];
220
221 auto begin = m_idxRange.m_begin + groupOffset;
222
223 auto strideMD = m_idxRange.m_stride[selectedDims];
224 auto extentMD = divCeil(m_idxRange.distance()[selectedDims], strideMD);
225
// NOTE(review): this branch reads the thread count from m_threadSpace.m_threadCount and
// leaves `numThreads` unused, while end() uses `numThreads` from mapTo() for the same
// computation - confirm both always agree.
226 auto threadCountMD = m_threadSpace.m_threadCount[selectedDims];
227
228 auto numWorkerSlots = threadCountMD.product();
229 auto linearSlotIdx = linearize(threadCountMD, threadIdx[selectedDims]);
230
// Total number of linear positions to distribute over the worker slots.
231 auto logicalExtent = extentMD.product();
232
233 // elements per slot
234 auto base = logicalExtent / numWorkerSlots;
235 // remainder elements will be given to the slots with id lower than rem
236 auto rem = logicalExtent % numWorkerSlots;
237
238 auto nextLinearSlotIdx = linearSlotIdx + IdxType{1};
239
// First position of this slot's chunk, and the first position of the next slot's chunk.
// The std::min term adds one extra element for every preceding slot with id below rem.
240 auto linearCurrent = linearSlotIdx * base + std::min(linearSlotIdx, rem);
241 auto linearEnd = nextLinearSlotIdx * base + std::min(nextLinearSlotIdx, rem);
242
// The chunk is walked with a linear stride of 1; the end is clamped to logicalExtent.
243 return const_iterator(
244 begin,
245 linearCurrent,
246 IdxType{1u},
247 std::min(linearEnd, logicalExtent),
248 extentMD,
249 strideMD);
250 }
251 }
252
254 {
// Body of end() (its signature line is missing from this listing).
// Builds the end sentinel holding the scalar linear end position that matches begin().
// NOTE(review): if T_IdxMapperFn is neither layout::Strided nor layout::Contiguous, no
// branch is taken and the function has no return statement - confirm that cannot happen.
255 constexpr auto selectedDims = T_CSelect{};
// NOTE(review): threadIdx and numThreads are only used in the Contiguous branch; in the
// Strided branch this structured binding is unused.
256 auto [threadIdx, numThreads] = m_threadSpace.mapTo(selectedDims);
257
258 if constexpr(std::is_same_v<T_IdxMapperFn, layout::Strided>)
259 {
// End is the total number of steps: product of the per-dimension step counts
// (range distance divided by stride, rounded up).
260 auto extentMD = divCeil(m_idxRange.distance()[selectedDims], m_idxRange.m_stride[selectedDims]);
261 return const_iterator_end(extentMD.product());
262 }
263 else if constexpr(std::is_same_v<T_IdxMapperFn, layout::Contiguous>)
264 {
265 auto strideMD = m_idxRange.m_stride[selectedDims];
266 auto extentMD = divCeil(m_idxRange.distance()[selectedDims], strideMD);
267
// Number of worker slots and this thread's linear slot index, both taken from mapTo().
268 auto numWorkerSlots = numThreads[selectedDims].product();
269 auto linearSlotIdx = linearize(numThreads[selectedDims], threadIdx[selectedDims]);
270
271 auto logicalExtent = extentMD.product();
272
273 // elements per slot
274 auto base = logicalExtent / numWorkerSlots;
275 // remainder elements will be given to the slots with id lower than rem
276 auto rem = logicalExtent % numWorkerSlots;
277
// End of this slot's chunk equals the start of the next slot's chunk.
278 auto nextLinearSlotIdx = linearSlotIdx + IdxType{1};
279 auto linearEnd = nextLinearSlotIdx * base + std::min(nextLinearSlotIdx, rem);
280
// Clamped to logicalExtent, the same expression begin() passes as the iterator's end.
281 return const_iterator_end(std::min(linearEnd, logicalExtent));
282 }
283 }
284
292
293 private:
294 T_IdxRange m_idxRange;
295 T_ThreadSpace m_threadSpace;
296 };
297} // namespace alpaka::onAcc
special implementation to define the end
constexpr bool operator==(const_iterator_end const &other) const
ALPAKA_FN_ACC const_iterator_end(IdxType const &end)
constexpr bool operator==(const_iterator const &other) const
constexpr bool operator!=(const_iterator_end const &other) const
constexpr bool operator!=(const_iterator const &other) const
constexpr const_iterator(alpaka::concepts::Vector auto offsetMD, IdxType const current, IdxType const stride, IdxType const end, alpaka::concepts::Vector auto const extentMD, alpaka::concepts::Vector auto const strideMD)
constexpr bool operator!=(const_iterator const &other) const
constexpr bool operator==(const_iterator const &other) const
ALPAKA_FN_ACC constexpr IdxType slowCurrent() const
constexpr bool operator!=(const_iterator_end const &other) const
constexpr bool operator==(const_iterator_end const &other) const
constexpr FlatIdxContainer(FlatIdxContainer const &)=default
ALPAKA_FN_HOST_ACC constexpr auto operator[](alpaka::concepts::CVector auto const iterDir) const
ALPAKA_FN_ACC FlatIdxContainer(T_IdxRange const &idxRange, T_ThreadSpace const &threadSpace, T_IdxMapperFn idxMapping, T_CSelect const &=T_CSelect{})
ALPAKA_FN_ACC const_iterator_end end() const
ALPAKA_FN_ACC const_iterator begin() const
typename T_IdxRange::IdxType IdxType
static constexpr uint32_t dim
constexpr FlatIdxContainer(FlatIdxContainer &&)=default
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_F...
Definition common.hpp:30
#define ALPAKA_FN_HOST_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_F...
Definition common.hpp:31
Concept to check if a type is a CVector.
Definition Vec.hpp:74
Concept to check if a type is a vector.
Definition Vec.hpp:53
functionality which is usable on the accelerator compute device from within a kernel.
Definition executor.hpp:38
ALPAKA_FN_HOST_ACC constexpr auto divCeil(Integral a, Integral b) -> Integral
Returns the ceiling of a / b, as integer.
Definition utility.hpp:34
constexpr T_IntegralType linearize(Vec< T_IntegralType, T_dim - 1u, T_Storage > const &dim, Vec< T_IntegralType, T_dim, T_OtherStorage > const &idx)
Give the linear index of an N-dimensional index within an N-dimensional index space.
Definition Vec.hpp:832
constexpr Vec< T_IntegralType, T_dim > mapToND(Vec< T_IntegralType, T_dim, T_Storage > const &extents, T_IntegralType linearIdx)
Maps a linear index to an N-dimensional index.
Definition Vec.hpp:873
STL namespace.