26 template<
typename T_IdxRange,
typename T_ThreadSpace,
typename T_IdxMapperFn, alpaka::concepts::CVector T_CSelect>
31 static_assert(std::ranges::forward_range<FlatIdxContainer>);
32 static_assert(std::ranges::borrowed_range<FlatIdxContainer>);
33 static_assert(std::ranges::range<FlatIdxContainer>);
34 static_assert(std::ranges::input_range<FlatIdxContainer>);
38 using IdxType =
typename T_IdxRange::IdxType;
39 static constexpr uint32_t
dim = T_IdxRange::dim();
43 T_IdxRange
const& idxRange,
44 T_ThreadSpace
const& threadSpace,
45 T_IdxMapperFn idxMapping,
46 T_CSelect
const& = T_CSelect{})
47 : T_IdxMapperFn{
std::move(idxMapping)}
70 static_assert(std::forward_iterator<const_iterator_end>);
71 static_assert(std::input_iterator<const_iterator_end>);
91 return !(*
this == other);
101 return !(*
this == other);
113 static constexpr uint32_t
iterDim = T_CSelect::dim();
118 static_assert(std::forward_iterator<const_iterator>);
119 static_assert(std::input_iterator<const_iterator>);
168 return ((**
this) == *other);
173 return !(*
this == other);
183 return !(*
this == other);
199 constexpr auto selectedDims = T_CSelect{};
200 auto [threadIdx, numThreads] =
m_threadSpace.mapTo(selectedDims);
202 if constexpr(std::is_same_v<T_IdxMapperFn, layout::Strided>)
204 auto groupOffset = threadIdx *
m_idxRange.m_stride;
205 groupOffset.ref(selectedDims) -= groupOffset[selectedDims];
209 auto linearCurrent =
linearize(numThreads[selectedDims], threadIdx[selectedDims]);
210 auto linearStride = numThreads[selectedDims].product();
211 auto strideMD =
m_idxRange.m_stride[selectedDims];
214 return const_iterator(
begin, linearCurrent, linearStride, extentMD.product(), extentMD, strideMD);
216 else if constexpr(std::is_same_v<T_IdxMapperFn, layout::Contiguous>)
218 auto groupOffset = threadIdx *
m_idxRange.m_stride;
219 groupOffset.ref(selectedDims) -= groupOffset[selectedDims];
223 auto strideMD =
m_idxRange.m_stride[selectedDims];
226 auto threadCountMD =
m_threadSpace.m_threadCount[selectedDims];
228 auto numWorkerSlots = threadCountMD.product();
229 auto linearSlotIdx =
linearize(threadCountMD, threadIdx[selectedDims]);
231 auto logicalExtent = extentMD.product();
234 auto base = logicalExtent / numWorkerSlots;
236 auto rem = logicalExtent % numWorkerSlots;
238 auto nextLinearSlotIdx = linearSlotIdx +
IdxType{1};
240 auto linearCurrent = linearSlotIdx * base + std::min(linearSlotIdx, rem);
241 auto linearEnd = nextLinearSlotIdx * base + std::min(nextLinearSlotIdx, rem);
247 std::min(linearEnd, logicalExtent),
255 constexpr auto selectedDims = T_CSelect{};
256 auto [threadIdx, numThreads] =
m_threadSpace.mapTo(selectedDims);
258 if constexpr(std::is_same_v<T_IdxMapperFn, layout::Strided>)
263 else if constexpr(std::is_same_v<T_IdxMapperFn, layout::Contiguous>)
265 auto strideMD =
m_idxRange.m_stride[selectedDims];
268 auto numWorkerSlots = numThreads[selectedDims].product();
269 auto linearSlotIdx =
linearize(numThreads[selectedDims], threadIdx[selectedDims]);
271 auto logicalExtent = extentMD.product();
274 auto base = logicalExtent / numWorkerSlots;
276 auto rem = logicalExtent % numWorkerSlots;
278 auto nextLinearSlotIdx = linearSlotIdx +
IdxType{1};
279 auto linearEnd = nextLinearSlotIdx * base + std::min(nextLinearSlotIdx, rem);
Special implementation to define the end.
constexpr bool operator==(const_iterator_end const &other) const
constexpr IdxType operator*() const
ALPAKA_FN_ACC const_iterator_end(IdxType const &end)
friend class FlatIdxContainer
constexpr bool operator==(const_iterator const &other) const
constexpr bool operator!=(const_iterator_end const &other) const
constexpr bool operator!=(const_iterator const &other) const
constexpr const_iterator(alpaka::concepts::Vector auto offsetMD, IdxType const current, IdxType const stride, IdxType const end, alpaka::concepts::Vector auto const extentMD, alpaka::concepts::Vector auto const strideMD)
constexpr bool operator!=(const_iterator const &other) const
ALPAKA_FN_ACC const_iterator & operator++()
ALPAKA_FN_ACC const_iterator operator++(int)
constexpr bool operator==(const_iterator const &other) const
IterIdxVecType m_extentMD
ALPAKA_FN_ACC constexpr IdxType slowCurrent() const
constexpr bool operator!=(const_iterator_end const &other) const
Vec< IdxType, iterDim > IterIdxVecType
constexpr IdxVecType operator*() const
friend class FlatIdxContainer
static constexpr uint32_t iterDim
constexpr bool operator==(const_iterator_end const &other) const
IterIdxVecType m_strideMD
friend class const_iterator_end
constexpr FlatIdxContainer(FlatIdxContainer const &)=default
ALPAKA_FN_HOST_ACC constexpr auto operator[](alpaka::concepts::CVector auto const iterDir) const
ALPAKA_FN_ACC FlatIdxContainer(T_IdxRange const &idxRange, T_ThreadSpace const &threadSpace, T_IdxMapperFn idxMapping, T_CSelect const &=T_CSelect{})
ALPAKA_FN_ACC const_iterator_end end() const
ALPAKA_FN_ACC const_iterator begin() const
T_ThreadSpace m_threadSpace
typename T_IdxRange::IdxType IdxType
static constexpr uint32_t dim
constexpr FlatIdxContainer(FlatIdxContainer &&)=default
Vec< IdxType, dim > IdxVecType
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC.
#define ALPAKA_FN_HOST_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC.
Concept to check if a type is a CVector.
Concept to check if a type is a vector.
Functionality that is usable on the accelerator compute device from within a kernel.
ALPAKA_FN_HOST_ACC constexpr auto divCeil(Integral a, Integral b) -> Integral
Returns the ceiling of a / b, as integer.
constexpr T_IntegralType linearize(Vec< T_IntegralType, T_dim - 1u, T_Storage > const &dim, Vec< T_IntegralType, T_dim, T_OtherStorage > const &idx)
Give the linear index of an N-dimensional index within an N-dimensional index space.
constexpr Vec< T_IntegralType, T_dim > mapToND(Vec< T_IntegralType, T_dim, T_Storage > const &extents, T_IntegralType linearIdx)
Maps a linear index to an N-dimensional index.