alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
iota.hpp
Go to the documentation of this file.
1/* Copyright 2025 René Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7
8#include "alpaka/SimdPtr.hpp"
9#include "alpaka/Vec.hpp"
11#include "alpaka/onAcc/Acc.hpp"
15#include "alpaka/trait.hpp"
16
17namespace alpaka::onHost::internal
18{
// Kernel functor that generates an iota-style sequence in SIMD packs.
//
// For every chunk visited by SimdAlgo::concurrent, a pack is built whose lane k
// holds  linearize(extents, <index of the chunk>) + initValue + k  (computed in
// T_DataType), and that pack is stored through the trailing SIMD pointers.
//
// Template parameter:
//   T_DataType - arithmetic type in which the sequence values are computed.
// Parameters of operator():
//   acc       - accelerator handle, handed on to SimdAlgo::concurrent.
//   extents   - extents of the index space; passed to concurrent and to linearize.
//   initValue - offset added to every linearized index.
//   inputs    - spans forwarded to concurrent, which passes them to the inner
//               lambda as SimdPtr arguments.
19 struct SimdIotaKernel
20 {
21 template<typename T_DataType>
22 ALPAKA_FN_ACC void operator()(
23 onAcc::concepts::Acc auto const& acc,
24 alpaka::concepts::Vector auto extents,
25 T_DataType const& initValue,
26 alpaka::concepts::IMdSpan auto&&... inputs) const
27 {
// SIMD algorithm object configured with the threadsInGrid worker description.
28 auto simdGrid = onAcc::SimdAlgo{onAcc::worker::threadsInGrid};
29
// The call expression is returned as-is; the enclosing operator() is declared void.
30 return simdGrid.concurrent(
31 acc,
32 extents,
// Per-chunk callback: the accelerator argument is unused, in0 is the first
// SIMD pointer and inOther collects all remaining ones.
33 [&](onAcc::concepts::Acc auto const&,
34 alpaka::concepts::SimdPtr auto&& in0,
35 alpaka::concepts::SimdPtr auto&&... inOther)
36 {
// Pack type obtained from loading through the first pointer.
37 using SimdType = ALPAKA_TYPEOF(in0.load());
// Multi-dimensional index reported by in0 (presumably the position of the
// pack's first lane -- confirm against SimdPtr::getIdx).
38 alpaka::concepts::Vector auto iotaOffsetMd = in0.getIdx();
// Scalar base value: linear index within extents, shifted by initValue.
39 T_DataType linearBaseOffset
40 = static_cast<T_DataType>(linearize(extents, iotaOffsetMd)) + initValue;
// Construct the pack from a generator taking a lane id; the generator yields
// linearBaseOffset + laneId.
41 alpaka::concepts::Simd auto result
42 = SimdType([&](auto const& laneId) constexpr
43 { return linearBaseOffset + static_cast<T_DataType>(laneId); });
44 // write output
// NOTE(review): the embedded numbering jumps from 44 to 46, so one source line
// is absent from this listing. In the visible code in0 is only read (load,
// getIdx) and never assigned; presumably the missing line stores result through
// in0 -- confirm against the original header.
// Fold over the remaining pointers: each receives result converted with pCast
// to that pointer's own value type.
46 ((inOther = pCast<alpaka::trait::GetValueType_t<ALPAKA_TYPEOF(inOther)>>(result)), ...);
47 },
48 ALPAKA_FORWARD(inputs)...);
49 }
50 };
51
// Host-side launcher: derives a frame specification for the given extents and
// enqueues SimdIotaKernel on the queue.
//
// Template parameter:
//   T_DataType - type of initValue; also used to select the frame specification
//                and printed as value_type in the description string.
// Parameters:
//   queue     - queue providing getDevice() and enqueue().
//   exec      - executor passed to getFrameSpec.
//   extents   - extents of the index space to fill.
//   initValue - start offset forwarded to the kernel.
//   inputs    - spans forwarded to the kernel as its trailing arguments.
52 template<typename T_DataType>
53 inline void iota(
54 auto const& queue,
55 alpaka::concepts::Executor auto const exec,
56 alpaka::concepts::Vector auto const& extents,
57 T_DataType const& initValue,
58 alpaka::concepts::IMdSpan auto&&... inputs)
59 {
// Materialize the extents as a Vec (template arguments are deduced).
60 Vec const extentMd = extents;
// Frame specification computed from the queue's device, the executor and the extents.
61 auto frameSpec = getFrameSpec<T_DataType>(queue.getDevice(), exec, extentMd);
62
// NOTE(review): the embedded numbering jumps from 62 to 65, so the opening of the
// call that receives the lambda below is absent from this listing (the closing
// `});` on line 71 has no visible counterpart). Presumably a logging macro
// invocation such as ALPAKA_LOG_INFO(<level>, <callable>) -- confirm against the
// original header.
// Callable that formats a one-line description of this launch: extents, the
// demangled name of T_DataType and the frame specification.
65 [&]()
66 {
67 std::stringstream ss;
68 ss << "iota{ extents=" << extentMd << ", value_type=" << onHost::demangledName<T_DataType>() << ", "
69 << frameSpec << " }";
70 return ss.str();
71 });
72
// Bundle the kernel functor with its arguments and submit it with the frame specification.
73 queue.enqueue(frameSpec, KernelBundle{SimdIotaKernel{}, extentMd, initValue, ALPAKA_FORWARD(inputs)...});
74 }
75} // namespace alpaka::onHost::internal
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC.
Definition common.hpp:30
#define ALPAKA_TYPEOF(...)
Get the type of instance.
Definition common.hpp:153
#define ALPAKA_FORWARD(instance)
Perfectly forward an instance as argument.
Definition common.hpp:147
#define ALPAKA_LOG_INFO(logLvl, callable)
Write a meta data message to the output.
Definition logger.hpp:106
constexpr auto threadsInGrid
constexpr auto queue
Definition lvl.hpp:127
constexpr auto memory
Definition lvl.hpp:112
constexpr auto demangledName()
typename GetValueType< T >::type GetValueType_t
Definition trait.hpp:65
constexpr T_IntegralType linearize(Vec< T_IntegralType, T_dim - 1u, T_Storage > const &dim, Vec< T_IntegralType, T_dim, T_OtherStorage > const &idx)
Give the linear index of an N-dimensional index within an N-dimensional index space.
Definition Vec.hpp:832
ALPAKA_FN_HOST_ACC Vec(T_1, T_Args...) -> Vec< T_1, uint32_t(sizeof...(T_Args)+1u), ArrayStorage< T_1, uint32_t(sizeof...(T_Args)+1u)> >
ALPAKA_FN_HOST KernelBundle(TKernelFn const &, TArgs &&...) -> KernelBundle< TKernelFn, TArgs... >
User-defined deduction guide with a trailing return type, used for class template argument deduction (CTAD) during construction.
constexpr decltype(auto) pCast(auto &&input)
Performs a static_cast on the storage type of combined data type.
Definition cast.hpp:48