alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
transform.hpp
Go to the documentation of this file.
1/* Copyright 2025 René Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7
8#include "alpaka/Simd.hpp"
9#include "alpaka/Vec.hpp"
11#include "alpaka/functor.hpp"
12#include "alpaka/mem/MdSpan.hpp"
13#include "alpaka/onAcc/Acc.hpp"
17#include "alpaka/trait.hpp"
18
19namespace alpaka::onHost::internal
20{
// Kernel functor that runs an onAcc::SimdAlgo over the extents of `output` and, inside the
// callback handed to the algorithm, assigns the value returned by
// callFunctor(acc, func, ...) to the output handle. How the inputs are passed to `func`
// depends on the functor's wrapper type (see the branches in operator()).
21 struct SimdTransformKernel
22 {
// Kernel entry point.
//   acc     accelerator handle; passed to the algorithm and on to the functor.
//   output  destination; output.getExtents() is the range given to concurrent().
//   func    user functor; its type selects one of the compile-time branches below.
//   inputs  data sources forwarded to concurrent() together with `output`.
23 ALPAKA_FN_ACC void operator()(
24 onAcc::concepts::Acc auto const& acc,
25 alpaka::concepts::IMdSpan auto&& output,
26 auto const& func,
27 alpaka::concepts::IDataSource auto&&... inputs) const
28 {
// Algorithm object built from onAcc::worker::threadsInGrid — presumably this spreads the
// work over all threads of the grid; confirm against the SimdAlgo documentation.
29 auto simdGrid = onAcc::SimdAlgo{onAcc::worker::threadsInGrid};
// NOTE(review): this listing skips original line 30. The brace below presumably opens an
// `if constexpr(...)` branch (the next branch starts with `else if constexpr`); the exact
// condition is not visible here — confirm against the real header.
31 {
32 return simdGrid.concurrent(
33 acc,
34 output.getExtents(),
// In this branch the callback hands its `in` arguments to the functor as received,
// without calling `.load()` on them.
35 [&](auto const& acc, auto out, auto&&... in)
36 { out = callFunctor(acc, func, ALPAKA_FORWARD(in)...); },
37 ALPAKA_FORWARD(output),
38 ALPAKA_FORWARD(inputs)...);
39 }
// ScalarFunc wrapper: the functor is invoked once per lane on single elements rather than
// once on whole loaded values.
40 else if constexpr(isSpecializationOf_v<ALPAKA_TYPEOF(func), ScalarFunc>)
41 {
// NOTE(review): unlike the other two branches there is no `return` in front of this call.
// operator() is declared void, so this looks cosmetic only — confirm.
42 simdGrid.concurrent(
43 acc,
44 output.getExtents(),
45 [&](auto const& acc, auto outPtr, auto const&... inPtr) constexpr
46 {
47 outPtr = loadAncExecuteScalarOp(
// One index per lane: 0 .. width()-1, where width() comes from the type of outPtr.
48 std::make_integer_sequence<uint32_t, ALPAKA_TYPEOF(outPtr)::width()>{},
// Per-lane operation: selects element idx.x() from every `data` argument and calls the
// functor with those elements.
49 [](alpaka::concepts::CVector auto idx,
50 auto const& acc,
51 auto&& func,
52 auto&&... data) constexpr { return callFunctor(acc, func, data[idx.x()]...); },
53 acc,
54 func,
// Each input is loaded once here; the per-lane operation indexes into the loaded values.
55 inPtr.load()...);
56 },
57 ALPAKA_FORWARD(output),
58 ALPAKA_FORWARD(inputs)...);
59 }
// Any other functor type: called once per callback invocation with the loaded values of
// all inputs.
60 else
61 {
62 return simdGrid.concurrent(
63 acc,
64 output.getExtents(),
65 [&](auto const& acc, auto out, auto const&... in) { out = callFunctor(acc, func, in.load()...); },
66 ALPAKA_FORWARD(output),
67 ALPAKA_FORWARD(inputs)...);
68 }
69 }
70
// Calls `op` once for every index in T_idx, passing CVec<uint32_t, T_idx>{} first, followed
// by acc, func and data, and brace-initialises a Simd from the results (one per index).
// NOTE(review): func and data are forwarded in every expansion of the T_idx pack; this is
// presumably safe only as long as `op` does not move from them — confirm.
// NOTE(review): "Anc" in the name looks like a typo for "And"; it is left unchanged here
// because code outside this listing may refer to the current name.
71 template<uint32_t... T_idx>
72 ALPAKA_FN_INLINE static constexpr auto loadAncExecuteScalarOp(
73 std::integer_sequence<uint32_t, T_idx...>,
74 auto&& op,
75 auto const& acc,
76 auto&& func,
77 auto&&... data)
78 {
79 return Simd{op(CVec<uint32_t, T_idx>{}, acc, ALPAKA_FORWARD(func), ALPAKA_FORWARD(data)...)...};
80 }
81 };
82
// Host-side entry: derives a frame specification from the queue's device, the executor and
// the extents of `out`, emits a log message describing the call, and enqueues a
// KernelBundle made of SimdTransformKernel plus the forwarded out / fn / in arguments.
// NOTE(review): this listing skips original lines 86, 88 and 94, so the declarations of
// `out` and `in` and the opening of the logging call are not visible here. The names are
// used in the body below; their types/concepts must be confirmed against the real header.
83 inline void transform(
84 auto const& queue,
85 alpaka::concepts::Executor auto const exec,
87 auto&& fn,
89 {
// The extents of the output determine the frame specification computed below.
90 auto extentMd = onHost::getExtents(out);
91 using DataType = alpaka::trait::GetValueType_t<ALPAKA_TYPEOF(out)>;
92 auto frameSpec = getFrameSpec<DataType>(queue.getDevice(), exec, extentMd);
93
// Arguments of a logging call whose opening line is missing from this listing (presumably
// ALPAKA_LOG_INFO, which takes a log level and a callable — confirm). The message is built
// inside the lambda, not at the call site.
// NOTE(review): std::stringstream needs <sstream>; no such include is visible in this
// listing (several include lines are skipped) — confirm it is included.
95 onHost::logger::memory,
96 [&]()
97 {
98 std::stringstream ss;
99 ss << "transform{ extents=" << extentMd << ", value_type=" << onHost::demangledName<DataType>() << ", "
100 << frameSpec << ", fn=" << onHost::demangledName(fn) << " }";
101 return ss.str();
102 });
103
// Hand the kernel and its arguments to the queue using the frame specification from above.
104 queue.enqueue(
105 frameSpec,
106 KernelBundle{SimdTransformKernel{}, ALPAKA_FORWARD(out), ALPAKA_FORWARD(fn), ALPAKA_FORWARD(in)...});
107 }
108} // namespace alpaka::onHost::internal
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC.
Definition common.hpp:30
#define ALPAKA_TYPEOF(...)
Get the type of an instance.
Definition common.hpp:153
#define ALPAKA_FN_INLINE
Macro defining the inline function attribute.
Definition common.hpp:87
#define ALPAKA_FORWARD(instance)
Perfectly forward an instance as argument.
Definition common.hpp:147
Concept to check for an executor.
Definition trait.hpp:133
Interface concept for objects describing multidimensional memory access.
Definition IMdSpan.hpp:91
#define ALPAKA_LOG_INFO(logLvl, callable)
Write a meta data message to the output.
Definition logger.hpp:106
constexpr auto threadsInGrid
constexpr auto queue
Definition lvl.hpp:127
decltype(auto) data(auto &&any)
pointer to data of an object
typename GetValueType< T >::type GetValueType_t
Definition trait.hpp:65
ALPAKA_FN_HOST_ACC StencilFunc(T_Func &&) -> StencilFunc< T_Func >
Vec< T, sizeof...(T_values), detail::CVec< T, T_values... > > CVec
A vector with compile-time known values.
Definition CVec.hpp:31
ALPAKA_FN_HOST_ACC ScalarFunc(T_Func &&) -> ScalarFunc< T_Func >
constexpr bool isSpecializationOf_v
Checks if T is an instance of U.
Definition utility.hpp:103
ALPAKA_FN_HOST_ACC Simd(T_1, T_Args...) -> Simd< T_1, uint32_t(sizeof...(T_Args)+1u)>