alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
generic.hpp
Go to the documentation of this file.
1/* Copyright 2025 René Widera, Mehmet Yusufoglu
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5
6#pragma once
7
15
16#include <algorithm>
17
18namespace alpaka::internal::generic
19{
20 namespace math
21 {
22 template<typename T>
23 ALPAKA_FN_HOST_ACC constexpr bool isnan(T const& value)
24 {
25 return alpaka::math::internal::ieeeIsnan(value);
26 }
27
28 template<typename T>
29 ALPAKA_FN_HOST_ACC constexpr bool isinf(T const& value)
30 {
31 return alpaka::math::internal::ieeeIsinf(value);
32 }
33
34 template<typename T>
35 ALPAKA_FN_HOST_ACC constexpr bool isfinite(T const& value)
36 {
37 return alpaka::math::internal::ieeeIsfinite(value);
38 }
39 } // namespace math
40
41 /** assign a value to each element of the destination
42 *
43 * @todo replace the kernel as soon as we have an algorithm forEach callable from host
44 */
45 struct SimdFillKernel
46 {
47 ALPAKA_FN_ACC void operator()(auto const& acc, alpaka::concepts::IMdSpan auto dest, auto const value) const
48 {
49 auto simdGrid = onAcc::SimdAlgo{onAcc::worker::threadsInGrid};
50 simdGrid.concurrent(
51 acc,
52 dest.getExtents(),
53 [value](onAcc::concepts::Acc auto const&, auto destSimdPtr) constexpr
54 {
55 using SimdType = ALPAKA_TYPEOF(destSimdPtr.load());
56 destSimdPtr = SimdType::fill(value);
57 },
58 dest);
59 }
60 };
61
62 template<typename T_Value>
63 inline void fill(
64 auto& internalQueue,
65 auto executor,
66 alpaka::concepts::IMdSpan<T_Value> auto&& dest,
67 T_Value elementValue)
68 {
70
71 auto extents = onHost::getExtents(dest);
72 auto frameSpec
73 = onHost::internal::getFrameSpec<T_Value>(*onHost::internal::getDevice(internalQueue), executor, extents);
74
77 [&]()
78 {
79 std::stringstream ss;
80 ss << "fill{ extents=" << extents << ", elementsPerFrameItem" << ", dst=" << dest
81 << ", value_type=" << onHost::demangledName(elementValue) << ", frameSpec=" << frameSpec << " }";
82 return ss.str();
83 });
84
85 onHost::internal::enqueue(internalQueue, frameSpec, KernelBundle{SimdFillKernel{}, dest, elementValue});
86 }
87} // namespace alpaka::internal::generic
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC.
Definition common.hpp:30
#define ALPAKA_FN_HOST_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC.
Definition common.hpp:31
#define ALPAKA_LOG_INFO(logLvl, callable)
Write a meta data message to the output.
Definition logger.hpp:106
#define ALPAKA_LOG_FUNCTION(logLvl)
Log the entry and exit of a scope.
Definition logger.hpp:95
constexpr auto threadsInGrid
constexpr auto memory
Definition lvl.hpp:112
constexpr auto demangledName()
decltype(auto) getExtents(auto &&any)
Object extents.
Definition interface.hpp:25
ALPAKA_FN_HOST KernelBundle(TKernelFn const &, TArgs &&...) -> KernelBundle< TKernelFn, TArgs... >
User defined deduction guide with trailing return type. For CTAD during the construction.
On some constexpr function signatures ALPAKA_FN_HOST_ACC is required for CUDA; otherwise a __host__ f...