alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
StdSimd.hpp
Go to the documentation of this file.
1/* Copyright 2026 René Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5/** @file This file provides a basic implementation of a SIMD vector.
6 *
7 * The implementation is based on the class Vec:
8 * - the storage policy should become the native SIMD implementation e.g. std::simd
9 * - load/store and SIMD specifics should be implemented in the storage policy
10 * - the name of storage policy should be changed
11 *
12 * The current operator operations rely on the compiler's auto-vectorization.
13 */
14
15#pragma once
16
17#include "alpaka/api/api.hpp"
21#include "alpaka/simd/trait.hpp"
23
24#include <type_traits>
25
26#if ALPAKA_HAS_STD_SIMD
27
28namespace alpaka
29{
30 namespace internal
31 {
32 template<typename T_Type, uint32_t T_width>
33 struct StdSimd
34 : protected alpakaStdSimd::rebind_simd_t<T_Type, alpakaStdSimd::fixed_size_simd<T_Type, T_width>>
35 {
36 using BaseType = alpakaStdSimd::rebind_simd_t<T_Type, alpakaStdSimd::fixed_size_simd<T_Type, T_width>>;
37
38 using value_type = typename BaseType::value_type;
39 using reference = typename BaseType::reference;
40
41 using BaseType::operator[];
42
43 constexpr StdSimd() = default;
44 constexpr StdSimd(StdSimd const&) = default;
45 constexpr StdSimd(StdSimd&&) = default;
46 constexpr StdSimd& operator=(StdSimd&& rhs) = default;
47
48 constexpr StdSimd& operator=(StdSimd const& rhs) = default;
49
/** Assign a scalar to the pack.
 *
 * The value is assigned to the native SIMD object; with std::simd semantics this presumably broadcasts the
 * value to every lane — confirm against the alpakaStdSimd implementation in use.
 *
 * @param value scalar assigned to the native SIMD object
 * @return reference to this pack
 */
50 constexpr StdSimd& operator=(T_Type const value)
51 {
52 this->asNativeType() = value;
53 return *this;
54 }
55
56 /** Construct the pack from exactly T_width lane values.
 *
 * This constructor is required because exposing the array constructors does not work.
 * References and cv-qualifiers are stripped before the arguments are compared against T_Type, so named
 * variables (lvalues) and const values are accepted as well as temporaries. Without the stripping, an lvalue
 * argument deduces T_Args as `T_Type&` and the constraint would reject it.
 *
 * @param args one value of type T_Type per lane; the i-th argument becomes lane i
 */
57 template<typename... T_Args>
58 requires(sizeof...(T_Args) == T_width && (std::same_as<std::remove_cvref_t<T_Args>, T_Type> && ...))
59 ALPAKA_FN_HOST_ACC StdSimd(T_Args&&... args)
60 : BaseType([=](int i) constexpr { return std::array<T_Type, T_width>{args...}[i]; })
61 {
62 }
63
/** Wrap a native SIMD object.
 *
 * The constructor is not explicit, so a value of the native type converts implicitly to StdSimd.
 *
 * @param nativeSimd native SIMD object that is copied into the base class
 */
64 constexpr StdSimd(BaseType const& nativeSimd) : BaseType{nativeSimd}
65 {
66 }
67
68 /** static cast the instance to the parent std::simd class
69 *
70 * This method is mostly used to get access to native arithmetic and comparison operators.
71 * @{
72 */
73 constexpr auto& asNativeType()
74 {
75 return static_cast<BaseType&>(*this);
76 }
77
78 constexpr auto const& asNativeType() const
79 {
80 return static_cast<BaseType const&>(*this);
81 }
82
83 /** @} */
84
/** Create a masked expression for this pack.
 *
 * Both overloads forward the native type of the mask and the native type of this pack to
 * alpakaStdSimd::where and return its result unchanged. The const overload passes a const reference to the
 * native SIMD object, the non-const overload a mutable one.
 *
 * @param mask SIMD mask; only its asNativeType() result is used
 * @return whatever alpakaStdSimd::where returns for the given mask and native SIMD object
 * @{
 */
85 constexpr decltype(auto) where(alpaka::concepts::SimdMask auto const& mask) const
86 {
87 return alpakaStdSimd::where(mask.asNativeType(), asNativeType());
88 }
89
90 constexpr decltype(auto) where(alpaka::concepts::SimdMask auto const& mask)
91 {
92 return alpakaStdSimd::where(mask.asNativeType(), asNativeType());
93 }
/** @} */
94
/** Create a pack from a single scalar.
 *
 * The pack wraps a native SIMD object constructed from the scalar; with std::simd semantics this presumably
 * sets every lane to the value — confirm against the alpakaStdSimd implementation in use.
 *
 * @param value scalar used to construct the native SIMD object
 * @return StdSimd wrapping BaseType(value)
 */
95 static constexpr auto fill(T_Type value)
96 {
97 return StdSimd{BaseType(value)};
98 }
99
/** Load the pack from memory.
 *
 * The load uses the vector_aligned tag if the alignment reported for T_Type is a multiple of the native
 * SIMD memory alignment, otherwise the element_aligned tag. The decision is made at compile time.
 *
 * @param data pointer to the elements that are read by the native copy_from
 * @param alignment alignment description; queried via get<T_Type>()
 */
100 constexpr void copyFrom(T_Type const* data, alpaka::concepts::Alignment auto alignment)
101 {
102 if constexpr((alignment.template get<T_Type>() % alpakaStdSimd::memory_alignment_v<BaseType>) == 0u)
103 this->asNativeType().copy_from(data, alpakaStdSimd::vector_aligned);
104 else
105 this->asNativeType().copy_from(data, alpakaStdSimd::element_aligned);
106 }
107
/** Store the pack to memory.
 *
 * The store uses the vector_aligned tag if the alignment reported for T_Type is a multiple of the native
 * SIMD memory alignment, otherwise the element_aligned tag. The decision is made at compile time.
 *
 * NOTE(review): the element type of `data` is deduced, but the alignment is always queried for T_Type —
 * confirm this is intended when the destination element type differs from T_Type.
 *
 * @param data pointer to the destination that is written by the native copy_to
 * @param alignment alignment description; queried via get<T_Type>()
 */
108 constexpr void copyTo(auto* data, alpaka::concepts::Alignment auto alignment) const
109 {
110 if constexpr((alignment.template get<T_Type>() % alpakaStdSimd::memory_alignment_v<BaseType>) == 0u)
111 this->asNativeType().copy_to(data, alpakaStdSimd::vector_aligned);
112 else
113 this->asNativeType().copy_to(data, alpakaStdSimd::element_aligned);
114 }
115
116 /** Generate the compound assignment operators for one operator token.
 *
 * For the given `op` two member overloads are defined: one taking another StdSimd and one taking a scalar
 * of type T_Type. Both apply `op` to the native SIMD object and return a reference to this pack.
 * The macro is a local helper and is undefined again below.
117 */
118# define ALPAKA_VECTOR_ASSIGN_OP(op) \
119 constexpr StdSimd& operator op(StdSimd const& rhs) \
120 { \
121 this->asNativeType() op rhs.asNativeType(); \
122 return *this; \
123 } \
124 constexpr StdSimd& operator op(T_Type const value) \
125 { \
126 this->asNativeType() op value; \
127 return *this; \
128 }
129
134
135# undef ALPAKA_VECTOR_ASSIGN_OP
136 };
137
/** Generate the free binary operators for one operator token.
 *
 * Three overloads are defined for `op`: pack op pack, pack op scalar and scalar op pack. Each applies `op`
 * to the native SIMD object(s) and wraps the result in a StdSimd of the same element type and width.
 *
 * `typenameOrConcept` introduces the template parameter T_Type: it is either `typename` (no constraint) or a
 * concept such as std::integral that restricts the overloads to matching element types.
 */
138# define ALPAKA_VECTOR_BINARY_OP(typenameOrConcept, op) \
139 template<typenameOrConcept T_Type, uint32_t T_width> \
140 constexpr auto operator op(const StdSimd<T_Type, T_width>& lhs, const StdSimd<T_Type, T_width>& rhs) \
141 { \
142 return StdSimd<T_Type, T_width>{lhs.asNativeType() op rhs.asNativeType()}; \
143 } \
144 template<typenameOrConcept T_Type, uint32_t T_width> \
145 constexpr auto operator op(const StdSimd<T_Type, T_width>& lhs, T_Type rhs) \
146 { \
147 return StdSimd<T_Type, T_width>{lhs.asNativeType() op rhs}; \
148 } \
149 template<typenameOrConcept T_Type, uint32_t T_width> \
150 constexpr auto operator op(T_Type lhs, const StdSimd<T_Type, T_width>& rhs) \
151 { \
152 return StdSimd<T_Type, T_width>{lhs op rhs.asNativeType()}; \
153 }
154
// Arithmetic operators are generated for every element type.
155 ALPAKA_VECTOR_BINARY_OP(typename, +)
156 ALPAKA_VECTOR_BINARY_OP(typename, -)
157 ALPAKA_VECTOR_BINARY_OP(typename, *)
158 ALPAKA_VECTOR_BINARY_OP(typename, /)
// Shift and bitwise operators are generated for integral element types only.
159 ALPAKA_VECTOR_BINARY_OP(std::integral, <<)
160 ALPAKA_VECTOR_BINARY_OP(std::integral, >>)
161 ALPAKA_VECTOR_BINARY_OP(std::integral, &)
162 ALPAKA_VECTOR_BINARY_OP(std::integral, |)
163 ALPAKA_VECTOR_BINARY_OP(std::integral, ^)
164
165 /** Workaround for a clang + libstdc++ 12 issue with the std::simd modulo operator
166 *
167 * /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/experimental/bits/simd_x86.h:1492:51: error:
168 * explicit qualification required to use member '_S_divides' from dependent base class 1492 | return
169 * _Base::_S_minus(__x, _S_multiplies(__y, _S_divides(__x, __y)));
170 *
171 * This workaround executes the operation lane by lane, which can break SIMD usage if the auto-vectorizer
172 * does not understand the code.
173 */
// The lane-wise fallback is selected for clang together with libstdc++ when the libstdc++ release is 12 or
// when _GLIBCXX_RELEASE is not defined at all; every other configuration uses the macro-generated operators.
174# if defined(__clang__) && defined(__GLIBCXX__) && (!defined(_GLIBCXX_RELEASE) || _GLIBCXX_RELEASE == 12)
// pack % pack: the native SIMD object is built from a callable that computes the modulo for lane i.
175 template<std::integral T_Type, uint32_t T_width>
176 constexpr auto operator%(const StdSimd<T_Type, T_width>& lhs, const StdSimd<T_Type, T_width>& rhs)
177 {
178 using BaseType = typename StdSimd<T_Type, T_width>::BaseType;
179 return StdSimd<T_Type, T_width>(
180 BaseType([&](int i) { return lhs.asNativeType()[i] % rhs.asNativeType()[i]; }));
181 }
182
// pack % scalar: every lane of lhs is reduced modulo the same scalar.
183 template<std::integral T_Type, uint32_t T_width>
184 constexpr auto operator%(StdSimd<T_Type, T_width> const& lhs, T_Type rhs)
185 {
186 using BaseType = typename StdSimd<T_Type, T_width>::BaseType;
187 return StdSimd<T_Type, T_width>(BaseType([&](int i) { return lhs.asNativeType()[i] % rhs; }));
188 }
189
// scalar % pack: the scalar is reduced modulo each lane of rhs.
190 template<std::integral T_Type, uint32_t T_width>
191 constexpr auto operator%(T_Type lhs, StdSimd<T_Type, T_width> const& rhs)
192 {
193 using BaseType = typename StdSimd<T_Type, T_width>::BaseType;
194 return StdSimd<T_Type, T_width>(BaseType([&](int i) { return lhs % rhs.asNativeType()[i]; }));
195 }
196# else
// Native modulo operator of the SIMD type, generated like the other integral binary operators.
197 ALPAKA_VECTOR_BINARY_OP(std::integral, %)
198# endif
199# undef ALPAKA_VECTOR_BINARY_OP
200
201 } // namespace internal
202
203 namespace trait
204 {
205 template<typename T_Type, uint32_t T_width>
206 requires(
207 std::has_single_bit(T_width) && std::has_single_bit(sizeof(T_Type))
208 && alpakaStdSimd::fixed_size_simd<T_Type, T_width>::size() > 0)
210 {
211 using type = internal::StdSimd<T_Type, T_width>;
212 };
213
214 } // namespace trait
215} // namespace alpaka
216#endif
#define ALPAKA_VECTOR_ASSIGN_OP(op)
assign operator
Definition Simd.hpp:232
#define ALPAKA_VECTOR_BINARY_OP(typenameOrConcept, op)
binary operators
Definition Simd.hpp:549
#define ALPAKA_FN_HOST_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_F...
Definition common.hpp:31
void fill(auto &internalQueue, auto executor, alpaka::concepts::IMdSpan< T_Value > auto &&dest, T_Value elementValue)
Definition generic.hpp:63
alpaka internal implementations.
Definition generic.hpp:19
constexpr auto operator%(const EmuSimd< T_Type, T_width > &lhs, const EmuSimd< T_Type, T_width > &rhs)
Definition EmuSimd.hpp:235
main alpaka namespace.
Definition alpaka.hpp:76
constexpr SimdWhereExpr< T_Mask, T_Simd > where(T_Mask const &mask, T_Simd &value)
Conditionally update each component of an SIMD pack.
Get the storage type for a SIMD pack.
Definition EmuSimd.hpp:250