alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
UniformReal.hpp
Go to the documentation of this file.
1/* Copyright 2025 Mehmet Yusufoglu, Tim Hanel
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
10
11namespace alpaka::rand::distribution::internal
12{
13 /** Returns a constant, which is equivalent to std::nextafter(T_Floating{1}, T_Floating{0})
14 * or more specifically: returns the highest floating-point value lower than one.
15 * There does not seem to exist a representation of this particular floating-point number in std::numerical_limits
16 * and std::nextafter is not constexpr(cpp20).
17 */
18 template<std::floating_point T_Floating>
19 consteval T_Floating prevOne() noexcept
20 {
21 if constexpr(sizeof(T_Floating) == 4)
22 {
23 return std::bit_cast<T_Floating>(static_cast<uint32_t>(0x3f7f'ffff));
24 }
25 else if constexpr(sizeof(T_Floating) == 8)
26 {
27 return std::bit_cast<T_Floating>(static_cast<uint64_t>(0x3fef'ffff'ffff'ffff));
28 }
29 }
30
31 /**
32 * Contains some (constexpr-)constants for random bit integer to floating point conversion -- which
33 * improve readability.
34 */
35 template<std::unsigned_integral T_Integer, std::floating_point T_Floating>
36 struct Constants
37 {
38 /// represents one bucket when converting an integer numbers to a floating point type in the range [0,1]
39 static constexpr T_Floating normalizedIntervalBin
40 = T_Floating{1} / static_cast<T_Floating>(std::numeric_limits<T_Integer>::max());
41 /// this expression has been used by nvidia curand to respect the lower bounds criteria -> it essentially
42 /// shifts the distribution to the bin center (on the upper bounds this shift is mitigated due to rounding --
43 /// meaning 1 is not exceeded)
44 static constexpr T_Floating halfBinWidth = normalizedIntervalBin / T_Floating{2};
45 /// uses a slightly smaller bucket size [0,std::nextafter(1,0)] / MAX to enforce the (open-) upper bounds
46 /// criteria
47 static constexpr T_Floating normalizedOpenIntervalBin
48 = prevOne<T_Floating>() / static_cast<T_Floating>(std::numeric_limits<T_Integer>::max());
49 };
50
51 /** Convert an integer RNG result to a floating-point value.
52 *
53 * This is the fallback implementation used when no interval specialization
54 * matches. It should never be instantiated and exists only to
55 * catch unsupported interval configurations.
56 */
57 template<typename T_Engine, concepts::Interval T_Interval, std::integral T_Integer, std::floating_point T_Floating>
58 struct IntervalAwareConversion;
59
60 /** Converts an integer RNG output to a floating-point type in the interval [0, 1).
61 * The value is mapped to an interval in the range [0,std::nextafter(1,0)),
62 * where std::nextafter(1,0) represents the highest representable floating-point value lower than one.
63 */
64 template<typename T_Engine, std::integral T_Integer, std::floating_point T_Floating>
65 struct IntervalAwareConversion<T_Engine, interval::CO, T_Integer, T_Floating>
66 {
67 constexpr auto operator()(T_Integer const& i) const
68 {
69 constexpr auto interval = Constants<T_Integer, T_Floating>::normalizedOpenIntervalBin;
70 return static_cast<T_Floating>(i) * interval;
71 };
72 };
73
74 /** Convert an integer RNG output to a floating-point type in the interval (0, 1).
75 * The value is mapped to an interval in the range [0,std::nextafter(1,0)),
76 * where std::nextafter(1,0) represents the highest representable floating-point value lower than one.
77 * Afterward an offset is added to avoid generating zero - inspired by Nvidias curand approach.
78 * **Note:** The bounds criteria is only strictly valid for the base interval (0, 1).
79 * When applying a scaling factor > 1, rounding effects may still cause the lower or upper bound
80 * to be hit. See **scaleInterval()** for the post-scaling correction.
81 */
82 template<typename T_Engine, std::integral T_Integer, std::floating_point T_Floating>
83 struct IntervalAwareConversion<T_Engine, interval::OO, T_Integer, T_Floating>
84 {
85 constexpr auto operator()(T_Integer const& i) const
86 {
87 return static_cast<T_Floating>(i) * Constants<T_Integer, T_Floating>::normalizedOpenIntervalBin
88 + Constants<T_Integer, T_Floating>::halfBinWidth;
89 };
90 };
91
92 /** Convert an integer RNG output to a floating-point type in the interval (0, 1].
93 * Afterward an offset is added to avoid generating zero - inspired by Nvidias curand approach.
94 * **Note:** The bounds criteria is only strictly valid for the base interval (0, 1).
95 * When applying a scaling factor > 1, rounding effects may still cause the lower or upper bound
96 * to be hit. See **scaleInterval()** for the post-scaling correction.
97 */
98 template<typename T_Engine, std::integral T_Integer, std::floating_point T_Floating>
99 struct IntervalAwareConversion<T_Engine, interval::OC, T_Integer, T_Floating>
100 {
101 constexpr auto operator()(T_Integer const& i) const
102 {
103 return static_cast<T_Floating>(i) * Constants<T_Integer, T_Floating>::normalizedIntervalBin
104 + Constants<T_Integer, T_Floating>::halfBinWidth;
105 };
106 };
107
108 /** Converts an integer RNG output to a floating point type in the closed interval [0, 1].
109 */
110 template<typename T_Engine, std::integral T_Integer, std::floating_point T_Floating>
111 struct IntervalAwareConversion<T_Engine, interval::CC, T_Integer, T_Floating>
112 {
113 constexpr auto operator()(T_Integer const& i) const
114 {
115 return static_cast<T_Floating>(i) * Constants<T_Integer, T_Floating>::normalizedIntervalBin;
116 };
117 };
118
119 /** Adapt the bit length of the engine output to match the target type.
120 * This is the default case where the engine result type already matches and thus the engine is simply invoked.
121 */
122 template<typename T_Engine, uint32_t byteLengthEngineResult, uint32_t byteLengthRealType>
123 struct BitLengthConformityAdapter
124 {
125 static_assert(
126 (byteLengthEngineResult == 4u || byteLengthRealType == 8u),
127 "Result returned by the randomBitGenerator does not have a length that is accepted by the uniformReal "
128 "distribution!");
129 static_assert(
130 (byteLengthEngineResult == 8u || byteLengthRealType == 4u),
131 "The requested floating point type does not have a length that is accepted by the uniformReal "
132 "distribution!");
133 static_assert(
134 byteLengthEngineResult == byteLengthRealType,
135 "By logic this should never fail in case the compiler accepts the specialization of the adapter!");
136
137 constexpr auto operator()(T_Engine& engine)
138 {
139 return engine();
140 }
141 };
142
143 /** Adapts a 32-bit engine output to a 64-bit value. This involves invoking the engine twice. */
144 template<typename T_Engine>
145 struct BitLengthConformityAdapter<T_Engine, 4u, 8u>
146 {
147 constexpr auto operator()(T_Engine& engine)
148 {
149 return static_cast<uint64_t>(engine()) << 32 | static_cast<uint64_t>(engine());
150 }
151 };
152
153 /** Adapt a 64-bit engine output to a 32-bit value. Uses a simple narrowing conversion.*/
154 template<typename T_Engine>
155 struct BitLengthConformityAdapter<T_Engine, 8u, 4u>
156 {
157 constexpr auto operator()(T_Engine& engine)
158 {
159 return static_cast<uint32_t>(engine());
160 }
161 };
162
163 /** Generate a floating-point value in the requested interval.
164 *
165 * Adapts the engine output to the required bit length and converts the integer
166 * to a normalized floating point value in the requested interval */
167 template<concepts::Interval T_Interval, std::floating_point T_Result, typename T_Engine>
168 constexpr auto getNormalizedUniformReal(T_Engine& engine) -> T_Result
169 {
170 using T_EngineResult = std::remove_cvref_t<decltype(engine())>;
171 // generates an integer the length of the size T_Result
172 auto adaptedBits = BitLengthConformityAdapter<
173 T_Engine,
174 static_cast<uint32_t>(sizeof(T_EngineResult)),
175 static_cast<uint32_t>(sizeof(T_Result))>{}(engine);
176 // convert randomBits into the required floating-point type, while respecting the requested bounds criteria
177 return IntervalAwareConversion<T_Engine, T_Interval, ALPAKA_TYPEOF(adaptedBits), T_Result>{}(adaptedBits);
178 }
179 template<concepts::UniformVectorEngine T_Engine, uint32_t TResultSize, uint32_t TElemSize, uint32_t TElems>
180 struct vectorDispatchWrapper;
181
182 template<concepts::UniformVectorEngine T_Engine, uint32_t TElemSize, uint32_t TElems>
183 struct vectorDispatchWrapper<T_Engine, 4u, TElemSize, TElems>
184 {
185 T_Engine& ph;
186 static_assert(TElems > 0, "RandomEngine did not return any elements!");
187
188 constexpr explicit vectorDispatchWrapper(T_Engine& eng) : ph(eng)
189 {
190 }
191
192 constexpr uint32_t operator()() const
193 {
194 auto res = ph();
195 return static_cast<uint32_t>(res[0]);
196 }
197 };
198
199 /// **Wrapper specialization enabling efficient generation of 64-bit values from vectorized engines without
200 /// requiring two engine calls.**
201 template<concepts::UniformVectorEngine T_Engine, uint32_t TElems>
202 struct vectorDispatchWrapper<T_Engine, 8u, 4u, TElems>
203 {
204 T_Engine& ph;
205 using TResult = decltype(ph());
206 static constexpr auto dim = TResult::dim();
207 static_assert(TElems >= 2, "Engine result dimension must be >= 2, to be usable in UniformReal<double>");
208
209 constexpr explicit vectorDispatchWrapper(T_Engine& eng) : ph(eng)
210 {
211 }
212
213 constexpr uint64_t operator()() const
214 {
215 auto res = ph();
216 return (static_cast<uint64_t>(res[0]) << 32) | static_cast<uint64_t>(res[1]);
217 }
218 };
219
220 template<std::floating_point T_Floating, concepts::Interval T_Interval>
221 class UniformRealBase
222 {
223 public:
224 using result_type = T_Floating;
225
226 using Interval_type = T_Interval;
227
228 constexpr explicit UniformRealBase(T_Floating min, T_Floating max, [[maybe_unused]] T_Interval)
229 : m_min(min)
230 , m_max(max)
231 , m_range(m_max - m_min) // abs is a fail-safe in case min>max
232 {
233 }
234
235 protected:
236 T_Floating const m_min;
237 T_Floating const m_max;
238 T_Floating const m_range;
239 };
240} // namespace alpaka::rand::distribution::internal
241
243{
244 /** Select a floating-point value from a uniform interval.
245 *
246 * This generator produces floating-point values of type `T_Result` drawn from a uniform
247 * interval `[a, b)` or `(a, b]`, depending on the interval type specified via `Interval_v`: default case is CO
248 * ->[a,b). The interface mirrors `std::uniform_real_distribution`, and can be invoked with any engine
249 * adhering to the 'UniformRandomEngine' concept, which includes std uniform engines.
250 *
251 * **Supported result types:** `float`, `double`
252 * **Supported engine result widths:** 32-bit and 64-bit unsigned integers.
253 *
254 * @note This distribution is subject to a small non-uniform bias; see
255 * **UniformReal::operator()** for details.
256 */
257 template<std::floating_point T_Result, concepts::Interval T_Interval = interval::CO>
258 struct UniformReal : internal::UniformRealBase<T_Result, T_Interval>
259 {
260 static_assert(static_cast<uint32_t>(sizeof(T_Result)) == 4u || static_cast<uint32_t>(sizeof(T_Result)) == 8u);
261
262 template<std::integral T_Value>
263 static consteval void checkValueConformity()
264 {
265 static_assert(
266 static_cast<uint32_t>(sizeof(T_Value)) == 4u || static_cast<uint32_t>(sizeof(T_Value)) == 8u);
267 }
268
269 using Base = internal::UniformRealBase<T_Result, T_Interval>;
270
271 constexpr explicit UniformReal([[maybe_unused]] T_Interval interval = T_Interval{}) : Base(0, 1, interval)
272 {
273 }
274
275 constexpr UniformReal(T_Result min, T_Result max, [[maybe_unused]] T_Interval interval = T_Interval{})
276 : Base(min, max, interval)
277 {
278 }
279
280 /** Selects a value from a uniform distribution over the configured (min, max) interval,
281 * respecting the specified interval bounds.**
282 *
283 * **Input:** a random engine conforming to the `UniformRandomEngine` concept
284 * (currently accepts stdlib uniform engines and alpaka engines included in the alpaka::rand::engine namespace)
285 * **Output:** a floating-point value sampled from the configured distribution.
286 *
287 * @note: This distribution introduces a slight numerical bias due to floating-point
288 * rounding effects and the use of a **1 / MAX** integer-to-floating-point conversion methods
289 * @see Goualard, F. (2020). Generating Random Floating-Point Numbers by Dividing Integers: A Case Study.
290 * https://doi.org/10.1007/978-3-030-50417-5_2 or Allen B. Downey Generating Pseudo-random Floating-Point
291 * Values https://allendowney.com/research/rand/.
292 * Additionally, using an interval with an open bound
293 * (OC,CO,OO) may introduce yet another small non-uniform bias -- @see scaleInterval().
294 */
295 template<concepts::UniformRandomEngine T_Engine>
296 constexpr auto operator()(T_Engine& engine) const -> T_Result
297 {
298 return engineDispatch(engine);
299 }
300
301 private:
302 /** Dispatch for std engines and alpaka abbreviations conforming to the std::uniform_random_bit_generator
303 * concept (e.g. Philox4x32x10)
304 */
305 template<concepts::UniformStdEngine T_Engine>
306 constexpr auto engineDispatch(T_Engine& engine) const -> T_Result
307 {
308 using T_EngineResult = ALPAKA_TYPEOF(engine());
310 T_Result res = internal::getNormalizedUniformReal<T_Interval, T_Result, T_Engine>(engine);
311 // @TODO potentially add underflow protection as suggested by https://doi.org/10.1145/3503512
312 return scaleInterval(res);
313 }
314
315 /** Dispatch for vector engines (uniform bit generators that return a vector (e.g Philox4x32x10Vector) to
316 * enable efficient double precision uniform_real generation (reducing the number of invocations)
317 */
318 template<concepts::UniformVectorEngine T_Engine>
319 constexpr auto engineDispatch(T_Engine& engine) const -> T_Result
320 {
321 using T_EngineResult = ALPAKA_TYPEOF(engine());
322 using valueType = typename T_EngineResult::type;
324 constexpr auto dim = getDim(T_EngineResult{});
325 auto dispatchWrapper = internal::vectorDispatchWrapper<
326 T_Engine,
327 static_cast<uint32_t>(sizeof(T_Result)),
328 static_cast<uint32_t>(sizeof(valueType)),
329 dim>(engine);
330 using T_DispatchWrapper = decltype(dispatchWrapper);
331 T_Result res
332 = internal::getNormalizedUniformReal<T_Interval, T_Result, T_DispatchWrapper>(dispatchWrapper);
333 return scaleInterval(res);
334 }
335
336 /** For open bounds, the normalized value in (0, 1) may (still) hit the lower or upper bound
337 * due to floating-point rounding (after scaling is applied).
338 *
339 * To enforce adherence to the requested interval, the result is shifted to the next representable
340 * value using std::nextafter.
341 * WARNING: std::nextafter is not constexpr in cpp20 -> this might cause problems on some devices (regarding
342 * missing __device__ annotations) and requires often unnecessary runtime evaluation -- future
343 * maintainers should consider creating a constexpr adaptation of std::nextafter
344 *
345 * @note This introduces a(-/another) small non-uniform bias. The current implementation is inherently
346 * non-uniform due to integer-to-floating-point mapping @see https://doi.org/10.1007/978-3-030-50417-5_2
347
348
349 */
350 constexpr auto scaleInterval(T_Result const& normalizedVal) const -> T_Result
351 {
352 T_Result res = normalizedVal * this->m_range + this->m_min;
353
354 if constexpr(std::is_same_v<T_Interval, interval::OC> || std::is_same_v<T_Interval, interval::OO>)
355 {
356 if(res == this->m_min)
357 res = std::nextafter(this->m_min, this->m_max);
358 }
359 if constexpr(std::is_same_v<T_Interval, interval::CO> || std::is_same_v<T_Interval, interval::OO>)
360 {
361 if(res == this->m_max)
362 res = std::nextafter(this->m_max, this->m_min);
363 }
364 return res;
365 }
366 };
367} // namespace alpaka::rand::distribution
#define ALPAKA_TYPEOF(...)
Get the type of instance.
Definition common.hpp:153
constexpr auto min(auto const &a, auto const &b)
Definition math.hpp:272
constexpr auto max(auto const &a, auto const &b)
Definition math.hpp:278
consteval uint32_t getDim(T const &any)
Definition trait.hpp:116
constexpr UniformReal(T_Result min, T_Result max, T_Interval interval=T_Interval{})
constexpr auto operator()(T_Engine &engine) const -> T_Result
Selects a value from a uniform distribution over the configured (min, max) interval,...
static consteval void checkValueConformity()
internal::UniformRealBase< T_Result, T_Interval > Base
constexpr UniformReal(T_Interval interval=T_Interval{})