alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
IdxLayer.hpp
Go to the documentation of this file.
1/* Copyright 2024 René Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7#include "alpaka/Vec.hpp"
9
10#if ALPAKA_LANG_HIP
11
12namespace alpaka::onAcc
13{
14 namespace unifiedCudaHip
15 {
16 template<typename T_OptimizedThreadSpec>
17 struct BlockLayer
18 {
19 T_OptimizedThreadSpec const& m_optimizedThreadSpec;
20 static constexpr uint32_t dim = T_OptimizedThreadSpec::dim();
21 using IdxType = typename T_OptimizedThreadSpec::NumBlocksVecType::type;
22
23 constexpr BlockLayer(T_OptimizedThreadSpec const& optimizedThreadSpec)
24 : m_optimizedThreadSpec(optimizedThreadSpec)
25 {
26 }
27
28 constexpr auto idx() const
29 {
30 if constexpr(dim <= 3u)
31 {
32 return Vec<IdxType, 3u>{hipBlockIdx_z, hipBlockIdx_y, hipBlockIdx_x}.template rshrink<dim>();
33 }
34 else
35 {
36 return mapToND(m_optimizedThreadSpec.getNumBlocks(), static_cast<IdxType>(hipBlockIdx_x));
37 }
38 }
39
40 constexpr auto count() const
41 {
42 if constexpr(dim <= 3u)
43 {
44 return Vec<IdxType, 3u>{hipGridDim_z, hipGridDim_y, hipGridDim_x}.template rshrink<dim>();
45 }
46 else
47 {
48 return m_optimizedThreadSpec.getNumBlocks();
49 }
50 }
51 };
52
53 template<typename T_OptimizedThreadSpec>
54 struct ThreadLayer
55 {
56 T_OptimizedThreadSpec const& m_optimizedThreadSpec;
57 static constexpr uint32_t dim = T_OptimizedThreadSpec::dim();
58 using IdxType = typename T_OptimizedThreadSpec::NumThreadsVecType::type;
59
60 constexpr ThreadLayer(T_OptimizedThreadSpec const& optimizedThreadSpec)
61 : m_optimizedThreadSpec(optimizedThreadSpec)
62 {
63 }
64
65 constexpr auto idx() const
66 {
67 if constexpr(dim <= 3u)
68 {
69 return Vec<IdxType, 3u>{hipThreadIdx_z, hipThreadIdx_y, hipThreadIdx_x}.template rshrink<dim>();
70 }
71 else
72 {
73 return mapToND(m_optimizedThreadSpec.getNumThreads(), static_cast<IdxType>(hipThreadIdx_x));
74 }
75 }
76
77 constexpr auto count() const
78 {
79 if constexpr(dim <= 3u)
80 {
81 return Vec<IdxType, 3u>{hipBlockDim_z, hipBlockDim_y, hipBlockDim_x}.template rshrink<dim>();
82 }
83 else
84 {
85 return m_optimizedThreadSpec.getNumThreads();
86 }
87 }
88
89 constexpr auto count() const
90 requires alpaka::concepts::CVector<typename T_OptimizedThreadSpec::NumThreadsVecType>
91 {
92 return typename T_OptimizedThreadSpec::NumThreadsVecType{};
93 }
94 };
95 } // namespace unifiedCudaHip
96} // namespace alpaka::onAcc
97
98#endif
Functionality that is usable on the accelerator compute device from within a kernel.
Definition executor.hpp:38
constexpr Vec< T_IntegralType, T_dim > mapToND(Vec< T_IntegralType, T_dim, T_Storage > const &extents, T_IntegralType linearIdx)
Maps a linear index to an N-dimensional index.
Definition Vec.hpp:873