alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
StaticSharedMemory.hpp
Go to the documentation of this file.
1/* Copyright 2025 Rene Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7#include "alpaka/Vec.hpp"
11#include "alpaka/tag.hpp"
12
13#if ALPAKA_LANG_ONEAPI
14
15# include <sycl/sycl.hpp>
16
17# include <functional>
18
19namespace alpaka::onAcc
20{
21 namespace oneApi
22 {
23 namespace detail
24 {
25 /** Pointer lookup table
26 *
27 * Provides a dynamic lookup table to map an unique id to a pointer.
28 */
29 class PtrLookupTable
30 {
31 struct MetaData
32 {
33 //! pointer to allocated data
34 std::byte* ptr = nullptr;
35 //! Unique id if the next data chunk.
36 size_t id = std::numeric_limits<size_t>::max();
37 };
38
39 static constexpr uint32_t metaDataSize = sizeof(MetaData);
40
41 public:
42# ifndef NDEBUG
43 PtrLookupTable(std::byte* mem, uint32_t capacity)
44 : m_mem(reinterpret_cast<MetaData*>(mem))
45 , m_capacity(capacity / metaDataSize)
46 {
47 ALPAKA_ASSERT_ACC((m_mem == nullptr) == (m_capacity == 0u));
48 }
49# else
50 PtrLookupTable(std::byte* mem, uint32_t) : m_mem(reinterpret_cast<MetaData*>(mem))
51 {
52 }
53# endif
54
55 /** number of bytes required for bookkeeping of maxNumberOfAllocations unique allocations
56 *
57 * @param maxNumUniqueAllocations number of unique allocation a user is allowed to perform
58 * @return bytes required to store lookup meta data
59 */
60 static consteval uint32_t sizeLookupBufferInBytes(uint32_t maxNumUniqueAllocations)
61 {
62 return metaDataSize * maxNumUniqueAllocations;
63 }
64
65 /* With oneApi 2025.2 the behaviour of shared memory allocation has changed. IT behaves like cuda
66 * shared memory. Therefore, we need a unique data type to avoid pointer aliasing. Using the helper
67 * class for data alignment is backward compatible to previous versions. The reason for using std::byte
68 * is that this guaranteed support for data types which are not trivially constructible.
69 */
70 template<typename T, size_t T_id>
71 struct alignas(T) SharedMemData
72 {
73 std::byte data[sizeof(T)];
74 };
75
76 template<typename T, size_t T_id>
77 T* alloc() const
78 {
79 auto group = sycl::ext::oneapi::this_work_item::get_work_group<1>();
80 SharedMemData<T, T_id>* data
81 = sycl::ext::oneapi::group_local_memory_for_overwrite<SharedMemData<T, T_id>>(group);
82
83 MetaData& metaDataEntry = m_mem[m_numEntries];
84 ++m_numEntries;
85 ALPAKA_ASSERT_ACC(m_numEntries <= m_capacity);
86
87 // Update meta data with id and pointer to the current allocation
88 if(group.get_local_linear_id() == 0u)
89 {
90 // only one thread must update the pointer in shared memory
91 metaDataEntry.ptr = reinterpret_cast<std::byte*>(data);
92 }
93 metaDataEntry.id = T_id;
94
95 return reinterpret_cast<T*>(data);
96 }
97
98 //! Give the pointer to an exiting variable
99 //!
100 //! @tparam T type of the variable
101 //! @param id unique id of the variable
102 //! @return nullptr if variable with id not exists
103 template<typename T>
104 auto getVarPtr(size_t id) const -> T*
105 {
106 // Iterate over metadata
107 for(uint32_t off = 0u; off < m_numEntries; ++off)
108 {
109 MetaData& metaDataEntry = m_mem[off];
110
111 if(metaDataEntry.id == id)
112 return reinterpret_cast<T*>(metaDataEntry.ptr);
113 }
114
115 // Variable not found.
116 return nullptr;
117 }
118
119 private:
120 //! Number unqiue meta data entries stored
121 mutable uint32_t m_numEntries = 0u;
122
123 //! Memory layout
124 //! |Header|Padding|Variable|Padding|Header|....uninitialized Data ....
125 //! Size of padding can be zero if data after padding is already aligned.
126 MetaData* const m_mem;
127# ifndef NDEBUG
128 //! max number of meta data entries
129 uint32_t const m_capacity;
130# endif
131 };
132 } // namespace detail
133
134 class StaticSharedMemory : private detail::PtrLookupTable
135 {
136 public:
137 /** number of bytes required for bookkeeping of mayNumberOfAllocations unique allcoations
138 *
139 * @param maxNumUniqueAllocations number of unique allocation a user is allowed to perform
140 * @return bytes required to store lookup meta data
141 */
142 static consteval uint32_t sizeLookupBufferInBytes(uint32_t maxNumUniqueAllocations)
143 {
144 return detail::PtrLookupTable::sizeLookupBufferInBytes(maxNumUniqueAllocations);
145 }
146
147 StaticSharedMemory(StaticSharedMemory const&) = delete;
148
149 /** Construct shared memory allocator
150 * @param accessor local memory accessor to store lookup meta data
151 * bytes required to store N unique allocation can be calculated with
152 * sizeLookupBufferInBytes()
153 */
154 StaticSharedMemory(sycl::local_accessor<std::byte> const& accessor)
155 : PtrLookupTable(
156 reinterpret_cast<std::byte*>(accessor.get_multi_ptr<sycl::access::decorated::no>().get()),
157 static_cast<uint32_t>(accessor.size()))
158
159 {
160 }
161
162 using Base = detail::PtrLookupTable;
163
164 template<typename T, size_t T_unique>
165 T& allocVar()
166 {
167 T* data = Base::template getVarPtr<T>(T_unique);
168
169 if(!data)
170 {
171 data = Base::template alloc<T, T_unique>();
172 }
173 ALPAKA_ASSERT(data != nullptr);
174 return *data;
175 }
176 };
177
178 } // namespace oneApi
179} // namespace alpaka::onAcc
180
181#endif
#define ALPAKA_ASSERT_ACC(...)
ALPAKA_ASSERT_ACC is an assert-like macro.
Definition Assert.hpp:53
#define ALPAKA_ASSERT(...)
The assert can be explicit disabled by defining NDEBUG.
Definition Assert.hpp:14
constexpr auto oneApi
Definition Api.hpp:29
functionality which is usable on the accelerator compute device from within a kernel.
Definition executor.hpp:38
decltype(auto) data(auto &&any)
pointer to data of an object
auto alloc(concepts::Device auto const &device, alpaka::concepts::VectorOrScalar auto const &extents)
Allocate memory on the given device.
Definition Device.hpp:180
constexpr decltype(auto) get(concepts::SpecializationOf< Dict > auto &t) noexcept
Definition Dict.hpp:151