alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Queue.hpp
Go to the documentation of this file.
1/* Copyright 2024 René Widera
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
13#include "alpaka/api/util.hpp"
16#include "alpaka/interface.hpp"
25
26#include <cstdint>
27#include <cstring>
28#include <future>
29#include <sstream>
30
31namespace alpaka::onHost
32{
33 namespace cpu
34 {
35 template<typename T_Device>
36 struct Queue : std::enable_shared_from_this<Queue<T_Device>>
37 {
38 public:
39 Queue(internal::concepts::DeviceHandle auto device, uint32_t const idx, uint32_t numIdx, bool isBlocking)
40 : m_device(std::move(device))
41 , m_idx(idx)
42 , m_numaIdx(numIdx)
43 , m_workerThread(numIdx)
44 , m_isBlocking(isBlocking)
45 {
47 }
48
54
55 Queue(Queue const&) = delete;
56 Queue& operator=(Queue const&) = delete;
57
58 Queue(Queue&&) = delete;
59 Queue& operator=(Queue&&) = delete;
60
61 bool operator==(Queue const& other) const
62 {
63 return m_idx == other.m_idx && m_device == other.m_device;
64 }
65
66 bool operator!=(Queue const& other) const
67 {
68 return !(*this == other);
69 }
70
71 private:
72 void _()
73 {
75 }
76
78 uint32_t m_idx = 0u;
79 uint32_t m_numaIdx = 0u;
81 bool m_isBlocking{false};
82 /** Flag to show if a blocking task is executed
83 *
84 * This variable is only used if m_isBlocking == true.
85 *
86 * state: If true a thread is executing a blocking task, else false.
87 */
88 std::atomic<bool> m_isBlockingTaskExecuted{false};
89
90 /** Mutex to ensure sequential execution of tasks and operations if the queue is blocking.
91 *
92 * For a non-blocking queue @c m_workerThread takes care of the execution order.
93 */
94 std::mutex m_mutex;
95
96 /** Submit a task to the queue.
97 *
98 * Centralizes blocking / non-blocking behavior within the method to keep other code as easy as possible.
99 * For a blocking queue this method is NOT giving the control back to the caller until the operation is
100 * processed.
101 * All internal calls should use this method and not enqueue tasks directly in @c m_workerThread
102 */
103 template<typename T_Fn>
104 auto submit(T_Fn&& fn)
105 {
107 if(m_isBlocking)
108 {
109 std::lock_guard<std::mutex> lk(m_mutex);
111 fn();
112 // silence tsan warnings: The promise is fulfilled directly and only an already fulfilled future is
113 // returned, there can not be a data race in between.
114#if defined(__GNUC__) && !defined(__clang__)
115# pragma GCC diagnostic push
116# pragma GCC diagnostic ignored "-Wtsan"
117#endif
118 // return a ready future-like placeholder; reuse CallbackThread interface minimally
119 std::promise<void> p;
120 auto f = p.get_future();
121 p.set_value();
122#if defined(__GNUC__) && !defined(__clang__)
123# pragma GCC diagnostic pop
124#endif
126 // to keep the uniform interface with the non-blocking case,
127 // return by moving the f since it is move-only
128 return f;
129 }
130 // enqueue the task into the worker thread, callers can wait/chain later.
131 return m_workerThread.submit(std::forward<T_Fn>(fn));
132 }
133
135
136 std::string getName() const
137 {
138 return std::string("host::Queue id=") + std::to_string(m_idx);
139 }
140
142
143 [[nodiscard]] auto getNativeHandle() const noexcept
144 {
145 return m_idx;
146 }
147
148 friend struct internal::Enqueue;
149
150 template<alpaka::onHost::concepts::ThreadSpec T_ThreadSpec>
151 void enqueue(T_ThreadSpec const& threadSpec, auto const& kernelBundle)
152 {
153 static_assert(
154 ALPAKA_TYPEOF(threadSpec)::getExecutor() != exec::anyExecutor,
155 "'exec::anyExecutor' can not be used to enqueue an kernel.");
158
159 /* Only set the thread affinity if we use a blocking queue, else the affinity is already set in the
160 * callback thread. The callback thread affinity will be given to all threads created by a task executed
161 * by the callback thread. */
162 bool setThreadAffinity = m_isBlocking;
163 submit(
164 [kernelBundle, threadSpec, deviceKind, numIdx = m_numaIdx, setThreadAffinity]()
165 {
166 auto moreLayer = Dict{
167 DictEntry(object::launchedWidthFrameSpec, std::false_type{}),
170 DictEntry(object::exec, threadSpec.getExecutor())};
171 onAcc::Acc acc = makeAcc(threadSpec, numIdx, setThreadAffinity);
172 acc(kernelBundle, moreLayer);
173 });
174 }
175
176 template<alpaka::onHost::concepts::FrameSpec T_FrameSpec>
177 void enqueue(T_FrameSpec const& frameSpec, auto const& kernelBundle)
178 {
179 static_assert(
180 ALPAKA_TYPEOF(frameSpec)::getExecutor() != exec::anyExecutor,
181 "'exec::anyExecutor' can not be used to enqueue an kernel.");
183 auto adjustedThreadSpec = internal::adjustThreadSpec(*m_device.get(), frameSpec, kernelBundle);
185
186 /* Only set the thread affinity if we use a blocking queue, else the affinity is already set in the
187 * callback thread. The callback thread affinity will be given to all threads created by a task executed
188 * by the callback thread. */
189 bool setThreadAffinity = m_isBlocking;
190 submit(
191 [kernelBundle, adjustedThreadSpec, deviceKind, numIdx = m_numaIdx, setThreadAffinity]()
192 {
193 auto moreLayer = Dict{
194 DictEntry(object::launchedWidthFrameSpec, std::true_type{}),
197 DictEntry(object::exec, adjustedThreadSpec.getExecutor())};
198 onAcc::Acc acc = makeAcc(adjustedThreadSpec, numIdx, setThreadAffinity);
199 acc(kernelBundle, moreLayer);
200 });
201 }
202
203 /** Execute a task in the queue
204 *
205 * @attention Do NOT enqueue a task which captures the queue internally to keep the queue alive as
206 * dependency. In this case the destructor of the queue is not called.
207 */
208 void enqueueHostFn(auto const& task)
209 {
211 submit([task]() { task(); });
212 }
213
214 void enqueueHostFnDeferred(auto const& task)
215 {
217 m_workerThread.submit(task);
218 }
219
221
222 auto getDeviceKind() const
223 {
225 }
226
227 auto getDevice() const
228 {
229 return m_device;
230 }
231
232 std::shared_ptr<Queue> getSharedPtr()
233 {
234 return this->shared_from_this();
235 }
236
238
239 /** Checks if the queue is empty
240 *
241 * If m_isBlocking is true, only tasks will be taken into account; events will be ignored because they
242 * cannot influence the result of isQueueEmpty. If m_isBlocking is false, events will be taken into account
243 * because they are handled as normal tasks.
244 *
245 * @return true if no task is executed else false
246 */
247 bool isQueueEmpty() const
248 {
250 if(m_isBlocking)
251 {
252 // check if the queue is currently executing a blocking task
254 }
255 else
256 {
257 return m_workerThread.isEmpty();
258 }
259 }
260
262
263 friend struct internal::Wait;
264 friend struct internal::WaitFor;
265 friend struct internal::Memcpy;
267 friend struct internal::Memset;
270 };
271 } // namespace cpu
272
273 namespace internal
274 {
275 template<typename T_Device>
276 struct Wait::Op<cpu::Queue<T_Device>>
277 {
279 {
281 /* If not empty -> Enqueue an empty task as marker and wait for the future
282 * else there is no need to wait
283 */
284 if(queue.isQueueEmpty() == false)
285 {
286 queue.submit([]() {}).wait();
287 }
288 }
289 };
290
291 template<typename T_Device, typename T_Event>
292 struct Enqueue::Event<cpu::Queue<T_Device>, T_Event>
293 {
294 void operator()(cpu::Queue<T_Device>& queue, T_Event& event) const
295 {
297 // open a scope to avoid logging while we hold the lock for this class
298 {
299 // Setting the event state (e.g. the future) and enqueuing it has to be atomic.
300 std::lock_guard<std::mutex> lk(event.m_mutex);
301
302 ++event.m_enqueueCount;
303
304 auto const enqueueCount = event.m_enqueueCount;
305
306 /* In case the queue is blocking we can not use queue.submit() because we hold the lock already.
307 * The blocking queue executes the lambda directly which will create a deadlock.
308 */
309 if(queue.m_isBlocking)
310 {
311 // Nothing to do if it has been re-enqueued to a later position in the queue.
312 if(enqueueCount == event.m_enqueueCount)
313 {
314 event.m_LastReadyEnqueueCount = std::max(enqueueCount, event.m_LastReadyEnqueueCount);
315 }
316 // apply a fulfilled future
317 std::promise<void> p;
318 p.set_value();
319 event.m_future = p.get_future();
320 }
321 else
322 {
323 auto sharedEvent = event.getSharedPtr();
324 // Enqueue a task that only resets the events flag if it is completed.
325 event.m_future = queue.submit(
326 [sharedEvent, enqueueCount]() mutable
327 {
328 std::unique_lock<std::mutex> lk2(sharedEvent->m_mutex);
329
330 // Nothing to do if it has been re-enqueued to a later position in the queue.
331 if(enqueueCount == sharedEvent->m_enqueueCount)
332 {
333 sharedEvent->m_LastReadyEnqueueCount
334 = std::max(enqueueCount, sharedEvent->m_LastReadyEnqueueCount);
335 }
336 });
337 }
338 }
339 }
340 };
341
342 template<typename T_Device, typename T_Event>
343 struct WaitFor::Op<cpu::Queue<T_Device>, T_Event>
344 {
346 {
348 // open a scope to avoid logging while we hold the lock for this class
349 {
350 // Setting the event state and enqueuing it has to be atomic.
351 std::unique_lock<std::mutex> lk(event.m_mutex);
352
353 if(!event.isReady())
354 {
355 /* In case the queue is blocking we can not use queue.submit() because we hold the lock
356 * already. The blocking queue executes the lambda directly which will create a deadlock.
357 */
358 if(queue.m_isBlocking)
359 {
360 std::shared_future sFuture = event.m_future;
361 lk.unlock();
362 sFuture.get();
363 }
364 else
365 {
366 auto sharedEvent = event.getSharedPtr();
367 auto oldFuture = event.m_future;
368
369 // unlock here to avoid keeping the lock during the maybe expensive enqueue of the task
370 lk.unlock();
371 // Enqueue a task that waits for the given future of the event.
372 queue.submit([sharedEvent, oldFuture]() { oldFuture.get(); });
373 }
374 }
375 }
376 }
377 };
378
379 template<typename T_Device, typename T_Dest, typename T_Source, typename T_Extents>
380 struct Memcpy::Op<cpu::Queue<T_Device>, T_Dest, T_Source, T_Extents>
381 {
382 void operator()(cpu::Queue<T_Device>& queue, auto&& dest, T_Source const& source, T_Extents const& extents)
383 const requires std::same_as<ALPAKA_TYPEOF(dest), T_Dest>
384 {
386 constexpr auto dim = alpaka::trait::getDim_v<T_Extents>;
387
388 /* Get all required properties outside the lambda function to not extend the life-time of the data.
389 * The life-time is not extended to have the same life-time behaviour with all backends.
390 */
391 void* destPtr = toVoidPtr(alpaka::onHost::data(ALPAKA_FORWARD(dest)));
392 void const* srcPtr = toVoidPtr(alpaka::onHost::data(source));
393
394 if constexpr(dim == 1u)
395 {
396 queue.submit(
397 [extents, destPtr, srcPtr]()
398 {
399 std::memcpy(destPtr, srcPtr, extents.x() * sizeof(alpaka::trait::GetValueType_t<T_Dest>));
400 });
401 }
402 else
403 {
404 // memcpy is implemented as row wise copy therefore the last dimension is not required
405 auto destPitchBytesWithoutColumn = dest.getPitches().eraseBack();
406 auto sourcePitchBytesWithoutColumn = source.getPitches().eraseBack();
407
408 queue.submit(
409 [extents, destPtr, srcPtr, destPitchBytesWithoutColumn, sourcePitchBytesWithoutColumn]()
410 {
411 auto const dstExtentWithoutColumn = extents.eraseBack();
412 if(static_cast<std::size_t>(extents.product()) != 0u)
413 {
415 dstExtentWithoutColumn,
416 [&](auto const& idx)
417 {
418 std::memcpy(
419 reinterpret_cast<std::uint8_t*>(destPtr)
420 + (idx * destPitchBytesWithoutColumn).sum(),
421 reinterpret_cast<std::uint8_t const*>(srcPtr)
422 + (idx * sourcePitchBytesWithoutColumn).sum(),
423 static_cast<size_t>(extents.back())
425 });
426 }
427 });
428 }
429 }
430 };
431
432 // copy to device global memory
433 template<typename T_Device, typename T_Source, typename T_Storage, typename T>
434 struct internal::MemcpyDeviceGlobal::
435 Op<cpu::Queue<T_Device>, onAcc::internal::GlobalDeviceMemoryWrapper<T_Storage, T>, T_Source>
436 {
440 auto&& source) const
441 {
443 auto* destPtr = dest.getHandle(api::host).data();
444 void const* srcPtr{nullptr};
445 if constexpr(std::is_pointer_v<ALPAKA_TYPEOF(source)>)
446 srcPtr = source;
447 else
449 queue.submit([destPtr, srcPtr]() { std::memcpy(destPtr, srcPtr, sizeof(T)); });
450 }
451 };
452
453 // copy from device global memory
454 template<typename T_Device, typename T_Dest, typename T_Storage, typename T>
455 struct internal::MemcpyDeviceGlobal::
456 Op<cpu::Queue<T_Device>, T_Dest, onAcc::internal::GlobalDeviceMemoryWrapper<T_Storage, T>>
457 {
460 auto&& dest,
462 {
464 void* destPtr{nullptr};
465 if constexpr(std::is_pointer_v<ALPAKA_TYPEOF(dest)>)
466 destPtr = dest;
467 else
469 auto const* srcPtr = source.getHandle(api::host).data();
470 queue.submit([destPtr, srcPtr]() { std::memcpy(destPtr, srcPtr, sizeof(T)); });
471 }
472 };
473
474 template<typename T_Device, typename T_Dest, typename T_Extents>
475 struct Memset::Op<cpu::Queue<T_Device>, T_Dest, T_Extents>
476 {
477 /** @attention Do not use `requires std::same_as<ALPAKA_TYPEOF(dest), T_Dest>` here else gcc 11.X
478 * (tested 11.4 and 11.3) will run into an internal compiler segfault during the evaluation of the
479 * constraints */
480 void operator()(cpu::Queue<T_Device>& queue, auto&& dest, uint8_t byteValue, T_Extents const& extents)
481 const requires(std::is_same_v<ALPAKA_TYPEOF(dest), T_Dest>)
482 {
484 constexpr auto dim = alpaka::trait::getDim_v<T_Extents>;
485
486 void* destPtr = static_cast<void*>(alpaka::onHost::data(dest));
487
488 if constexpr(dim == 1u)
489 {
490 queue.submit(
491 [extents, destPtr, byteValue]()
492 {
493 std::memset(
494 destPtr,
495 byteValue,
496 extents.x() * sizeof(alpaka::trait::GetValueType_t<T_Dest>));
497 });
498 }
499 else
500 {
501 // memset is implemented as row wise memset therefore the last dimension is not required
502 auto destPitchBytesWithoutColumn = dest.getPitches().eraseBack();
503 queue.submit(
504 [extents, destPtr, destPitchBytesWithoutColumn, byteValue]()
505 {
506 auto const dstExtentWithoutColumn = extents.eraseBack();
507 if(static_cast<std::size_t>(extents.product()) != 0u)
508 {
510 dstExtentWithoutColumn,
511 [&](auto const& idx)
512 {
513 std::memset(
514 reinterpret_cast<std::uint8_t*>(destPtr)
515 + (idx * destPitchBytesWithoutColumn).sum(),
516 byteValue,
517 static_cast<size_t>(extents.back())
519 });
520 }
521 });
522 }
523 }
524 };
525
526 template<typename T_Device, typename T_Dest, typename T_Value, typename T_Extents>
527 struct Fill::Op<cpu::Queue<T_Device>, T_Dest, T_Value, T_Extents>
528 {
529 void operator()(cpu::Queue<T_Device>& queue, auto&& dest, T_Value elementValue, T_Extents const& extents)
530 const requires std::same_as<ALPAKA_TYPEOF(dest), T_Dest>
531 && std::same_as<alpaka::trait::GetValueType_t<ALPAKA_TYPEOF(dest)>, T_Value>
532 {
534 // avoid that we pass a SharedBuffer and convert non alpaka data views
535 alpaka::concepts::IView<T_Value> auto dataView = makeView(dest);
536
538 queue,
540 dataView.getSubView(extents),
541 elementValue);
542 }
543 };
544
545 /** The code is a copy of the Alloc::Op with the difference that the memory is allocated and freed
546 * within a queue
547 */
548 template<typename T_Type, typename T_Device, alpaka::concepts::Vector T_Extents>
549 struct AllocDeferred::Op<T_Type, cpu::Queue<T_Device>, T_Extents>
550 {
551 static consteval uint32_t highestPowerOfTwo(uint32_t value)
552 {
553 uint32_t result = 1u;
554 while((result << 1u) <= value)
555 {
556 result <<= 1u;
557 }
558 return result;
559 }
560
561 auto operator()(cpu::Queue<T_Device>& queue, T_Extents const& extents) const
562 {
564 auto device = queue.getDevice();
565 constexpr uint32_t alignment = api::util::simdOptimizedAlignment<T_Type>(
566 ALPAKA_TYPEOF(getApi(device)){},
567 ALPAKA_TYPEOF(getDeviceKind(device)){});
568 auto [memSizeInByte, pitches] = api::util::emulatedAlignedMemDescription<T_Type>(alignment, extents);
569
570 auto deviceDependency = onHost::Device{queue.getDevice()->getSharedPtr()};
571 auto queueDependency = queue.getSharedPtr();
572
573 T_Type* ptr = reinterpret_cast<T_Type*>(alpaka::core::alignedAlloc(alignment, memSizeInByte));
574 device->pinPointer(ptr, memSizeInByte);
575
576 // queueDependency is captured to keep the device alive until the memory is deleted
577 auto deleter = [ptr, queueDep = std::move(queueDependency)]()
578 { queueDep.get()->submit([ptr]() { alpaka::core::alignedFree(alignment, ptr); }); };
579
580 auto sharedBuffer = onHost::SharedBuffer{
581 deviceDependency,
582 ptr,
583 extents,
584 pitches,
585 std::move(deleter),
587
590 [&]()
591 {
592 std::stringstream ss;
593 ss << sharedBuffer;
594 return ss.str();
595 });
596 return sharedBuffer;
597 }
598 };
599 } // namespace internal
600} // namespace alpaka::onHost
601
602namespace alpaka::internal
603{
604 template<typename T_Device>
605 struct GetApi::Op<onHost::cpu::Queue<T_Device>>
606 {
607 inline constexpr auto operator()(auto&& queue) const
608 {
609 return alpaka::getApi(queue.m_device);
610 }
611 };
612} // namespace alpaka::internal
A thread queue executing tasks asynchronously.
#define ALPAKA_TYPEOF(...)
Get the type of instance.
Definition common.hpp:153
#define ALPAKA_FORWARD(instance)
Perfectly forward an instance as argument.
Definition common.hpp:147
Interface concept for objects describing api-related multidimensional memory access.
Definition IView.hpp:56
#define ALPAKA_LOG_INFO(logLvl, callable)
Write a meta data message to the output.
Definition logger.hpp:106
#define ALPAKA_LOG_FUNCTION(logLvl)
Log the entry and exit of a scope.
Definition logger.hpp:95
auto emulatedAlignedMemDescription(uint32_t alignmentInByte, T_Extents extents)
provides a memory description to create multidimensional linewise aligned memory within a one dimensi...
Definition util.hpp:100
constexpr auto simdOptimizedAlignment(auto api, alpaka::concepts::DeviceKind auto deviceKind)
Calculate the best alignment for SIMD optimized memory allocation.
Definition util.hpp:140
constexpr auto host
Definition Api.hpp:39
ALPAKA_FN_INLINE ALPAKA_FN_HOST void alignedFree(size_t alignment, auto ptr)
ALPAKA_FN_INLINE ALPAKA_FN_HOST auto alignedAlloc(size_t alignment, size_t size) -> void *
constexpr AnyExecutor anyExecutor
Automatic executor selection.
Definition executor.hpp:33
alpaka'S function interface
Definition fn.hpp:38
void fill(auto &internalQueue, auto executor, alpaka::concepts::IMdSpan< T_Value > auto &&dest, T_Value elementValue)
Definition generic.hpp:63
alpaka internal implementations.
Definition generic.hpp:19
constexpr auto getDeviceKind(auto &&any)
Definition interface.hpp:85
auto ndLoopIncIdx(TExtentVec &idx, TExtentVec const &extent, TFnObj const &f) -> void
Loops over an n-dimensional iteration index variable calling f(idx, args...) for each iteration....
Definition NdLoop.hpp:73
constexpr DeviceKind deviceKind
Definition tag.hpp:30
constexpr Api api
Definition tag.hpp:24
static auto adjustThreadSpec(auto const &device, onHost::concepts::FrameSpec auto const &frameSpec, KernelBundle< TKernelFn, TArgs... > const &kernelBundle)
void wait(auto &&any)
constexpr auto getDevice(auto &&any)
Definition interface.hpp:77
constexpr auto queue
Definition lvl.hpp:127
constexpr auto kernel
Definition lvl.hpp:142
constexpr auto memory
Definition lvl.hpp:112
constexpr auto event
Definition lvl.hpp:97
Functionality which is usable on the host CPU controller thread.
Definition api.hpp:40
constexpr auto defaultExecutor(internal::concepts::DeviceHandle auto deviceHandle)
Select a default executor for the given device.
Definition trait.hpp:148
std::shared_ptr< T > Handle
Definition Handle.hpp:30
decltype(auto) data(auto &&any)
pointer to data of an object
auto makeAcc(alpaka::onHost::concepts::ThreadSpec auto const &threadSpec, uint32_t numaIdx, bool setThreadAffinity)
Definition Serial.hpp:92
typename GetValueType< T >::type GetValueType_t
Definition trait.hpp:65
constexpr uint32_t getDim_v
Definition trait.hpp:41
auto * toVoidPtr(T inPtr)
Cast a pointer that may or may not point to volatile memory to a (void*) or (void const*).
Definition util.hpp:34
constexpr decltype(auto) getDeviceKind(auto &&any)
Get the device type of an object.
Definition interface.hpp:52
constexpr decltype(auto) getApi(auto &&any)
Get the API an object depends on.
Definition interface.hpp:23
constexpr auto makeView(auto &&anyWithApi, T_ValueType *pointer, concepts::Vector auto const &extents, T_MemAlignment const memAlignment=T_MemAlignment{})
Definition View.hpp:37
STL namespace.
Strongly typed and constexpr representation of a byte-alignment of memory.
Definition Alignment.hpp:26
Helper class to provide access to device global memory variables.
constexpr decltype(auto) getHandle(T_Api api) const
Get the handle to call native API specific memcopy for global device memory operation.
Description of a specific device that one can schedule kernels on.
Definition Device.hpp:32
Life time managed buffer with contiguous data.
std::atomic< bool > m_isBlockingTaskExecuted
Flag to show if a blocking tasks is executed.
Definition Queue.hpp:88
std::string getName() const
Definition Queue.hpp:136
void enqueue(T_ThreadSpec const &threadSpec, auto const &kernelBundle)
Definition Queue.hpp:151
std::shared_ptr< Queue > getSharedPtr()
Definition Queue.hpp:232
bool operator!=(Queue const &other) const
Definition Queue.hpp:66
Handle< T_Device > m_device
Definition Queue.hpp:77
Queue & operator=(Queue const &)=delete
auto getDeviceKind() const
Definition Queue.hpp:222
auto submit(T_Fn &&fn)
Submit a task to the queue.
Definition Queue.hpp:104
bool operator==(Queue const &other) const
Definition Queue.hpp:61
Queue & operator=(Queue &&)=delete
void enqueue(T_FrameSpec const &frameSpec, auto const &kernelBundle)
Definition Queue.hpp:177
std::mutex m_mutex
Mutex to ensure sequential execution of tasks and operation if the queue is blocking.
Definition Queue.hpp:94
core::CallbackThread m_workerThread
Definition Queue.hpp:80
Queue(internal::concepts::DeviceHandle auto device, uint32_t const idx, uint32_t numIdx, bool isBlocking)
Definition Queue.hpp:39
bool isQueueEmpty() const
Checks if the queue is empty.
Definition Queue.hpp:247
Queue(Queue &&)=delete
void enqueueHostFnDeferred(auto const &task)
Definition Queue.hpp:214
void enqueueHostFn(auto const &task)
execute a task in the queue
Definition Queue.hpp:208
Queue(Queue const &)=delete
auto getNativeHandle() const noexcept
Definition Queue.hpp:143
auto operator()(cpu::Queue< T_Device > &queue, T_Extents const &extents) const
Definition Queue.hpp:561
void operator()(cpu::Queue< T_Device > &queue, T_Event &event) const
Definition Queue.hpp:294
void operator()(cpu::Queue< T_Device > &queue, auto &&dest, T_Value elementValue, T_Extents const &extents) const
Definition Queue.hpp:529
void operator()(cpu::Queue< T_Device > &queue, auto &&dest, T_Source const &source, T_Extents const &extents) const
Definition Queue.hpp:382
void operator()(cpu::Queue< T_Device > &queue, auto &&dest, uint8_t byteValue, T_Extents const &extents) const
Definition Queue.hpp:480
void operator()(cpu::Queue< T_Device > &queue, cpu::Event< T_Device > &event) const
Definition Queue.hpp:345
void operator()(cpu::Queue< T_Device > &queue) const
Definition Queue.hpp:278
void operator()(cpu::Queue< T_Device > &queue, auto &&dest, onAcc::internal::GlobalDeviceMemoryWrapper< T_Storage, T > source) const
Definition Queue.hpp:458
void operator()(cpu::Queue< T_Device > &queue, onAcc::internal::GlobalDeviceMemoryWrapper< T_Storage, T > dest, auto &&source) const
Definition Queue.hpp:437