latest/doxygen_dev/hip_2IdxLayer_8hpp_source.html

/* Copyright 2024 René Widera

 * SPDX-License-Identifier: MPL-2.0

 */


#pragma once


#include "alpaka/Vec.hpp"

#include "alpaka/core/config.hpp"


#if ALPAKA_LANG_HIP


namespace alpaka::onAcc

{

    namespace unifiedCudaHip

    {

        template<typename T_OptimizedThreadSpec>

        struct BlockLayer

        {

            T_OptimizedThreadSpec const& m_optimizedThreadSpec;

            static constexpr uint32_t dim = T_OptimizedThreadSpec::dim();

            using IdxType = typename T_OptimizedThreadSpec::NumBlocksVecType::type;


            constexpr BlockLayer(T_OptimizedThreadSpec const& optimizedThreadSpec)

                : m_optimizedThreadSpec(optimizedThreadSpec)

            {

            }


            constexpr auto idx() const

            {

                if constexpr(dim <= 3u)

                {

                    return Vec<IdxType, 3u>{hipBlockIdx_z, hipBlockIdx_y, hipBlockIdx_x}.template rshrink<dim>();

                }

                else

                {

                    return mapToND(m_optimizedThreadSpec.getNumBlocks(), static_cast<IdxType>(hipBlockIdx_x));

                }

            }


            constexpr auto count() const

            {

                if constexpr(dim <= 3u)

                {

                    return Vec<IdxType, 3u>{hipGridDim_z, hipGridDim_y, hipGridDim_x}.template rshrink<dim>();

                }

                else

                {

                    return m_optimizedThreadSpec.getNumBlocks();

                }

            }

        };


        template<typename T_OptimizedThreadSpec>

        struct ThreadLayer

        {

            T_OptimizedThreadSpec const& m_optimizedThreadSpec;

            static constexpr uint32_t dim = T_OptimizedThreadSpec::dim();

            using IdxType = typename T_OptimizedThreadSpec::NumThreadsVecType::type;


            constexpr ThreadLayer(T_OptimizedThreadSpec const& optimizedThreadSpec)

                : m_optimizedThreadSpec(optimizedThreadSpec)

            {

            }


            constexpr auto idx() const

            {

                if constexpr(dim <= 3u)

                {

                    return Vec<IdxType, 3u>{hipThreadIdx_z, hipThreadIdx_y, hipThreadIdx_x}.template rshrink<dim>();

                }

                else

                {

                    return mapToND(m_optimizedThreadSpec.getNumThreads(), static_cast<IdxType>(hipThreadIdx_x));

                }

            }


            constexpr auto count() const

            {

                if constexpr(dim <= 3u)

                {

                    return Vec<IdxType, 3u>{hipBlockDim_z, hipBlockDim_y, hipBlockDim_x}.template rshrink<dim>();

                }

                else

                {

                    return m_optimizedThreadSpec.getNumThreads();

                }

            }


            constexpr auto count() const

                requires alpaka::concepts::CVector<typename T_OptimizedThreadSpec::NumThreadsVecType>

            {

                return typename T_OptimizedThreadSpec::NumThreadsVecType{};

            }

        };

    } // namespace unifiedCudaHip

} // namespace alpaka::onAcc


#endif

Vec.hpp

config.hpp

alpaka::onAcc
functionality which is usable on the accelerator compute device from within a kernel.
Definition executor.hpp:38

alpaka::mapToND
constexpr Vec< T_IntegralType, T_dim > mapToND(Vec< T_IntegralType, T_dim, T_Storage > const &extents, T_IntegralType linearIdx)
Maps a linear index to an N-dimensional index.
Definition Vec.hpp:880