GemmPipelineAgBgCrCompV3< Problem, Policy > Struct Template Reference#
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy > Struct Template Reference
#include <gemm_pipeline_ag_bg_cr_comp_v3.hpp>
Inheritance diagram for ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >:
Classes | |
| struct | PipelineImpl |
| struct | PipelineImpl< GemmPipelineScheduler::Intrawave > |
Public Types | |
| using | Base = BaseGemmPipelineAgBgCrCompV3<Problem> |
| using | PipelineImplBase = GemmPipelineAgBgCrImplBase<Problem, Policy> |
| using | AsDataType = remove_cvref_t<typename Problem::AsDataTypeTuple> |
| using | BsDataType = remove_cvref_t<typename Problem::BsDataTypeTuple> |
| using | CDataType = remove_cvref_t<typename Problem::CDataType> |
| using | AElementWise = remove_cvref_t<typename Problem::AElementWise> |
| using | BElementWise = remove_cvref_t<typename Problem::BElementWise> |
| using | BlockGemmShape = remove_cvref_t<typename Problem::BlockGemmShape> |
| using | AsLayout = remove_cvref_t<typename Problem::AsLayoutTuple> |
| using | BsLayout = remove_cvref_t<typename Problem::BsLayoutTuple> |
| using | CLayout = remove_cvref_t<typename Problem::CLayout> |
| using | ALayout = remove_cvref_t<std::tuple_element_t<0, AsLayout>> |
| using | BLayout = remove_cvref_t<std::tuple_element_t<0, BsLayout>> |
| using | ADataType = remove_cvref_t<std::tuple_element_t<0, AsDataType>> |
| using | BDataType = remove_cvref_t<std::tuple_element_t<0, BsDataType>> |
| using | BlockGemm = remove_cvref_t<decltype(Policy::template GetBlockGemm<Problem>())> |
| using | I0 = number<0> |
| using | I1 = number<1> |
| using | I2 = number<2> |
Public Member Functions | |
| template<typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename AElementFunction, typename BElementFunction, typename std::enable_if_t< is_detected< is_tuple, AsDramBlockWindowTmp >::value &&is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr> | |
| CK_TILE_DEVICE auto | operator() (const AsDramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BsDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem) const |
| template<typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename std::enable_if_t< is_detected< is_tuple, AsDramBlockWindowTmp >::value &&is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr> | |
| CK_TILE_DEVICE auto | operator() (const AsDramBlockWindowTmp &a_dram_block_window_tmp, const BsDramBlockWindowTmp &b_dram_block_window_tmp, index_t num_loop, bool has_hot_loop, TailNumber tail_number, void *p_smem) const |
| This function runs the pipeline by wrapping it with the tail handler. | |
| template<typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename std::enable_if_t< is_detected< is_tuple, AsDramBlockWindowTmp >::value &&is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr> | |
| CK_TILE_DEVICE auto | operator() (const AsDramBlockWindowTmp &a_dram_block_window_tmp, const BsDramBlockWindowTmp &b_dram_block_window_tmp, index_t num_loop, void *p_smem) const |
| This function runs the pipeline using compile-time known hot loop and tail number. | |
| template<typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename AElementFunction, typename BElementFunction, typename std::enable_if_t<!is_detected< is_tuple, AsDramBlockWindowTmp >::value &&!is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr> | |
| CK_TILE_DEVICE auto | operator() (const AsDramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BsDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem) const |
| template<typename ADramBlockWindowTmp, typename BDramBlockWindowTmp, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BDramBlockWindowTmp >::value, bool > * = nullptr> | |
| CK_TILE_DEVICE auto | operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const BDramBlockWindowTmp &b_dram_block_window_tmp, index_t num_loop, bool has_hot_loop, TailNumber tail_number, void *p_smem) const |
| Quant operator(), single input: This function runs the pipeline by wrapping it with the tail handler. | |
| template<typename ADramBlockWindowTmp, typename BDramBlockWindowTmp, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BDramBlockWindowTmp >::value, bool > * = nullptr> | |
| CK_TILE_DEVICE auto | operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const BDramBlockWindowTmp &b_dram_block_window_tmp, index_t num_loop, void *p_smem) const |
| Quant operator(), single input: This function runs the pipeline using compile-time known hot loop and tail number. | |
Static Public Member Functions | |
| template<bool IsWave32Host = false> | |
| static constexpr index_t | GetVectorSizeA () |
| template<bool IsWave32Host = false> | |
| static constexpr index_t | GetVectorSizeB () |
| static constexpr index_t | GetVectorSizeC () |
| static constexpr index_t | GetSmemPackA () |
| static constexpr index_t | GetSmemPackB () |
| static CK_TILE_HOST const std::string | GetName () |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSize () |
| static CK_TILE_HOST std::string | Print () |
| Static Public Member Functions inherited from ck_tile::BaseGemmPipelineAgBgCrCompV3< Problem > | |
| static CK_TILE_HOST_DEVICE constexpr bool | BlockHasHotloop (index_t num_loop) |
| static CK_TILE_HOST_DEVICE constexpr TailNumber | GetBlockLoopTailNum (index_t num_loop) |
| template<typename RunFunction> | |
| static CK_TILE_HOST_DEVICE auto | TailHandler (const RunFunction &run_func, bool has_hot_loop, TailNumber tail_number) |
Static Public Attributes | |
| static constexpr index_t | BlockSize = Problem::kBlockSize |
| static constexpr index_t | MPerBlock = BlockGemmShape::kM |
| static constexpr index_t | NPerBlock = BlockGemmShape::kN |
| static constexpr index_t | KPerBlock = BlockGemmShape::kK |
| static constexpr index_t | APackedSize |
| static constexpr index_t | BPackedSize |
| static constexpr bool | kPadM = Problem::kPadM |
| static constexpr bool | kPadN = Problem::kPadN |
| static constexpr bool | kPadK = Problem::kPadK |
| static constexpr bool | DoubleSmemBuffer = Problem::DoubleSmemBuffer |
| static constexpr index_t | NumWaveGroups = Problem::NumWaveGroups |
| static constexpr index_t | Preshuffle = Problem::Preshuffle |
| static constexpr bool | HasHotLoop |
| static constexpr auto | TailNum |
| static constexpr auto | Scheduler = Problem::Scheduler |
| static constexpr auto | is_a_load_tr_v = bool_constant<PipelineImplBase::is_a_load_tr>{} |
| static constexpr auto | is_b_load_tr_v = bool_constant<PipelineImplBase::is_b_load_tr>{} |
| static constexpr index_t | PrefetchStages |
| static constexpr bool | UsePersistentKernel |
| Static Public Attributes inherited from ck_tile::BaseGemmPipelineAgBgCrCompV3< Problem > | |
| static constexpr index_t | PrefetchStages = 2 |
| static constexpr index_t | PrefillStages = 1 |
| static constexpr index_t | GlobalBufferNum = 1 |
| static constexpr bool | UsePersistentKernel = Problem::Traits::UsePersistentKernel |
Member Typedef Documentation
◆ ADataType
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::ADataType = remove_cvref_t<std::tuple_element_t<0, AsDataType>> |
◆ AElementWise
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::AElementWise = remove_cvref_t<typename Problem::AElementWise> |
◆ ALayout
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::ALayout = remove_cvref_t<std::tuple_element_t<0, AsLayout>> |
◆ AsDataType
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::AsDataType = remove_cvref_t<typename Problem::AsDataTypeTuple> |
◆ AsLayout
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::AsLayout = remove_cvref_t<typename Problem::AsLayoutTuple> |
◆ Base
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::Base = BaseGemmPipelineAgBgCrCompV3<Problem> |
◆ BDataType
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::BDataType = remove_cvref_t<std::tuple_element_t<0, BsDataType>> |
◆ BElementWise
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::BElementWise = remove_cvref_t<typename Problem::BElementWise> |
◆ BLayout
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::BLayout = remove_cvref_t<std::tuple_element_t<0, BsLayout>> |
◆ BlockGemm
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::BlockGemm = remove_cvref_t<decltype(Policy::template GetBlockGemm<Problem>())> |
◆ BlockGemmShape
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::BlockGemmShape = remove_cvref_t<typename Problem::BlockGemmShape> |
◆ BsDataType
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::BsDataType = remove_cvref_t<typename Problem::BsDataTypeTuple> |
◆ BsLayout
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::BsLayout = remove_cvref_t<typename Problem::BsLayoutTuple> |
◆ CDataType
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::CDataType = remove_cvref_t<typename Problem::CDataType> |
◆ CLayout
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::CLayout = remove_cvref_t<typename Problem::CLayout> |
◆ I0
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::I0 = number<0> |
◆ I1
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::I1 = number<1> |
◆ I2
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::I2 = number<2> |
◆ PipelineImplBase
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV3< Problem, Policy >::PipelineImplBase = GemmPipelineAgBgCrImplBase<Problem, Policy> |
Member Function Documentation
◆ GetName()
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
inline static nodiscard |
◆ GetSmemPackA()
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
inline static constexpr |
◆ GetSmemPackB()
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
inline static constexpr |
◆ GetSmemSize()
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
inline static constexpr |
◆ GetVectorSizeA()
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
template<bool IsWave32Host = false>
|
inline static constexpr |
◆ GetVectorSizeB()
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
template<bool IsWave32Host = false>
|
inline static constexpr |
◆ GetVectorSizeC()
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
inline static constexpr |
◆ operator()() [1/6]
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
template<typename ADramBlockWindowTmp, typename BDramBlockWindowTmp, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BDramBlockWindowTmp >::value, bool > * = nullptr>
|
inline |
Quant operator(), single input: This function runs the pipeline by wrapping it with the tail handler.
- Note: This is used by the persistent gemm kernel variants that don't determine hot loop and tail number on the host side, e.g. grouped gemm kernel.
◆ operator()() [2/6]
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
template<typename ADramBlockWindowTmp, typename BDramBlockWindowTmp, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BDramBlockWindowTmp >::value, bool > * = nullptr>
|
inline |
Quant operator(), single input: This function runs the pipeline using compile-time known hot loop and tail number.
- Parameters:
  - num_loop: The number of loop iterations. This is determined at runtime due to e.g. SplitK.
- Note: This is used by the kernel variants that are able to determine hot loop and tail number on the host side, e.g. non-persistent gemm kernel.
◆ operator()() [3/6]
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
template<typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename AElementFunction, typename BElementFunction, typename std::enable_if_t<!is_detected< is_tuple, AsDramBlockWindowTmp >::value &&!is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr>
|
inline |
◆ operator()() [4/6]
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
template<typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename AElementFunction, typename BElementFunction, typename std::enable_if_t< is_detected< is_tuple, AsDramBlockWindowTmp >::value &&is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr>
|
inline |
◆ operator()() [5/6]
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
template<typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename std::enable_if_t< is_detected< is_tuple, AsDramBlockWindowTmp >::value &&is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr>
|
inline |
This function runs the pipeline by wrapping it with the tail handler.
- Note: This is used by the persistent gemm kernel variants that don't determine hot loop and tail number on the host side, e.g. grouped gemm kernel.
◆ operator()() [6/6]
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
template<typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename std::enable_if_t< is_detected< is_tuple, AsDramBlockWindowTmp >::value &&is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr>
|
inline |
This function runs the pipeline using compile-time known hot loop and tail number.
- Parameters:
  - num_loop: The number of loop iterations. This is determined at runtime due to e.g. SplitK.
- Note: This is used by the kernel variants that are able to determine hot loop and tail number on the host side, e.g. non-persistent gemm kernel.
◆ Print()
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
inline static |
Member Data Documentation
◆ APackedSize
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
Initial value:
=
ck_tile::numeric_traits<remove_cvref_t<ADataType>>::PackedSize
Definition tile/core/numeric/numeric.hpp:81
◆ BlockSize
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
◆ BPackedSize
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
Initial value:
=
ck_tile::numeric_traits<remove_cvref_t<BDataType>>::PackedSize
◆ DoubleSmemBuffer
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
◆ HasHotLoop
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
Initial value:
=
Problem::HasHotLoop
◆ is_a_load_tr_v
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
◆ is_b_load_tr_v
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
◆ kPadK
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
◆ kPadM
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
◆ kPadN
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
◆ KPerBlock
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
◆ MPerBlock
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
◆ NPerBlock
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
◆ NumWaveGroups
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
◆ PrefetchStages
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
◆ Preshuffle
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
◆ Scheduler
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
◆ TailNum
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
Initial value:
=
Problem::TailNum
◆ UsePersistentKernel
template<typename Problem, typename Policy = UniversalGemmPipelineAgBgCrPolicy>
|
static constexpr |
The documentation for this struct was generated from the following file: