BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL > Struct Template Reference

BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL > Struct Template Reference

Composable Kernel: ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL > Struct Template Reference
ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL > Struct Template Reference

#include <blockwise_gemm_pipeline_xdlops.hpp>

Static Public Member Functions

static constexpr auto Print ()
static constexpr auto Print ()

Static Public Attributes

static constexpr index_t WaveNumM = MPerBlock / (MRepeat * MPerXDL)
static constexpr index_t WaveNumN = NPerBlock / (NRepeat * NPerXDL)
static constexpr index_t WaveSize = BlockSize / (WaveNumM * WaveNumN)
static constexpr index_t A_Buffer_Load_Inst_Num
static constexpr index_t B_Buffer_Load_Inst_Num
static constexpr index_t A_LDS_Write_Inst_Num
static constexpr index_t B_LDS_Write_Inst_Num
static constexpr index_t A_LDS_Read_Inst_Num
static constexpr index_t B_LDS_Read_Inst_Num
static constexpr index_t C_MFMA_Inst_Num
static constexpr index_t A_LDS_Read_Width = ALDSReadWidth
static constexpr index_t B_LDS_Read_Width = BLDSReadWidth
static constexpr index_t C_MFMA_SpeedUp = IsF4F6 ? 2 : 1
static constexpr index_t C_MFMA_Inst_Cycle

Member Function Documentation

◆ Print() [1/2]

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr auto ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::Print ( )
inline static constexpr

◆ Print() [2/2]

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr auto ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::Print ( )
inline static constexpr

Member Data Documentation

◆ A_Buffer_Load_Inst_Num

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::A_Buffer_Load_Inst_Num
static constexpr
Initial value:
=
MPerBlock * KPerBlock / (BlockSize * ABufferLoadWidth)

◆ A_LDS_Read_Inst_Num

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::A_LDS_Read_Inst_Num
static constexpr
Initial value:
=
WaveNumN * MPerBlock * KPerBlock / (BlockSize * ALDSReadWidth)
static constexpr index_t WaveNumN
Definition blockwise_gemm_pipeline_wmmaops.hpp:29

◆ A_LDS_Read_Width

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
index_t ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::A_LDS_Read_Width = ALDSReadWidth
static constexpr

◆ A_LDS_Write_Inst_Num

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::A_LDS_Write_Inst_Num
static constexpr
Initial value:
=
MPerBlock * KPerBlock / (BlockSize * ALDSWriteWidth)

◆ B_Buffer_Load_Inst_Num

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::B_Buffer_Load_Inst_Num
static constexpr
Initial value:
=
NPerBlock * KPerBlock / (BlockSize * BBufferLoadWidth)

◆ B_LDS_Read_Inst_Num

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::B_LDS_Read_Inst_Num
static constexpr
Initial value:
=
WaveNumM * MPerBlock * KPerBlock / (BlockSize * BLDSReadWidth)
static constexpr index_t WaveNumM
Definition blockwise_gemm_pipeline_wmmaops.hpp:28

◆ B_LDS_Read_Width

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
index_t ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::B_LDS_Read_Width = BLDSReadWidth
static constexpr

◆ B_LDS_Write_Inst_Num

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::B_LDS_Write_Inst_Num
static constexpr
Initial value:
=
NPerBlock * KPerBlock / (BlockSize * BLDSWriteWidth)

◆ C_MFMA_Inst_Cycle

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
index_t ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::C_MFMA_Inst_Cycle
static constexpr
Initial value:
= []() {
if constexpr(NPerXDL == 16)
{
return KPerXDL == 128 ? 32 / C_MFMA_SpeedUp : 16 / C_MFMA_SpeedUp;
}
else if constexpr(NPerXDL == 32)
{
return KPerXDL == 64 ? 64 / C_MFMA_SpeedUp : 32 / C_MFMA_SpeedUp;
}
}()
static constexpr index_t C_MFMA_SpeedUp
Definition blkgemmpipe_scheduler.hpp:103

◆ C_MFMA_Inst_Num

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::C_MFMA_Inst_Num
static constexpr
Initial value:
=
MPerBlock * NPerBlock * KPerBlock / (BlockSize / WaveSize) / (MPerXDL * NPerXDL * KPerXDL)
static constexpr index_t WaveSize
Definition blockwise_gemm_pipeline_xdlops.hpp:37

◆ C_MFMA_SpeedUp

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
index_t ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::C_MFMA_SpeedUp = IsF4F6 ? 2 : 1
static constexpr

◆ WaveNumM

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::WaveNumM = MPerBlock / (MRepeat * MPerXDL)
static constexpr

◆ WaveNumN

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::WaveNumN = NPerBlock / (NRepeat * NPerXDL)
static constexpr

◆ WaveSize

template<index_t BlockSize, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t ABufferLoadWidth, index_t BBufferLoadWidth, index_t ALDSWriteWidth, index_t BLDSWriteWidth, index_t ALDSReadWidth, index_t BLDSReadWidth, index_t MRepeat, index_t NRepeat, index_t MPerXDL, index_t NPerXDL, index_t KPerXDL>
constexpr index_t ck::BlockwiseGemmXdlops_pipeline_hotloop_inst< BlockSize, MPerBlock, NPerBlock, KPerBlock, ABufferLoadWidth, BBufferLoadWidth, ALDSWriteWidth, BLDSWriteWidth, ALDSReadWidth, BLDSReadWidth, MRepeat, NRepeat, MPerXDL, NPerXDL, KPerXDL >::WaveSize = BlockSize / (WaveNumM * WaveNumN)
static constexpr

The documentation for this struct was generated from the following files: