QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ > Struct Template Reference
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ > Struct Template Reference
#include <gemm_quant_kernel.hpp>
Classes | |
| struct | SplitKBatchOffset |
Public Types | |
| using | TilePartitioner = remove_cvref_t<TilePartitioner_> |
| using | GemmPipeline = remove_cvref_t<GemmPipeline_> |
| using | EpiloguePipeline = remove_cvref_t<EpiloguePipeline_> |
| using | ALayout = remove_cvref_t<typename GemmPipeline::ALayout> |
| using | BLayout = remove_cvref_t<typename GemmPipeline::BLayout> |
| using | CLayout = remove_cvref_t<typename GemmPipeline::CLayout> |
| using | AQLayout |
| using | BQLayout |
| using | ADataType = remove_cvref_t<typename GemmPipeline::ADataType> |
| using | BDataType = remove_cvref_t<typename GemmPipeline::BDataType> |
| using | CDataType = remove_cvref_t<typename EpiloguePipeline::ODataType> |
| using | AccDataType = remove_cvref_t<typename EpiloguePipeline::AccDataType> |
| using | AQDataType |
| using | BQDataType |
Public Member Functions | |
| CK_TILE_DEVICE void | operator() (QuantGemmKernelArgs kargs) const |
Static Public Member Functions | |
| static CK_TILE_HOST const std::string | GetName () |
| static CK_TILE_HOST constexpr auto | GridSize (index_t M, index_t N, index_t KBatch) |
| static CK_TILE_HOST constexpr auto | BlockSize () |
| static CK_TILE_HOST constexpr QuantGemmKernelArgs | MakeKernelArgs (const QuantGemmHostArgs &hostArgs) |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSize () |
| static CK_TILE_HOST bool | IsSupportedArgument (const QuantGemmKernelArgs &kargs) |
| template<memory_operation_enum DstInMemOp = memory_operation_enum::set> | |
| static CK_TILE_DEVICE auto | MakeGemmTensorViews (const ADataType *a_ptr, const BDataType *b_ptr, const AQDataType *aq_ptr, const BQDataType *bq_ptr, CDataType *c_ptr, const QuantGemmKernelArgs &kargs, const SplitKBatchOffset &splitk_batch_offset) |
| template<typename TensorView> | |
| static CK_TILE_DEVICE auto | MakeGemmPadViews (const TensorView &views) |
| template<typename PadView> | |
| static CK_TILE_DEVICE auto | MakeGemmTileWindows (const PadView &views, const index_t i_m, const index_t i_n) |
| template<memory_operation_enum DstInMemOp = memory_operation_enum::set> | |
| static CK_TILE_DEVICE void | RunGemm (const ADataType *a_ptr, const BDataType *b_ptr, const AQDataType *aq_ptr, const BQDataType *bq_ptr, CDataType *c_ptr, void *smem_ptr_0, const QuantGemmKernelArgs &kargs, const SplitKBatchOffset &splitk_batch_offset, const index_t block_idx_m, const index_t block_idx_n) |
| Runs a single GEMM problem cooperatively across the whole workgroup. | |
| template<memory_operation_enum DstInMemOp = memory_operation_enum::set> | |
| static CK_TILE_DEVICE void | RunGemm2LDS (const ADataType *a_ptr, const BDataType *b_ptr, const AQDataType *aq_ptr, const BQDataType *bq_ptr, CDataType *c_ptr, void *smem_ptr_0, void *smem_ptr_1, const QuantGemmKernelArgs &kargs, const SplitKBatchOffset &splitk_batch_offset, const index_t block_idx_m, const index_t block_idx_n) |
| Runs a single GEMM problem cooperatively across the whole workgroup, using two shared-memory blocks (smem_ptr_0 and smem_ptr_1). | |
Static Public Attributes | |
| static constexpr index_t | kBlockSize = GemmPipeline::BlockSize |
| static constexpr bool | PreshuffleQuant |
| static constexpr bool | PreshuffleB = detail::is_preshuffleB_enabled<GemmPipeline_>::value |
| static constexpr auto | I0 = number<0>() |
| static constexpr auto | I1 = number<1>() |
| static constexpr auto | I2 = number<2>() |
| static constexpr auto | I3 = number<3>() |
| static constexpr auto | I4 = number<4>() |
| static constexpr auto | kQuantType = QuantType_ |
Member Typedef Documentation
◆ AccDataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
| using ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::AccDataType = remove_cvref_t<typename EpiloguePipeline::AccDataType> |
◆ ADataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
| using ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::ADataType = remove_cvref_t<typename GemmPipeline::ADataType> |
◆ ALayout
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
| using ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::ALayout = remove_cvref_t<typename GemmPipeline::ALayout> |
◆ AQDataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
| using ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::AQDataType |
Initial value:
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
◆ AQLayout
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
| using ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::AQLayout |
Initial value:
◆ BDataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
| using ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::BDataType = remove_cvref_t<typename GemmPipeline::BDataType> |
◆ BLayout
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
| using ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::BLayout = remove_cvref_t<typename GemmPipeline::BLayout> |
◆ BQDataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
| using ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::BQDataType |
◆ BQLayout
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
| using ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::BQLayout |
Initial value:
◆ CDataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
| using ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::CDataType = remove_cvref_t<typename EpiloguePipeline::ODataType> |
◆ CLayout
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
| using ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::CLayout = remove_cvref_t<typename GemmPipeline::CLayout> |
◆ EpiloguePipeline
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
| using ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::EpiloguePipeline = remove_cvref_t<EpiloguePipeline_> |
◆ GemmPipeline
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
| using ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::GemmPipeline = remove_cvref_t<GemmPipeline_> |
◆ TilePartitioner
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
| using ck_tile::QuantGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_, QuantType_ >::TilePartitioner = remove_cvref_t<TilePartitioner_> |
Member Function Documentation
◆ BlockSize()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
inline static constexpr |
◆ GetName()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
inline static nodiscard |
◆ GetSmemSize()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
inline static constexpr |
◆ GridSize()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
inline static constexpr |
◆ IsSupportedArgument()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
inline static |
◆ MakeGemmPadViews()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
template<typename TensorView>
|
inline static |
◆ MakeGemmTensorViews()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
template<memory_operation_enum DstInMemOp = memory_operation_enum::set>
|
inline static |
◆ MakeGemmTileWindows()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
template<typename PadView>
|
inline static |
◆ MakeKernelArgs()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
inline static constexpr |
◆ operator()()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
inline |
◆ RunGemm()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
template<memory_operation_enum DstInMemOp = memory_operation_enum::set>
|
inline static |
Runs a single GEMM problem cooperatively across the whole workgroup.
- Parameters
-
a_ptr: input A pointer.
b_ptr: input B pointer.
aq_ptr: input AQ pointer.
bq_ptr: input BQ pointer.
c_ptr: output C pointer.
smem_ptr_0: The start memory pointer of the shared memory block.
kargs: GEMM kernel arguments.
splitk_batch_offset: Utility structure used to calculate k batch.
block_idx_m: The GEMM's output M dimension tile index processed by this workgroup.
block_idx_n: The GEMM's output N dimension tile index processed by this workgroup.
- Template Parameters
-
DstInMemOp Destination memory operation (default: set).
◆ RunGemm2LDS()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
template<memory_operation_enum DstInMemOp = memory_operation_enum::set>
|
inline static |
Runs a single GEMM problem cooperatively across the whole workgroup, using two shared-memory blocks (smem_ptr_0 and smem_ptr_1).
- Parameters
-
a_ptr: input A pointer.
b_ptr: input B pointer.
aq_ptr: input AQ pointer.
bq_ptr: input BQ pointer.
c_ptr: output C pointer.
smem_ptr_0: The start memory pointer of the first shared memory block.
smem_ptr_1: The start memory pointer of the second shared memory block.
kargs: GEMM kernel arguments.
splitk_batch_offset: Utility structure used to calculate k batch.
block_idx_m: The GEMM's output M dimension tile index processed by this workgroup.
block_idx_n: The GEMM's output N dimension tile index processed by this workgroup.
- Template Parameters
-
DstInMemOp Destination memory operation (default: set).
Member Data Documentation
◆ I0
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
static constexpr |
◆ I1
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
static constexpr |
◆ I2
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
static constexpr |
◆ I3
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
static constexpr |
◆ I4
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
static constexpr |
◆ kBlockSize
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
static constexpr |
◆ kQuantType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
static constexpr |
◆ PreshuffleB
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
static constexpr |
◆ PreshuffleQuant
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_, QuantType QuantType_>
|
static constexpr |
Initial value:
=
static constexpr bool value
Definition gemm_quant_kernel.hpp:72
The documentation for this struct was generated from the following file: