device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp Source File
device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp
Go to the documentation of this file.
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition host_utility/kernel_launch.hpp:14
float launch_and_time_kernel_with_preprocess(const StreamConfig &stream_config, PreProcessFunc preprocess, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition host_utility/kernel_launch.hpp:91
__host__ __device__ constexpr auto integer_divide_ceil(X x, Y y)
Definition utility/math.hpp:72
Definition convolution_backward_data_specialization.hpp:8
__global__ void kernel_grouped_conv_bwd_weight_xdl_cshuffle_v3_2lds(typename GridwiseGemm::Argument karg, const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1, const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1, const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock c_grid_desc_mblock_mperblock_nblock_nperblock, const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch, const index_t num_k_per_block)
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:109
__global__ void kernel_grouped_conv_bwd_weight_xdl_cshuffle_v3(typename GridwiseGemm::Argument karg, const AGridDesc_AK0_M_K1 a_grid_desc_ak0_m_ak1, const BGridDesc_BK0_N_K1 b_grid_desc_bk0_n_bk1, const CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock c_grid_desc_mblock_mperblock_nblock_nperblock, const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch, const index_t num_k_per_block)
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:51
auto get_bwd_weight_gemm_sizes(const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_lengths)
Definition split_k_utils.hpp:55
ConvolutionBackwardWeightSpecialization
Definition convolution_backward_weight_specialization.hpp:13
@ Filter1x1Stride1Pad0
Definition convolution_backward_weight_specialization.hpp:15
constexpr bool is_NHWGC_GKYXC_NHWGK()
Definition device_grouped_conv_utils.hpp:40
ck::index_t get_best_occupancy_k_batch_value(int max_occupancy, ck::index_t grid_size)
Definition split_k_utils.hpp:30
GemmSpecialization
Definition gemm_specialization.hpp:11
@ Default
Definition gemm_specialization.hpp:13
constexpr bool is_NDHWGC_GKZYXC_NDHWGK()
Definition device_grouped_conv_utils.hpp:80
constexpr bool is_NGCDHW_NGKDHW()
Definition device_grouped_conv_utils.hpp:112
constexpr bool is_NGCHW_GKCYX_NGKHW()
Definition device_grouped_conv_utils.hpp:64
std::string getConvBackwardWeightSpecializationString(const ConvolutionBackwardWeightSpecialization &s)
Definition convolution_backward_weight_specialization.hpp:21
ck::index_t calculate_mn_grid_size(ck::index_t gemmM, ck::index_t gemmN)
Definition split_k_utils.hpp:84
constexpr bool is_NGCDHW_GKCZYX_NGKDHW()
Definition device_grouped_conv_utils.hpp:104
constexpr bool is_NGCHW_NGKHW()
Definition device_grouped_conv_utils.hpp:72
Definition convolution_backward_data_specialization.hpp:7
float launch_and_time_kernel_with_preprocess(const StreamConfig &stream_config, PreProcessFunc preprocess, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, GemmArgs &gemm_args, Args... args)
Definition flush_cache.hpp:398
Definition ck.hpp:268
__global__ void kernel_batched_elementwise(const InGridDescTuple in_grid_desc_tuple, const OutGridDescTuple out_grid_desc_tuple, const InDataTypePointerTuple p_in_global_tuple, const OutDataTypePointerTuple p_out_global_tuple, const Block2TileMap block_2_tile_map, const ElementwiseOperation elementwise_op, const index_t batch_count, const std::array< index_t, NumInputs > input_batch_strides, const std::array< index_t, NumOutputs > output_batch_strides)
Definition gridwise_elementwise_2d.hpp:221
__device__ uint32_t amd_wave_read_first_lane(uint32_t value)
Definition amd_wave_read_first_lane.hpp:100
__host__ __device__ constexpr Y type_convert(X x)
Definition utility/type_convert.hpp:98
typename remove_reference< T >::type remove_reference_t
Definition type.hpp:292
auto accumulate_n(ForwardIterator first, Size count, T init, BinaryOperation op) -> decltype(std::accumulate(first, std::next(first, count), init, op))
Definition library/utility/numeric.hpp:11
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__global__ void kernel_elementwise_dual(const InAGridDescTuple in_grid_desc_tuple_a, const InBGridDescTuple in_grid_desc_tuple_b, const OutAGridDescTuple out_grid_desc_tuple_a, const OutBGridDescTuple out_grid_desc_tuple_b, const InADataTypePointerTuple p_in_global_tuple_a, const InBDataTypePointerTuple p_in_global_tuple_b, const OutADataTypePointerTuple p_out_global_tuple_a, const OutBDataTypePointerTuple p_out_global_tuple_b, const Block2TileMapA block_2_tile_map_a, const Block2TileMapB block_2_tile_map_b, const ElementwiseOperation elementwise_op, const index_t a_grid_size)
Definition gridwise_elementwise_2d.hpp:61
__global__ void kernel_elementwise(const InGridDescTuple in_grid_desc_tuple, const OutGridDescTuple out_grid_desc_tuple, const InDataTypePointerTuple p_in_global_tuple, const OutDataTypePointerTuple p_out_global_tuple, const Block2TileMap block_2_tile_map, const ElementwiseOperation elementwise_op)
Definition gridwise_elementwise_2d.hpp:29
Definition ck/stream_config.hpp:10
Definition block_to_ctile_map.hpp:261
Definition gridwise_elementwise_2d.hpp:278
Definition gridwise_gemm_xdl_cshuffle_conv_v3.hpp:66
__host__ static __device__ constexpr auto MakeCGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(const CGridDesc &c_grid_desc_m_n, index_t MBlock, index_t NBlock)
Definition gridwise_gemm_xdl_cshuffle_conv_v3.hpp:644
Definition utility/sequence.hpp:43
Definition utility/tuple.hpp:117
Definition tensor_operation/gpu/device/tensor_layout.hpp:31
Definition tensor_operation/gpu/device/tensor_layout.hpp:26
Definition tensor_operation/operator_transform/transform_conv_bwd_weight_to_gemm.hpp:24
Transform conv bwd weight to gemm v2.
Definition transform_conv_bwd_weight_to_gemm_v2.hpp:33
Definition transform_conv_ngchw_to_nhwgc.hpp:31
Definition split_k_arg.hpp:11
Definition device_base.hpp:197
void * p_workspace_
Definition device_base.hpp:204
BaseArgument()=default
BaseInvoker()=default
Definition device_grouped_conv_bwd_weight.hpp:29
ck::tensor_operation::device::DeviceGroupedConvBwdWeightTwoStage_Xdl_CShuffle::ActiveWorkgroupsPerCU
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:522
int GetMaxOccupancy()
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:524
int max_occupancy_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:586
ActiveWorkgroupsPerCU()
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:568
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:590
long_index_t c_space_size_bytes
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:866
NGCHWTransposeDescType b_in_transpose_desc_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:840
std::array< ck::index_t, NDimSpatial > input_spatial_lengths_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:860
const index_t Conv_C_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:859
InElementwiseOperation b_element_op_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:852
OutElementwiseOperation a_element_op_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:851
WeiElementwiseOperation cde_element_op_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:853
const std::array< ck::index_t, NDimSpatial > & input_left_pads_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:864
Block2TileMapElementwise elementwise_block_2_ctile_map_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:836
ComputePtrOffsetOfStridedBatch< I1, I1, I0 > compute_ptr_offset_of_batch_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:846
NHWGCTransposeDescType b_out_transpose_desc_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:841
std::size_t GetWorkspaceETensorSizeBytes() const
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:797
CElementwiseGridDesc_M_N ce_elementwise_grid_desc_m_n_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:833
const BDataType * p_b_grid_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:827
Block2TileMapElementwise elementwise_block_2_ctile_map_transpose_a_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:837
const index_t Conv_N_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:857
std::array< ck::index_t, NDimSpatial > filter_spatial_lengths_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:861
Argument(const InDataType *p_in_grid, WeiDataType *p_wei_grid, const OutDataType *p_out_grid, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_dilations, const std::array< ck::index_t, NDimSpatial > &input_left_pads, const std::array< ck::index_t, NDimSpatial > &input_right_pads, const ck::index_t M01, const ck::index_t N01, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, ck::index_t split_k)
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:591
std::size_t GetWorkspaceSizeBytes() const
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:806
std::size_t GetWorkspaceATensorSizeBytes() const
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:784
const index_t Conv_K_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:858
const ADataType * p_a_grid_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:826
const std::array< ck::index_t, NDimSpatial > & conv_filter_strides_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:863
CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock c_grid_desc_mblock_mperblock_nblock_nperblock_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:834
GKYXCTransposeDescType e_in_transpose_desc_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:842
std::array< ck::index_t, NDimSpatial > output_spatial_lengths_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:862
BGridDesc_K0_N_K1 b_grid_desc_k0_n_k1_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:831
const std::array< ck::index_t, NDimSpatial > & input_right_pads_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:865
NHWGCTransposeDescType a_out_transpose_desc_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:841
EDataType * p_e_grid_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:828
index_t M01_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:848
std::size_t GetWorkspaceBTensorSizeBytes() const
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:792
Block2TileMapElementwise elementwise_block_2_ctile_map_transpose_b_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:838
AGridDesc_K0_M_K1 a_grid_desc_k0_m_k1_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:830
GKCYXTransposeDescType e_out_transpose_desc_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:843
NGCHWTransposeDescType a_in_transpose_desc_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:840
const index_t Conv_G_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:856
CGridDesc_M_N ce_grid_desc_m_n_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:832
index_t N01_
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:849
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:871
INVOKER_RUN_IMPL float Run(const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:1670
void ShowInfo(const Argument &arg)
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:874
float RunGemmV3(const Argument &arg, const StreamConfig &stream_config=StreamConfig{})
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:889
float RunImp(const Argument &arg, const StreamConfig &stream_config=StreamConfig{})
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:1543
DeviceOp::Argument Argument
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:872
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:217
ck::tensor_operation::device::DeviceGroupedConvBwdWeightTwoStage_Xdl_CShuffle::BElementwiseOperation
InElementwiseOperation BElementwiseOperation
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:237
bool IsSupportedArgument(const BaseArgument *p_arg) override
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:1900
static constexpr auto I4
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:247
void SetWorkSpacePointer(BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const override
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:2058
InDataType ABDataType
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:241
static constexpr auto I5
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:248
remove_cvref_t< decltype(conv_ngchw_to_nhwgc_transformer .template MakeGKCYXTransposeDesc< NDimSpatial >({}, {}))> GKCYXTransposeDescType
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:390
static constexpr auto I1
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:244
static GET_NXDL_PER_WAVE_IMPL constexpr auto NXdlPerWave64
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:224
std::string GetTypeString() const override
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:1992
GridwiseGemmBase< NXdlPerWave32 > GridwiseGemm32
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:454
OutDataType ADataType
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:227
static constexpr GemmSpecialization GemmSpec
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:282
remove_cvref_t< decltype(conv_ngchw_to_nhwgc_transformer .template MakeGKYXCTransposeDesc< NDimSpatial >({}, {}))> GKYXCTransposeDescType
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:393
static bool IsSupportedArgument(const Argument &arg)
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:1683
static auto MakeInvoker()
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:1945
size_t GetWorkSpaceSize(const BaseArgument *p_arg) const override
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:2045
static constexpr bool IsValidCompilationParameter()
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:1677
WeiDataType EDataType
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:229
decltype(GetABCGridDesc< NDimSpatial >()) ABCGridDescs
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:397
static constexpr auto I3
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:246
static auto MakeArgument(const InDataType *p_in_grid, WeiDataType *p_wei_grid, const OutDataType *p_out_grid, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_dilations, const std::array< ck::index_t, NDimSpatial > &input_left_pads, const std::array< ck::index_t, NDimSpatial > &input_right_pads, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, const ck::index_t split_k)
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:1906
GridwiseElementwise< Tuple< CElementwiseGridDesc_M_N >, Tuple< CElementwiseGridDesc_M_N >, Tuple< const AccDataType * >, Tuple< EDataType * >, Block2TileMapElementwise, CDEElementwiseOperation, BlockSize, MPerBlock, NPerBlock, MPerBlock/ClusterLengthMPerBlock, NPerBlock/ClusterLengthNPerBlock, Sequence< 0, 1 >, Sequence< CBlockTransferScalarPerVector_NWaveNPerXdl >, Sequence< CBlockTransferScalarPerVector_NWaveNPerXdl >, I1, I1 > GridwiseElementwiseCast
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:458
WeiElementwiseOperation CDEElementwiseOperation
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:238
ck::tensor_operation::device::DeviceGroupedConvBwdWeightTwoStage_Xdl_CShuffle::AElementwiseOperation
OutElementwiseOperation AElementwiseOperation
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:236
InDataType BDataType
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:228
BlockToCTileMap_M00_N0_M01Adapt< MPerBlock, NPerBlock > Block2TileMapElementwise
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:456
remove_cvref_t< decltype(ABCGridDescs{}[I2])> CGridDesc_M_N
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:401
static constexpr auto I0
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:243
static constexpr auto K1Number
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:250
GridwiseElementwise< Tuple< GKYXCTransposeDescType >, Tuple< GKCYXTransposeDescType >, Tuple< const AccDataType * >, Tuple< EDataType * >, Block2TileMapElementwise, CDEElementwiseOperation, BlockSize, MPerBlock, NPerBlock, MPerBlock/ClusterLengthMPerBlock, NPerBlock/ClusterLengthNPerBlock, Sequence< 0, 1 >, Sequence< CBlockTransferScalarPerVector_NWaveNPerXdl >, Sequence< 1 >, I1, I0 > GridwiseElementwiseWeightTransposeCast
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:480
GridwiseGemmBase< math::max(NXdlPerWave64, 1)> GridwiseGemm64
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:453
GridwiseGemm_xdl_cshuffle_conv_v3< tensor_layout::gemm::RowMajor, tensor_layout::gemm::ColumnMajor, tensor_layout::gemm::RowMajor, ADataType, BDataType, AccDataType, AccDataType, AccDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, MPerBlock, NPerBlock, KPerBlock, K1, K1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave_, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, false, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, false, BBlockLdsAddExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CBlockTransferScalarPerVector_NWaveNPerXdl, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB > GridwiseGemmBase
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:406
DeviceGroupedConvBwdWeightTwoStage_Xdl_CShuffle DeviceOp
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:222
static constexpr auto I2
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:245
remove_cvref_t< decltype(conv_ngchw_to_nhwgc_transformer .template MakeNHWGCTransposeDesc< NDimSpatial >({}, {}))> NHWGCTransposeDescType
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:387
remove_cvref_t< decltype(conv_ngchw_to_nhwgc_transformer .template MakeNGCHWTransposeDesc< NDimSpatial >({}, {}))> NGCHWTransposeDescType
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:384
static constexpr index_t ClusterLengthMPerBlock
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:269
static constexpr auto NXdlPerWave32
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:225
remove_cvref_t< decltype(ABCGridDescs{}[I1])> BGridDesc_K0_N_K1
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:400
static constexpr index_t ClusterLengthNPerBlock
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:271
static auto GetElementwiseCGridDesc()
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:335
static constexpr auto conv_to_gemm_transformer_v1
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:261
decltype(GridwiseGemm64::MakeCGridDescriptor_MBlock_MPerBlock_NBlock_NPerBlock( CGridDesc_M_N{}, 1, 1)) CGridDesc_MBlock_MPerBlock_NBlock_NPerBlock
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:517
static auto GetABCGridDesc()
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:285
static constexpr auto conv_ngchw_to_nhwgc_transformer
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:274
remove_cvref_t< decltype(ABCGridDescs{}[I0])> AGridDesc_K0_M_K1
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:399
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_in_grid, void *p_wei_grid, const void *p_out_grid, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_lengths, const std::array< index_t, NDimSpatial+3 > &b_g_n_c_wis_strides, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_lengths, const std::array< index_t, NDimSpatial+3 > &e_g_k_c_xs_strides, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_lengths, const std::array< index_t, NDimSpatial+3 > &a_g_n_k_wos_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_strides, const std::array< ck::index_t, NDimSpatial > &conv_filter_dilations, const std::array< ck::index_t, NDimSpatial > &input_left_pads, const std::array< ck::index_t, NDimSpatial > &input_right_pads, InElementwiseOperation in_element_op, WeiElementwiseOperation wei_element_op, OutElementwiseOperation out_element_op, const ck::index_t split_k) override
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:1948
GridwiseElementwise< Tuple< NGCHWTransposeDescType >, Tuple< NHWGCTransposeDescType >, Tuple< const ADataType * >, Tuple< ADataType * >, Block2TileMapElementwise, element_wise::PassThrough, BlockSize, MPerBlock, NPerBlock, MPerBlock/ClusterLengthMPerBlock, NPerBlock/ClusterLengthNPerBlock, Sequence< 1, 0 >, Sequence< TransposeTransferSrcScalarPerVector >, Sequence< TransposeTransferDstScalarPerVector >, I1, I0 > GridwiseElementwiseTranspose
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:498
std::unique_ptr< BaseInvoker > MakeInvokerPointer() override
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:1987
static constexpr auto conv_to_gemm_transformer_v2
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:252
remove_cvref_t< decltype(GetElementwiseCGridDesc< NDimSpatial >())> CElementwiseGridDesc_M_N
Definition device_grouped_conv_bwd_weight_two_stage_xdl_cshuffle.hpp:402
Definition tensor_operation/gpu/element/unary_element_wise_operation.hpp:340