thread_group_tensor_slice_transfer_v4r1_gather.hpp Source File
thread_group_tensor_slice_transfer_v4r1_gather.hpp
Go to the documentation of this file.
Definition ck.hpp:268
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
typename detail::StaticallyIndexedArrayImpl< T, N >::type StaticallyIndexedArray
Definition utility/statically_indexed_array.hpp:45
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
__host__ __device__ constexpr auto make_zero_multi_index()
Definition array_multi_index.hpp:21
typename remove_reference< T >::type remove_reference_t
Definition type.hpp:292
__device__ void Run(const SrcDesc &src_desc, const SrcBuffer &src_buf, const DstDesc &dst_desc, DstBuffer &dst_buf, Number< ThreadScratchId > thread_scratch_id)
Definition thread_group_tensor_slice_transfer_v4r1_gather.hpp:143
__device__ void RunWrite(const DstDesc &dst_desc, DstBuffer &dst_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
Definition thread_group_tensor_slice_transfer_v4r1_gather.hpp:131
static constexpr auto thread_slice_lengths
Definition thread_group_tensor_slice_transfer_v4r1_gather.hpp:51
static constexpr index_t gather_num
Definition thread_group_tensor_slice_transfer_v4r1_gather.hpp:52
static constexpr index_t nDim
Definition thread_group_tensor_slice_transfer_v4r1_gather.hpp:50
__device__ void MoveDstSliceWindow(const DstDesc &dst_desc, const Index &step)
Definition thread_group_tensor_slice_transfer_v4r1_gather.hpp:162
__device__ void SetSrcSliceOrigin(const SrcDesc &src_desc, const Index &src_block_slice_origin)
Definition thread_group_tensor_slice_transfer_v4r1_gather.hpp:98
__device__ void RunRead(const SrcDesc &src_desc, const SrcBuffer &src_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
Definition thread_group_tensor_slice_transfer_v4r1_gather.hpp:119
MultiIndex< nDim > Index
Definition thread_group_tensor_slice_transfer_v4r1_gather.hpp:53
__device__ void MoveSrcSliceWindow(const SrcDesc &src_desc, const Index &step)
Definition thread_group_tensor_slice_transfer_v4r1_gather.hpp:153
static constexpr auto I0
Definition thread_group_tensor_slice_transfer_v4r1_gather.hpp:49
__device__ constexpr ThreadGroupTensorSliceTransfer_v4r1_gather(const SrcDesc &src_desc, const Index &src_block_slice_origin, const SrcElementwiseOperation &src_element_op, const DstDesc &dst_desc, const Index &dst_block_slice_origin, const DstElementwiseOperation &dst_element_op, const StaticallyIndexedArray< IndexType, gather_num > &gather_offsets)
Definition thread_group_tensor_slice_transfer_v4r1_gather.hpp:55
__device__ constexpr auto GetSrcThreadScratchIdx()
Definition thread_group_tensor_slice_transfer_v4r1_gather.hpp:113
__device__ void RunWrite(const DstDesc &dst_desc, DstBuffer &dst_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
Definition threadwise_tensor_slice_transfer_v3r1_gather.hpp:486
__device__ void RunRead(const SrcDesc &src_desc, const SrcBuffer &src_buf, Number< ThreadScratchId > thread_scratch_id=Number< ThreadScratchId >{})
Definition threadwise_tensor_slice_transfer_v3r1_gather.hpp:132
Definition type.hpp:177