unary_element_wise_operation.hpp File Reference

unary_element_wise_operation.hpp File Reference

Composable Kernel: unary_element_wise_operation.hpp File Reference
unary_element_wise_operation.hpp File Reference
#include "ck_tile/core.hpp"
#include <cstdint>
#include <type_traits>

Go to the source code of this file.

Classes

struct  ck_tile::element_wise::PassThroughPack8
struct  ck_tile::element_wise::DequantPack8
struct  ck_tile::element_wise::PassThroughPack2
struct  ck_tile::element_wise::PassThrough
struct  ck_tile::element_wise::AddScale
struct  ck_tile::element_wise::MultiDMultiply
struct  ck_tile::element_wise::MultiDAdd
struct  ck_tile::element_wise::UnaryConvert
struct  ck_tile::element_wise::Scale
struct  ck_tile::element_wise::ScaleAndResetNaNToMinusInfinity
struct  ck_tile::element_wise::UnaryDivide
struct  ck_tile::element_wise::UnarySquare
struct  ck_tile::element_wise::UnaryAbs
struct  ck_tile::element_wise::UnarySqrt
struct  ck_tile::element_wise::Relu
struct  ck_tile::element_wise::FastGelu
struct  ck_tile::element_wise::FastGeluAsm
struct  ck_tile::element_wise::Gelu
struct  ck_tile::element_wise::Sigmoid
struct  ck_tile::element_wise::Silu
struct  ck_tile::element_wise::TanH
struct  ck_tile::element_wise::ACos
struct  ck_tile::element_wise::Neg
struct  ck_tile::element_wise::ATan
struct  ck_tile::element_wise::Sin
struct  ck_tile::element_wise::ASinH
struct  ck_tile::element_wise::Cos
struct  ck_tile::element_wise::ACosH
struct  ck_tile::element_wise::Tan
struct  ck_tile::element_wise::ATanH
struct  ck_tile::element_wise::SinH
struct  ck_tile::element_wise::Ceil
struct  ck_tile::element_wise::Exp
struct  ck_tile::element_wise::CosH
struct  ck_tile::element_wise::Floor
struct  ck_tile::element_wise::Log
struct  ck_tile::element_wise::ASin
struct  ck_tile::element_wise::Rcp
struct  ck_tile::element_wise::Swish
struct  ck_tile::element_wise::SoftRelu
struct  ck_tile::element_wise::Power
struct  ck_tile::element_wise::ClippedRelu
struct  ck_tile::element_wise::LeakyRelu
struct  ck_tile::element_wise::Elu
struct  ck_tile::element_wise::Logistic
struct  ck_tile::element_wise::Clamp
struct  ck_tile::element_wise::ConvInvscale
struct  ck_tile::element_wise::ConvScale
struct  ck_tile::element_wise::ConvScaleRelu
struct  ck_tile::element_wise::Cast< DstType, SrcType >
struct  ck_tile::element_wise::Compose< FuncA, FuncB, FuncADs, FuncBDs >
 Compose two unary element-wise functions into one. More...

Namespaces

namespace  ck_tile
namespace  ck_tile::element_wise

Macros

#define CONSTEXPR_LOOKUP_TABLE_FOR_BF16   1
#define CONSTEXPR_LOOKUP_TABLE_FOR_FP8   0
#define CONSTEXPR_LOOKUP_TABLE_FOR_BF8   0

Functions

template<typename T, std::size_t N, typename F, std::size_t... Is>
constexpr std::array< T, N > ck_tile::element_wise::make_lookup_table_impl (F &&func, std::index_sequence< Is... >)
template<typename T, std::size_t N, typename F>
constexpr std::array< T, N > ck_tile::element_wise::make_lookup_table (F &&func)
CK_TILE_DEVICE fp16x4_t ck_tile::element_wise::i4_to_half4 (int q)
 Fast int4x4 to fp16x4_t data type conversion based on the paper "Who Says Elephants Can't Run: Bringing Large Scale MoE Models into Cloud Scale Production".
CK_TILE_DEVICE fp16x4_t ck_tile::element_wise::i4_to_half4_scale (int q, const fp16x2_t &scale)
 This function dequantizes 4 int4 values into 4 fp16 values and applies scaling.
CK_TILE_DEVICE bf16x4_t ck_tile::element_wise::i4_to_bhalf4 (int q)
 This function converts four 4-bit integers into four bf16 values.
CK_TILE_DEVICE fp8x8_t ck_tile::element_wise::amd_assembly_i4_to_fp8x8 (int a)
 This function converts 8 packed 4-bit integers into 8 fp8 values.
CK_TILE_DEVICE float ck_tile::element_wise::amd_assembly_fp8_to_fp32 (uint32_t src)
CK_TILE_DEVICE float ck_tile::element_wise::amd_assembly_bf8_to_fp32 (uint32_t src)
CK_TILE_DEVICE bf8x8_t ck_tile::element_wise::amd_assembly_i4_to_bf8x8 (uint32_t a)
 This function converts 8 packed 4-bit integers into 8 bf8 values.

Macro Definition Documentation

◆ CONSTEXPR_LOOKUP_TABLE_FOR_BF16

#define CONSTEXPR_LOOKUP_TABLE_FOR_BF16   1

◆ CONSTEXPR_LOOKUP_TABLE_FOR_BF8

#define CONSTEXPR_LOOKUP_TABLE_FOR_BF8   0

◆ CONSTEXPR_LOOKUP_TABLE_FOR_FP8

#define CONSTEXPR_LOOKUP_TABLE_FOR_FP8   0