◆ block_reduce() [1/2]
template<typename T, int BlockSize, xpu::driver_t Impl = XPU_COMPILATION_TARGET>
template<typename ContextT>
◆ block_reduce() [2/2]
template<typename T, int BlockSize, xpu::driver_t Impl = XPU_COMPILATION_TARGET>
◆ reduce()
template<typename T, int BlockSize, xpu::driver_t Impl = XPU_COMPILATION_TARGET>
template<typename ReduceOp>
◆ sum()
template<typename T, int BlockSize, xpu::driver_t Impl = XPU_COMPILATION_TARGET>
The documentation for this class was generated from the following file: