xpu
|
Parallel scan inside a block. More...
Classes | |
struct | storage_t |
Temporary storage for the block scan. Should be allocated in shared memory. More... | |
Public Member Functions | |
template<typename ContextT > | |
XPU_D | block_scan (ContextT &ctx, storage_t &storage) |
Construct a block scan object. More... | |
XPU_D | block_scan (tpos &pos, storage_t &storage) |
Construct a block scan object. More... | |
XPU_D void | exclusive_sum (T input, T &output) |
template<typename ScanOp > | |
XPU_D void | exclusive_sum (T input, T &output, T initial_value, ScanOp scan_op) |
XPU_D void | inclusive_sum (T input, T &output) |
template<typename ScanOp > | |
XPU_D void | inclusive_sum (T input, T &output, T initial_value, ScanOp scan_op) |
Parallel scan inside a block.
XPU_D xpu::block_scan< T, BlockSize, Impl >::block_scan (ContextT &ctx, storage_t &storage)
Construct a block scan object.
ctx | Kernel context. |
storage | Temporary storage for the block scan. |
Equivalent to block_scan(ctx.pos(), storage).
XPU_D xpu::block_scan< T, BlockSize, Impl >::block_scan (tpos &pos, storage_t &storage)
Construct a block scan object.
pos | Thread position. |
storage | Temporary storage for the block scan. |
XPU_D void xpu::block_scan< T, BlockSize, Impl >::exclusive_sum (T input, T &output)
XPU_D void xpu::block_scan< T, BlockSize, Impl >::exclusive_sum (T input, T &output, T initial_value, ScanOp scan_op)
XPU_D void xpu::block_scan< T, BlockSize, Impl >::inclusive_sum (T input, T &output)
XPU_D void xpu::block_scan< T, BlockSize, Impl >::inclusive_sum (T input, T &output, T initial_value, ScanOp scan_op)