|
xpu
|
Parallel scan inside a block. More...
Classes | |
| struct | storage_t |
| Temporary storage for the block scan. Should be allocated in shared memory. More... | |
Public Member Functions | |
| template<typename ContextT > | |
| XPU_D | block_scan (ContextT &ctx, storage_t &storage) |
| Construct a block scan object. More... | |
| XPU_D | block_scan (tpos &pos, storage_t &storage) |
| Construct a block scan object. More... | |
| XPU_D void | exclusive_sum (T input, T &output) |
| template<typename ScanOp > | |
| XPU_D void | exclusive_sum (T input, T &output, T initial_value, ScanOp scan_op) |
| XPU_D void | inclusive_sum (T input, T &output) |
| template<typename ScanOp > | |
| XPU_D void | inclusive_sum (T input, T &output, T initial_value, ScanOp scan_op) |
Parallel scan inside a block.
| XPU_D xpu::block_scan< T, BlockSize, Impl >::block_scan | ( | ContextT & | ctx, |
| storage_t & | storage | ||
| ) |
Construct a block scan object.
| ctx | Kernel context. |
| storage | Temporary storage for the block scan. |
Equivalent to block_scan(ctx.pos(), storage).
| XPU_D xpu::block_scan< T, BlockSize, Impl >::block_scan | ( | tpos & | pos, |
| storage_t & | storage | ||
| ) |
Construct a block scan object.
| pos | Thread position. |
| storage | Temporary storage for the block scan. |
| XPU_D void xpu::block_scan< T, BlockSize, Impl >::exclusive_sum | ( | T | input, |
| T & | output | ||
| ) |
| XPU_D void xpu::block_scan< T, BlockSize, Impl >::exclusive_sum | ( | T | input, |
| T & | output, | ||
| T | initial_value, | ||
| ScanOp | scan_op | ||
| ) |
| XPU_D void xpu::block_scan< T, BlockSize, Impl >::inclusive_sum | ( | T | input, |
| T & | output | ||
| ) |
| XPU_D void xpu::block_scan< T, BlockSize, Impl >::inclusive_sum | ( | T | input, |
| T & | output, | ||
| T | initial_value, | ||
| ScanOp | scan_op | ||
| ) |