xpu
xpu::block_scan< T, BlockSize, Impl > Class Template Reference

Parallel scan inside a block. More...

Classes

struct  storage_t
 Temporary storage for the block scan. Should be allocated in shared memory. More...
 

Public Member Functions

template<typename ContextT >
XPU_D block_scan (ContextT &ctx, storage_t &storage)
 Construct a block scan object. More...
 
XPU_D block_scan (tpos &pos, storage_t &storage)
 Construct a block scan object. More...
 
XPU_D void exclusive_sum (T input, T &output)
 
template<typename ScanOp >
XPU_D void exclusive_sum (T input, T &output, T initial_value, ScanOp scan_op)
 
XPU_D void inclusive_sum (T input, T &output)
 
template<typename ScanOp >
XPU_D void inclusive_sum (T input, T &output, T initial_value, ScanOp scan_op)
 

Detailed Description

template<typename T, int BlockSize, xpu::driver_t Impl = XPU_COMPILATION_TARGET>
class xpu::block_scan< T, BlockSize, Impl >

Parallel scan inside a block.

Constructor & Destructor Documentation

◆ block_scan() [1/2]

template<typename T , int BlockSize, xpu::driver_t Impl = XPU_COMPILATION_TARGET>
template<typename ContextT >
XPU_D xpu::block_scan< T, BlockSize, Impl >::block_scan ( ContextT &  ctx,
storage_t &  storage 
)

Construct a block scan object.

Parameters
ctx      Kernel context.
storage  Temporary storage for the block scan.
Note
This is a shortcut for block_scan(ctx.pos(), storage).
See also
block_scan::storage_t

◆ block_scan() [2/2]

template<typename T , int BlockSize, xpu::driver_t Impl = XPU_COMPILATION_TARGET>
XPU_D xpu::block_scan< T, BlockSize, Impl >::block_scan ( tpos &  pos,
storage_t &  storage 
)

Construct a block scan object.

Parameters
pos      Thread position.
storage  Temporary storage for the block scan.
See also
block_scan::storage_t

Member Function Documentation

◆ exclusive_sum() [1/2]

template<typename T , int BlockSize, xpu::driver_t Impl = XPU_COMPILATION_TARGET>
XPU_D void xpu::block_scan< T, BlockSize, Impl >::exclusive_sum ( T  input,
T &  output 
)

◆ exclusive_sum() [2/2]

template<typename T , int BlockSize, xpu::driver_t Impl = XPU_COMPILATION_TARGET>
template<typename ScanOp >
XPU_D void xpu::block_scan< T, BlockSize, Impl >::exclusive_sum ( T  input,
T &  output,
T  initial_value,
ScanOp  scan_op 
)

◆ inclusive_sum() [1/2]

template<typename T , int BlockSize, xpu::driver_t Impl = XPU_COMPILATION_TARGET>
XPU_D void xpu::block_scan< T, BlockSize, Impl >::inclusive_sum ( T  input,
T &  output 
)

◆ inclusive_sum() [2/2]

template<typename T , int BlockSize, xpu::driver_t Impl = XPU_COMPILATION_TARGET>
template<typename ScanOp >
XPU_D void xpu::block_scan< T, BlockSize, Impl >::inclusive_sum ( T  input,
T &  output,
T  initial_value,
ScanOp  scan_op 
)

The documentation for this class was generated from the following file: